Workspace
Vimbiso Kadirire/

CAR PRICE PREDICTION

0
Beta
Spinner
#Section A
#a. 
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Load the car listings into a DataFrame.
Cars = pd.read_csv('price_prediction_batch_20.csv')
# Preview the first rows.
Cars.head()
# Assess how the features in the dataframe look: column dtypes and non-null
# counts (info), then the number of distinct values per column (nunique).
Cars.info()
Cars.nunique()
# Summary statistics, computed once and kept as the final expression.
Cars.describe()
#b. 
# Count the missing Levy entries.
# NOTE(review): this runs before the Levy column is cleaned, so any
# non-numeric placeholder strings are not counted as missing here - confirm.
Cars['Levy'].isna().sum()
import pandas as pd

# Columns to store with the pandas Category dtype.
Categorical = [
    'Category', 'Fuel type', 'Gear box type', 'Wheel',
    'Drive wheels', 'Manufacturer', 'Color', 'Leather interior',
]
# Convert every listed column with a single astype call.
Cars = Cars.astype({column: 'category' for column in Categorical})
# Check that the conversion was successful for each column.
assert all(
    isinstance(Cars[column].dtype, pd.CategoricalDtype)
    for column in Categorical
)
# Cleaning the Levy column.
# Keep only digits and the decimal point. Stripping every non-word character
# (r'\W') would also remove '.', silently turning a value like '12.5' into
# '125'. Entries with no digits at all become '' and are coerced to NaN below.
Cars['Levy'] = Cars['Levy'].str.replace(r'[^\d.]', '', regex=True)
# Convert to float. to_numeric can return an integer dtype when nothing is
# coerced to NaN, so cast explicitly to keep the dtype check below valid.
Cars['Levy'] = pd.to_numeric(Cars['Levy'], errors='coerce').astype(float)
assert Cars['Levy'].dtype == float
# Clean the numerical columns.
# Mileage: strip the literal ' km' text, then convert to float in one chain.
Cars['Mileage'] = (
    Cars['Mileage']
    .str.replace(' km', '', regex=False)
    .astype(float)
)
# Verify the conversion to float succeeded.
assert Cars['Mileage'].dtype == 'float'
# Remove the Model column from the dataframe.
Cars = Cars.drop('Model', axis=1)
print(Cars.head())
# Assess the raw Engine volume values.
Cars['Engine volume'].value_counts()
# Clean Cars['Engine volume']: extract the first number including its
# fractional part. A digits-only pattern would truncate a value such as
# '2.5' to '2'. The raw string avoids the invalid '\d' escape warning.
Cars['Engine volume'] = Cars['Engine volume'].str.extract(
    pat=r'(\d+(?:\.\d+)?)', expand=False
)
# Convert Engine volume to float and verify the dtype.
Cars['Engine volume'] = Cars['Engine volume'].astype('float')
assert Cars['Engine volume'].dtype == 'float'
# Select every row that contains at least one null value.
Cars[Cars.isna().any(axis='columns')]
# Count the nulls per column to see how significant they are.
Null_items = Cars.isna().sum()
print(Null_items)
# The Levy column has many null values, so fill them with the column mean.
Cars['Levy'] = Cars['Levy'].fillna(value=Cars['Levy'].mean())



  • AI Chat
  • Code