Page 294 - Touchpad AI
P. 294
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
# Read the uploaded car listings CSV into a DataFrame.
df = pd.read_csv("car details v4.csv")

# --- Data cleaning ---
# 'Engine': strip the literal ' cc' text from each value, then store as float.
engine_text = df['Engine'].str.replace(' cc', '', regex=False)
df['Engine'] = engine_text.astype(float)

# 'Max Power' / 'Max Torque': keep only the first number (optionally with a
# decimal part) found in each string; values without a match become NaN.
number_pattern = r'(\d+\.?\d*)'
for spec_column in ('Max Power', 'Max Torque'):
    df[spec_column] = df[spec_column].str.extract(number_pattern).astype(float)

# Keep only the rows that have no missing value in any column.
df_cleaned = df.dropna()
# Visualization 1 - Distribution of Prices
# Histogram of the 'Price' column (30 bins) with a KDE curve drawn on top.
plt.figure(figsize=(8, 5))
price_ax = sns.histplot(data=df_cleaned, x='Price', bins=30, kde=True)
price_ax.set_title('Distribution of Car Prices')
price_ax.set_xlabel('Price')
price_ax.set_ylabel('Frequency')
price_ax.grid(True)
plt.show()
Output:
# Visualization 2 - Price vs Year Scatter Plot
# One point per row of df_cleaned, colored by that row's 'Fuel Type' value.
plt.figure(figsize=(8, 5))
year_price_ax = sns.scatterplot(x='Year', y='Price', hue='Fuel Type', data=df_cleaned)
year_price_ax.set(title='Car Price vs Year', xlabel='Year', ylabel='Price')
292 Touchpad Artificial Intelligence - XI

