Page 294 - Touchpad AI
P. 294

from sklearn.linear_model import LinearRegression
                   from sklearn.model_selection import train_test_split
                   from sklearn.metrics import mean_squared_error, r2_score

                   # Read the car listings from the uploaded CSV file
                   df = pd.read_csv("car details v4.csv")

                   # ---- Data cleaning ----

                   # 'Engine': strip the ' cc' unit suffix from the text, then convert
                   # what is left to a float
                   engine_text = df['Engine'].str.replace(' cc', '', regex=False)
                   df['Engine'] = engine_text.astype(float)

                   # 'Max Power' and 'Max Torque': keep only the first number found in
                   # each entry (digits with an optional decimal part) as a float
                   for col in ('Max Power', 'Max Torque'):
                       df[col] = df[col].str.extract(r'(\d+\.?\d*)').astype(float)

                   # Keep only the rows that have no missing value in any column
                   df_cleaned = df.dropna()

                   # Visualization 1 - Distribution of Prices
                   # Histogram of the 'Price' column (30 bins) with a KDE curve overlaid
                   plt.figure(figsize=(8, 5))
                   sns.histplot(df_cleaned['Price'], bins=30, kde=True)
                   plt.xlabel('Price')
                   plt.ylabel('Frequency')
                   plt.title('Distribution of Car Prices')
                   plt.grid(True)
                   plt.show()
                   Output:

























                   # Visualization 2 - Price vs Year Scatter Plot
                   # Plots 'Price' (y-axis) against 'Year' (x-axis) for the cleaned rows,
                   # with points colored by the 'Fuel Type' column.
                   # Start a new 8x5 figure so this plot does not draw over the previous one
                   plt.figure(figsize=(8, 5))
                   sns.scatterplot(data=df_cleaned, x='Year', y='Price', hue='Fuel Type')
                   # Title and axis labels for the scatter plot
                   plt.title('Car Price vs Year')
                   plt.xlabel('Year')
                   plt.ylabel('Price')


                 292    Touchpad Artificial Intelligence - XI
   289   290   291   292   293   294   295   296   297   298   299