Page 450 - Ai_V3.0_c11_flipbook
P. 450
# Present initial findings from the cleaned dataset
print("\nInitial Findings after Cleaning:")
print("Number of records after cleaning:", len(df))
print("Summary statistics for numerical columns:")
print(df.describe())
print("Summary statistics for categorical columns:")
print(df.describe(include=['object']))
else:
print("DataFrame not loaded successfully.")
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
# Train a Random Forest and an SVM classifier on the cleaned DataFrame `df`.
# Each model is wrapped in a Pipeline so that imputation, scaling and one-hot
# encoding are learned from the training split only and then re-applied
# unchanged to any data passed to the fitted pipeline later.

# Check if DataFrame is loaded successfully
if df is not None and not df.empty:
    # Splitting the dataset into features (X) and the target variable (y)
    X = df.drop(columns=['flag'])  # Features
    y = df['flag']  # Target variable

    # Splitting the dataset into training and test sets (80% train, 20% test).
    # random_state is fixed so the split is reproducible between runs.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42)

    # Define preprocessing steps for numerical and categorical features.
    # Columns whose dtype matches neither selection are not listed in the
    # ColumnTransformer below.
    numeric_features = X_train.select_dtypes(
        include=['int64', 'float64']).columns
    categorical_features = X_train.select_dtypes(include=['object']).columns

    numeric_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler())])

    categorical_transformer = Pipeline(steps=[
        # Impute missing values with most frequent value
        ('imputer', SimpleImputer(strategy='most_frequent')),
        # One-hot encode categorical variables; categories that appear only
        # at prediction time are ignored instead of raising an error.
        ('onehot', OneHotEncoder(handle_unknown='ignore'))])

    preprocessor = ColumnTransformer(
        transformers=[('num', numeric_transformer, numeric_features),
                      ('cat', categorical_transformer, categorical_features)])

    # Combine preprocessing with the model.
    # NOTE(review): both pipelines reference the same `preprocessor` object,
    # so fitting the second pipeline refits it. That looks harmless here
    # because both are fitted on the same X_train -- confirm before fitting
    # them on different data.
    rf_pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                                  ('classifier', RandomForestClassifier())])
    svm_pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                                   ('classifier', SVC())])

    # Model Training
    rf_pipeline.fit(X_train, y_train)
    svm_pipeline.fit(X_train, y_train)
448 Touchpad Artificial Intelligence (Ver. 3.0)-XI

