ML_AI_training/MultClassPipe.py

106 lines
3.5 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Mar 4 15:25:33 2022
@author: tanu
"""
#%%
import os, sys
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import BernoulliNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import Pipeline
from xgboost import XGBClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, roc_auc_score, roc_curve, f1_score
#%%
# Shared keyword arguments, unpacked into each estimator that accepts a
# ``random_state`` so that repeated runs use the same seed.
rs = dict(random_state=42)
# TODO: add preprocessing step with one hot encoder
# Multiple Classification - Model Pipeline
def MultClassPipeline(X_train, X_test, y_train, y_test):
    """Fit several classifiers on the same split and tabulate their scores.

    Each classifier is wrapped in a ``Pipeline`` whose first step is a
    ``MinMaxScaler``. The pipeline is fitted on ``(X_train, y_train)`` and
    scored on ``(X_test, y_test)``.

    Parameters
    ----------
    X_train, X_test
        Feature matrices passed to ``Pipeline.fit`` / ``Pipeline.predict``.
    y_train, y_test
        Target labels for the training and test sets.

    Returns
    -------
    pipelines : list
        The fitted ``Pipeline`` objects, in the same order as the rows of
        ``scores_df``.
    scores_df : pandas.DataFrame
        One row per classifier with the columns ``Model``, ``F1_Score``,
        ``Precision``, ``Recall``, ``Accuracy`` and ``ROC_AUC``.

    Notes
    -----
    NOTE(review): ``f1_score``, ``precision_score`` and ``recall_score`` are
    called with their default arguments, which scikit-learn documents as
    binary averaging -- confirm that ``y`` is a binary target.
    NOTE(review): ``ROC_AUC`` is computed from hard class predictions, not
    from probabilities or decision scores.
    """
    log_reg = LogisticRegression(**rs)
    nb = BernoulliNB()
    knn = KNeighborsClassifier()
    svm = SVC(**rs)
    mlp = MLPClassifier(max_iter=500, **rs)
    dt = DecisionTreeClassifier(**rs)
    et = ExtraTreesClassifier(**rs)
    rf = RandomForestClassifier(**rs)
    rf2 = RandomForestClassifier(
        min_samples_leaf=50,
        n_estimators=150,
        bootstrap=True,
        oob_score=True,
        n_jobs=-1,
        random_state=42,
        # 'auto' was removed in scikit-learn 1.3; for classifiers it was an
        # alias of 'sqrt', so this keeps the same behaviour.
        max_features='sqrt',
    )
    xgb = XGBClassifier(**rs, verbosity=0)

    clfs = [
        ('Logistic Regression', log_reg),
        ('Naive Bayes', nb),
        ('K-Nearest Neighbors', knn),
        ('SVM', svm),
        ('MLP', mlp),
        ('Decision Tree', dt),
        ('Extra Trees', et),
        ('Random Forest', rf),
        ('Random Forest2', rf2),
        ('XGBoost', xgb),
    ]

    score_columns = ['Model', 'F1_Score', 'Precision', 'Recall', 'Accuracy', 'ROC_AUC']
    pipelines = []
    # Rows are collected in a plain list and turned into a DataFrame once at
    # the end: DataFrame.append no longer exists in pandas >= 2.0 and copied
    # the whole frame on every call.
    score_rows = []

    for clf_name, clf in clfs:
        pipeline = Pipeline(steps=[
            ('scaler', MinMaxScaler()),
            ('classifier', clf),
        ])
        pipeline.fit(X_train, y_train)

        # Model predictions on the held-out set
        y_pred = pipeline.predict(X_test)

        pipelines.append(pipeline)
        score_rows.append({
            'Model': clf_name,
            'F1_Score': f1_score(y_test, y_pred),
            'Precision': precision_score(y_test, y_pred),
            'Recall': recall_score(y_test, y_pred),
            'Accuracy': accuracy_score(y_test, y_pred),
            'ROC_AUC': roc_auc_score(y_test, y_pred),
        })

    scores_df = pd.DataFrame(score_rows, columns=score_columns)
    return pipelines, scores_df