#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Mar 4 15:25:33 2022

@author: tanu
"""
#%%
import os
import sys

import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import BernoulliNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import Pipeline
from xgboost import XGBClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, roc_auc_score, roc_curve, f1_score
#%%
# Shared seed so every estimator that accepts ``random_state`` is reproducible.
rs = {'random_state': 42}


# Multiple Classification - Model Pipeline
def MultClassPipeline(X_train, X_test, y_train, y_test):
    """Fit several classifiers on the same split and tabulate their test scores.

    Each classifier is wrapped in a ``Pipeline`` that first rescales the
    features with ``MinMaxScaler``, is fitted on ``(X_train, y_train)`` and is
    then scored on ``(X_test, y_test)``.

    Parameters
    ----------
    X_train, X_test
        Training and test feature matrices, passed straight to
        ``Pipeline.fit`` / ``Pipeline.predict``.
    y_train, y_test
        Training and test target labels. The metrics are called with
        scikit-learn's default arguments, so binary targets are assumed
        (NOTE(review): confirm against callers).

    Returns
    -------
    pipelines : list
        The fitted ``Pipeline`` objects, in the order the models were run.
    scores_df : pandas.DataFrame
        One row per model with columns ``Model``, ``F1_Score``,
        ``Precision``, ``Recall``, ``Accuracy`` and ``ROC_AUC``.

    Notes
    -----
    ``ROC_AUC`` is computed from the hard class labels returned by
    ``predict``, not from decision scores or probabilities.
    """
    log_reg = LogisticRegression(**rs)
    nb = BernoulliNB()
    knn = KNeighborsClassifier()
    svm = SVC(**rs)
    mlp = MLPClassifier(max_iter=500, **rs)
    dt = DecisionTreeClassifier(**rs)
    et = ExtraTreesClassifier(**rs)
    rf = RandomForestClassifier(**rs)
    xgb = XGBClassifier(**rs, verbosity=0)

    clfs = [
        ('Logistic Regression', log_reg),
        ('Naive Bayes', nb),
        ('K-Nearest Neighbors', knn),
        ('SVM', svm),
        ('MLP', mlp),
        ('Decision Tree', dt),
        ('Extra Trees', et),
        ('Random Forest', rf),
        ('XGBoost', xgb),
    ]

    score_columns = ['Model', 'F1_Score', 'Precision', 'Recall', 'Accuracy', 'ROC_AUC']

    pipelines = []
    # Rows are gathered in a plain list and turned into a DataFrame once at
    # the end: ``DataFrame.append`` was removed in pandas 2.0 and copied the
    # whole frame on every call.
    score_rows = []

    for clf_name, clf in clfs:
        pipeline = Pipeline(
            steps=[
                # Swap in StandardScaler() here to standardise instead of
                # min-max scaling.
                ('scaler', MinMaxScaler()),
                ('classifier', clf),
            ]
        )
        pipeline.fit(X_train, y_train)

        # Model predictions (hard class labels)
        y_pred = pipeline.predict(X_test)

        pipelines.append(pipeline)
        score_rows.append({
            'Model': clf_name,
            'F1_Score': f1_score(y_test, y_pred),
            'Precision': precision_score(y_test, y_pred),
            'Recall': recall_score(y_test, y_pred),
            'Accuracy': accuracy_score(y_test, y_pred),
            'ROC_AUC': roc_auc_score(y_test, y_pred),
        })

    # Passing ``columns`` fixes the column order.
    scores_df = pd.DataFrame(score_rows, columns=score_columns)

    return pipelines, scores_df