trying Stratified Kfold split on running multiple pipelines

This commit is contained in:
Tanushree Tunstall 2022-03-09 18:35:54 +00:00
parent bb8f6f70ba
commit 1bfb35c30c
7 changed files with 287 additions and 72 deletions

View file

@@ -20,7 +20,8 @@ from xgboost import XGBClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, roc_auc_score, roc_curve, f1_score
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score
from sklearn.metrics import roc_auc_score, roc_curve, f1_score, matthews_corrcoef
#%%
rs = {'random_state': 42}
# TODO: add preprocessing step with one hot encoder
@@ -63,7 +64,7 @@ def MultClassPipeline(X_train, X_test, y_train, y_test):
pipelines = []
scores_df = pd.DataFrame(columns=['Model', 'F1_Score', 'Precision', 'Recall', 'Accuracy', 'ROC_AUC'])
scores_df = pd.DataFrame(columns=['Model', 'F1_Score', 'MCC', 'Precision', 'Recall', 'Accuracy', 'ROC_AUC'])
for clf_name, clf in clfs:
@@ -83,24 +84,26 @@ def MultClassPipeline(X_train, X_test, y_train, y_test):
# Precision
pres = precision_score(y_test, y_pred)
# Recall
rcall = recall_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
# Accuracy
accu = accuracy_score(y_test, y_pred)
# ROC_AUC
roc_auc = roc_auc_score(y_test, y_pred)
# Matthews correlation coefficient
mcc = matthews_corrcoef(y_test, y_pred)
pipelines.append(pipeline)
scores_df = scores_df.append({
'Model' : clf_name,
'F1_Score' : fscore,
'Precision' : pres,
'Recall' : rcall,
'Accuracy' : accu,
'ROC_AUC' : roc_auc
},
ignore_index = True)
'Model' : clf_name
, 'F1_Score' : fscore
, 'MCC' : mcc
, 'Precision' : pres
, 'Recall' : recall
, 'Accuracy' : accu
, 'ROC_AUC' : roc_auc
}
, ignore_index = True)
return pipelines, scores_df