Trying a StratifiedKFold split when running multiple pipelines
This commit is contained in:
parent
bb8f6f70ba
commit
1bfb35c30c
7 changed files with 287 additions and 72 deletions
|
@@ -21,7 +21,8 @@ from sklearn.compose import ColumnTransformer
|
|||
from sklearn.preprocessing import StandardScaler
|
||||
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, roc_auc_score, roc_curve, f1_score
|
||||
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score
|
||||
from sklearn.metrics import roc_auc_score, roc_curve, f1_score, matthews_corrcoef
|
||||
#%%
|
||||
rs = {'random_state': 42}
|
||||
# Done: add preprocessing step with one hot encoder
|
||||
|
@@ -70,10 +71,9 @@ def MultClassPipeline2(X_train, X_test, y_train, y_test, input_df):
|
|||
('XGBoost', xgb)
|
||||
]
|
||||
|
||||
|
||||
pipelines = []
|
||||
|
||||
scores_df = pd.DataFrame(columns=['Model', 'F1_Score', 'Precision', 'Recall', 'Accuracy', 'ROC_AUC'])
|
||||
scores_df = pd.DataFrame(columns=['Model', 'F1_Score', 'MCC', 'Precision', 'Recall', 'Accuracy', 'ROC_AUC'])
|
||||
|
||||
for clf_name, clf in clfs:
|
||||
#%%
|
||||
|
@@ -101,10 +101,12 @@ def MultClassPipeline2(X_train, X_test, y_train, y_test, input_df):
|
|||
|
||||
# F1-Score
|
||||
fscore = f1_score(y_test, y_pred)
|
||||
# Matthews correlation coefficient
|
||||
mcc = matthews_corrcoef(y_test, y_pred)
|
||||
# Precision
|
||||
pres = precision_score(y_test, y_pred)
|
||||
# Recall
|
||||
rcall = recall_score(y_test, y_pred)
|
||||
recall = recall_score(y_test, y_pred)
|
||||
# Accuracy
|
||||
accu = accuracy_score(y_test, y_pred)
|
||||
# ROC_AUC
|
||||
|
@@ -113,15 +115,15 @@ def MultClassPipeline2(X_train, X_test, y_train, y_test, input_df):
|
|||
pipelines.append(pipeline)
|
||||
|
||||
scores_df = scores_df.append({
|
||||
'Model' : clf_name,
|
||||
'F1_Score' : fscore,
|
||||
'Precision' : pres,
|
||||
'Recall' : rcall,
|
||||
'Accuracy' : accu,
|
||||
'ROC_AUC' : roc_auc
|
||||
|
||||
},
|
||||
ignore_index = True)
|
||||
'Model' : clf_name
|
||||
, 'F1_Score' : fscore
|
||||
, 'MCC' : mcc
|
||||
, 'Precision' : pres
|
||||
, 'Recall' : recall
|
||||
, 'Accuracy' : accu
|
||||
, 'ROC_AUC' : roc_auc
|
||||
}
|
||||
, ignore_index = True)
|
||||
|
||||
return pipelines, scores_df
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue