git add UQ_imbalance.py
This commit is contained in:
parent
42c8c47e2d
commit
1da87ba177
4 changed files with 134 additions and 56 deletions
|
@ -76,8 +76,8 @@ from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder
|
|||
rs = {'random_state': 42}
|
||||
njobs = {'n_jobs': 10}
|
||||
|
||||
scoring_fn = ({ 'fscore' : make_scorer(f1_score)
|
||||
, 'mcc' : make_scorer(matthews_corrcoef)
|
||||
scoring_fn = ({ 'mcc' : make_scorer(matthews_corrcoef)
|
||||
, 'fscore' : make_scorer(f1_score)
|
||||
, 'precision' : make_scorer(precision_score)
|
||||
, 'recall' : make_scorer(recall_score)
|
||||
, 'accuracy' : make_scorer(accuracy_score)
|
||||
|
@ -87,7 +87,10 @@ scoring_fn = ({ 'fscore' : make_scorer(f1_score)
|
|||
|
||||
|
||||
# Multiple Classification - Model Pipeline
|
||||
def MultClassPipeSKFCV(input_df, target, skf_cv, var_type = ['numerical', 'categorical','mixed']):
|
||||
def MultClassPipeSKFCV(input_df, target, skf_cv
|
||||
, blind_test_input_df
|
||||
, blind_test_target
|
||||
, var_type = ['numerical', 'categorical','mixed']):
|
||||
|
||||
'''
|
||||
@ param input_df: input features
|
||||
|
@ -120,8 +123,8 @@ def MultClassPipeSKFCV(input_df, target, skf_cv, var_type = ['numerical', 'categ
|
|||
t = [('cat', OneHotEncoder(), categorical_ix)]
|
||||
|
||||
if var_type == 'mixed':
|
||||
t = [('cat', OneHotEncoder(), categorical_ix)
|
||||
, ('num', MinMaxScaler(), numerical_ix)]
|
||||
t = [('num', MinMaxScaler(), numerical_ix)
|
||||
, ('cat', OneHotEncoder(), categorical_ix) ]
|
||||
|
||||
col_transform = ColumnTransformer(transformers = t
|
||||
, remainder='passthrough')
|
||||
|
@ -137,7 +140,7 @@ def MultClassPipeSKFCV(input_df, target, skf_cv, var_type = ['numerical', 'categ
|
|||
rf = RandomForestClassifier(**rs, n_estimators = 1000 )
|
||||
rf2 = RandomForestClassifier(
|
||||
min_samples_leaf = 5
|
||||
, n_estimators = 100 #10
|
||||
, n_estimators = 1000
|
||||
, bootstrap = True
|
||||
, oob_score = True
|
||||
, **njobs
|
||||
|
@ -158,16 +161,16 @@ def MultClassPipeSKFCV(input_df, target, skf_cv, var_type = ['numerical', 'categ
|
|||
, ('K-Nearest Neighbors', knn)
|
||||
, ('SVM' , svm)
|
||||
, ('MLP' , mlp)
|
||||
# , ('Decision Tree' , dt)
|
||||
# , ('Extra Trees' , et)
|
||||
# , ('Random Forest' , rf)
|
||||
# , ('Naive Bayes' , nb)
|
||||
# , ('Random Forest2' , rf2)
|
||||
# , ('XGBoost' , xgb)
|
||||
# , ('LDA' , lda)
|
||||
# , ('MultinomialNB' , mnb)
|
||||
# , ('PassiveAggresive' , pa)
|
||||
# , ('StochasticGDescent' , sgd)
|
||||
, ('Decision Tree' , dt)
|
||||
, ('Extra Trees' , et)
|
||||
, ('Random Forest' , rf)
|
||||
, ('Naive Bayes' , nb)
|
||||
, ('Random Forest2' , rf2)
|
||||
, ('XGBoost' , xgb)
|
||||
, ('LDA' , lda)
|
||||
, ('MultinomialNB' , mnb)
|
||||
, ('PassiveAggresive' , pa)
|
||||
, ('StochasticGDescent' , sgd)
|
||||
]
|
||||
|
||||
mm_skf_scoresD = {}
|
||||
|
@ -196,48 +199,41 @@ def MultClassPipeSKFCV(input_df, target, skf_cv, var_type = ['numerical', 'categ
|
|||
#pp.pprint(mm_skf_scoresD)
|
||||
|
||||
#return(mm_skf_scoresD)
|
||||
|
||||
|
||||
|
||||
|
||||
#%%
|
||||
#=========================
|
||||
# Blind test: BTS results
|
||||
#=========================
|
||||
# Build the final results with all scores for a feature selected model
|
||||
#bts_predict = gscv_fs.predict(X_bts)
|
||||
#bts_predict = gscv_fs.predict(blind_test_input_df)
|
||||
model_pipeline.fit(input_df, target)
|
||||
bts_predict = model_pipeline.predict(X_bts)
|
||||
bts_predict = model_pipeline.predict(blind_test_input_df)
|
||||
|
||||
print('\nMCC on Blind test:' , round(matthews_corrcoef(y_bts, bts_predict),2))
|
||||
print('\nAccuracy on Blind test:', round(accuracy_score(y_bts, bts_predict),2))
|
||||
bts_mcc_score = round(matthews_corrcoef(y_bts, bts_predict),2)
|
||||
bts_mcc_score = round(matthews_corrcoef(blind_test_target, bts_predict),2)
|
||||
print('\nMCC on Blind test:' , bts_mcc_score)
|
||||
print('\nAccuracy on Blind test:', round(accuracy_score(blind_test_target, bts_predict),2))
|
||||
|
||||
# Diff b/w train and bts test scores
|
||||
# train_test_diff = train_bscore - bts_mcc_score
|
||||
# print('\nDiff b/w train and blind test score (MCC):', train_test_diff)
|
||||
|
||||
|
||||
# create a dict with all scores
|
||||
lr_btsD = { 'model_name': model_name
|
||||
, 'bts_mcc':None
|
||||
, 'bts_fscore':None
|
||||
, 'bts_precision':None
|
||||
, 'bts_recall':None
|
||||
, 'bts_accuracy':None
|
||||
, 'bts_roc_auc':None
|
||||
, 'bts_jaccard':None}
|
||||
# # create a dict with all scores
|
||||
# lr_btsD = { 'model_name': model_name
|
||||
# , 'bts_mcc':None
|
||||
# , 'bts_fscore':None
|
||||
# , 'bts_precision':None
|
||||
# , 'bts_recall':None
|
||||
# , 'bts_accuracy':None
|
||||
# , 'bts_roc_auc':None
|
||||
# , 'bts_jaccard':None}
|
||||
|
||||
|
||||
lr_btsD
|
||||
lr_btsD['bts_mcc'] = bts_mcc_score
|
||||
lr_btsD['bts_fscore'] = round(f1_score(y_bts, bts_predict),2)
|
||||
lr_btsD['bts_precision'] = round(precision_score(y_bts, bts_predict),2)
|
||||
lr_btsD['bts_recall'] = round(recall_score(y_bts, bts_predict),2)
|
||||
lr_btsD['bts_accuracy'] = round(accuracy_score(y_bts, bts_predict),2)
|
||||
lr_btsD['bts_roc_auc'] = round(roc_auc_score(y_bts, bts_predict),2)
|
||||
lr_btsD['bts_jaccard'] = round(jaccard_score(y_bts, bts_predict),2)
|
||||
lr_btsD
|
||||
|
||||
return(lr_btsD)
|
||||
mm_skf_scoresD[model_name]['bts_mcc'] = bts_mcc_score
|
||||
mm_skf_scoresD[model_name]['bts_fscore'] = round(f1_score(blind_test_target, bts_predict),2)
|
||||
mm_skf_scoresD[model_name]['bts_precision'] = round(precision_score(blind_test_target, bts_predict),2)
|
||||
mm_skf_scoresD[model_name]['bts_recall'] = round(recall_score(blind_test_target, bts_predict),2)
|
||||
mm_skf_scoresD[model_name]['bts_accuracy'] = round(accuracy_score(blind_test_target, bts_predict),2)
|
||||
mm_skf_scoresD[model_name]['bts_roc_auc'] = round(roc_auc_score(blind_test_target, bts_predict),2)
|
||||
mm_skf_scoresD[model_name]['bts_jaccard'] = round(jaccard_score(blind_test_target, bts_predict),2)
|
||||
return(mm_skf_scoresD)
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue