added file containing model names and hyperparams to run for all models, incl. FS
This commit is contained in:
parent 9c07ad3ce8
commit 5d6dccfc09

6 changed files with 536 additions and 299 deletions
UQ_FS_fn.py (46 changed lines)
@@ -10,19 +10,26 @@ Created on Mon May 23 23:25:26 2022
 def fsgs(input_df
          , target
          , blind_test_df = pd.DataFrame()
          , blind_test_target = pd.Series(dtype = 'int64')
          #, y_trueS = pd.Series()
          , estimator = LogisticRegression(**rs)
          , param_gridLd = {}
-         , cv_method = 10
+         , cv_method = StratifiedKFold(n_splits = 10
+                                       , shuffle = True, **rs)
          , var_type = ['numerical'
                       , 'categorical'
                       , 'mixed']
+         , fs_estimator = [LogisticRegression(**rs)]
-         , fs = RFECV(DecisionTreeClassifier(**rs), cv = 10, scoring = 'matthews_corrcoef')
+         , fs = RFECV(DecisionTreeClassifier(**rs)
+                      , cv = StratifiedKFold(n_splits = 10
+                                             , shuffle = True, **rs)
+                      , scoring = 'matthews_corrcoef')
          ):
     '''
     returns
-    Dict containing results from FS and hyperparam tuning
+    Dict containing results from FS and hyperparam tuning for a given estimator
     >>> ADD MORE <<<
     optimised/selected based on mcc
     '''
     # Determine categorical and numerical features
     numerical_ix = input_df.select_dtypes(include=['int64', 'float64']).columns
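For context, a minimal sketch of how the updated signature might be called. Everything outside the diff is an assumption: rs is taken to be a dict of shared keyword arguments (e.g. a fixed random_state, as the **rs unpacking suggests), the data is synthetic, the import path is guessed from the file name, and the contents of param_gridLd are only a plausible shape.

from UQ_FS_fn import fsgs          # assumed import path for this module
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold
from sklearn.feature_selection import RFECV
from sklearn.tree import DecisionTreeClassifier

rs = {'random_state': 42}          # assumed meaning of the **rs convention

# Synthetic stand-ins for the training and blind-test data
X, y = make_classification(n_samples=200, n_features=10, **rs)
X_df = pd.DataFrame(X, columns=[f'f{i}' for i in range(10)])
y_s = pd.Series(y)

results = fsgs(input_df=X_df.iloc[:150], target=y_s.iloc[:150],
               blind_test_df=X_df.iloc[150:], blind_test_target=y_s.iloc[150:],
               estimator=LogisticRegression(**rs),
               param_gridLd={'clf__C': [0.1, 1, 10]},   # hypothetical grid
               cv_method=StratifiedKFold(n_splits=10, shuffle=True, **rs),
               fs=RFECV(DecisionTreeClassifier(**rs),
                        cv=StratifiedKFold(n_splits=10, shuffle=True, **rs),
                        scoring='matthews_corrcoef'))

Passing StratifiedKFold(..., shuffle=True, **rs) instead of the bare integer 10 keeps class proportions in every fold and makes the shuffling reproducible, which is presumably why the defaults changed in this hunk.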
@@ -68,11 +75,10 @@ def fsgs(input_df
     ############################################################################
     # Create Pipeline object
     pipe = Pipeline([
-        #('pre', MinMaxScaler()),
-        ('pre', col_transform),
-        ('fs', fs),
-        #('clf', LogisticRegression(**rs))])
-        ('clf', estimator)])
+        ('pre', col_transform),
+        ('fs', fs),
+        #('clf', LogisticRegression(**rs))])
+        ('clf', estimator)])
     ############################################################################
     # Define GridSearchCV
     gscv_fs = GridSearchCV(pipe
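The pipeline now feeds the ColumnTransformer output straight into RFECV and then the classifier. The sketch below shows, under assumptions, how the pieces around this hunk plausibly fit together: col_transform is built earlier in UQ_FS_fn.py and is not part of this diff, so the transformer, the column names and the grid values here are placeholders; only the step names ('pre', 'fs', 'clf') and the MCC scoring come from the diff.

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import RFECV
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV, StratifiedKFold

rs = {'random_state': 42}                     # assumed, as elsewhere
numerical_ix = ['age', 'dose']                # placeholder column names
categorical_ix = ['mutation_class']           # placeholder column names

col_transform = ColumnTransformer(
    [('num', MinMaxScaler(), numerical_ix),
     ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_ix)])

pipe = Pipeline([('pre', col_transform),
                 ('fs', RFECV(DecisionTreeClassifier(**rs),
                              cv=StratifiedKFold(n_splits=10, shuffle=True, **rs),
                              scoring='matthews_corrcoef')),
                 ('clf', LogisticRegression(**rs))])

# Grid keys are routed to pipeline steps by prefix: 'clf__' for the classifier,
# 'fs__' for the selector, 'pre__' for the preprocessor.
param_grid = {'clf__C': [0.01, 0.1, 1, 10]}
gscv_fs = GridSearchCV(pipe, param_grid,
                       cv=StratifiedKFold(n_splits=10, shuffle=True, **rs),
                       scoring='matthews_corrcoef', refit=True)
# gscv_fs.fit(input_df, target) would then run preprocessing, RFECV and
# hyperparameter tuning in a single cross-validated search.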
@@ -119,8 +125,8 @@ def fsgs(input_df
     #tp = gscv_fs.predict(X_bts)
     tp = gscv_fs.predict(blind_test_df)

-    print('\nMCC on Blind test:' , round(matthews_corrcoef(y_bts, tp),2))
-    print('\nAccuracy on Blind test:', round(accuracy_score(y_bts, tp),2))
+    print('\nMCC on Blind test:' , round(matthews_corrcoef(blind_test_target, tp),2))
+    print('\nAccuracy on Blind test:', round(accuracy_score(blind_test_target, tp),2))

     #=================
     # info extraction
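A side note on why the function prints MCC next to accuracy (an illustration, not from the source): on an imbalanced blind-test set a degenerate majority-class predictor still scores high accuracy, while MCC drops to 0.

import numpy as np
from sklearn.metrics import matthews_corrcoef, accuracy_score

y_true = np.array([0]*90 + [1]*10)    # 90/10 class imbalance
y_pred = np.zeros(100, dtype=int)     # always predict the majority class

print('Accuracy:', round(accuracy_score(y_true, y_pred), 2))     # 0.9
print('MCC     :', round(matthews_corrcoef(y_true, y_pred), 2))  # 0.0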
@@ -191,9 +197,9 @@ def fsgs(input_df
     #bts_predict = gscv_fs.predict(X_bts)
     bts_predict = gscv_fs.predict(blind_test_df)

-    print('\nMCC on Blind test:' , round(matthews_corrcoef(y_bts, bts_predict),2))
-    print('\nAccuracy on Blind test:', round(accuracy_score(y_bts, bts_predict),2))
-    bts_mcc_score = round(matthews_corrcoef(y_bts, bts_predict),2)
+    print('\nMCC on Blind test:' , round(matthews_corrcoef(blind_test_target, bts_predict),2))
+    print('\nAccuracy on Blind test:', round(accuracy_score(blind_test_target, bts_predict),2))
+    bts_mcc_score = round(matthews_corrcoef(blind_test_target, bts_predict),2)

     # Diff b/w train and bts test scores
     train_test_diff = train_bscore - bts_mcc_score
@@ -213,12 +219,12 @@ def fsgs(input_df

     lr_btsD
     #lr_btsD['bts_mcc'] = bts_mcc_score
-    lr_btsD['bts_fscore'] = round(f1_score(y_bts, bts_predict),2)
-    lr_btsD['bts_precision'] = round(precision_score(y_bts, bts_predict),2)
-    lr_btsD['bts_recall'] = round(recall_score(y_bts, bts_predict),2)
-    lr_btsD['bts_accuracy'] = round(accuracy_score(y_bts, bts_predict),2)
-    lr_btsD['bts_roc_auc'] = round(roc_auc_score(y_bts, bts_predict),2)
-    lr_btsD['bts_jaccard'] = round(jaccard_score(y_bts, bts_predict),2)
+    lr_btsD['bts_fscore'] = round(f1_score(blind_test_target, bts_predict),2)
+    lr_btsD['bts_precision'] = round(precision_score(blind_test_target, bts_predict),2)
+    lr_btsD['bts_recall'] = round(recall_score(blind_test_target, bts_predict),2)
+    lr_btsD['bts_accuracy'] = round(accuracy_score(blind_test_target, bts_predict),2)
+    lr_btsD['bts_roc_auc'] = round(roc_auc_score(blind_test_target, bts_predict),2)
+    lr_btsD['bts_jaccard'] = round(jaccard_score(blind_test_target, bts_predict),2)
     lr_btsD

     #===========================
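A self-contained sketch of the blind-test metrics dictionary assembled in this hunk, with toy labels standing in for blind_test_target and bts_predict. Worth noting: as in the diff, roc_auc_score is given hard class predictions rather than probabilities, so it reduces to the mean of sensitivity and specificity.

from sklearn.metrics import (f1_score, precision_score, recall_score,
                             accuracy_score, roc_auc_score, jaccard_score)

blind_test_target = [0, 0, 1, 1, 1, 0, 1, 0]   # toy ground truth
bts_predict       = [0, 1, 1, 1, 0, 0, 1, 0]   # toy predictions

lr_btsD = {}
lr_btsD['bts_fscore']    = round(f1_score(blind_test_target, bts_predict), 2)
lr_btsD['bts_precision'] = round(precision_score(blind_test_target, bts_predict), 2)
lr_btsD['bts_recall']    = round(recall_score(blind_test_target, bts_predict), 2)
lr_btsD['bts_accuracy']  = round(accuracy_score(blind_test_target, bts_predict), 2)
lr_btsD['bts_roc_auc']   = round(roc_auc_score(blind_test_target, bts_predict), 2)
lr_btsD['bts_jaccard']   = round(jaccard_score(blind_test_target, bts_predict), 2)
print(lr_btsD)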
@@ -229,7 +235,7 @@ def fsgs(input_df
     fs_methodf = str(gscv_fs.best_estimator_.named_steps['fs'])
     all_featuresL = list(all_features)
     fs_res_arrayf = str(list( gscv_fs.best_estimator_.named_steps['fs'].get_support()))
-    fs_res_array_rankf = list( gscv_fs.best_estimator_.named_steps['fs'].ranking_)
+    fs_res_array_rankf = str(list( gscv_fs.best_estimator_.named_steps['fs'].ranking_))
     sel_featuresf = list(sel_features)
     n_sf = int(n_sf)
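Finally, a hedged, self-contained illustration of the feature-selection bookkeeping in this last hunk: get_support() is the boolean selection mask, ranking_ holds per-feature ranks (1 = selected), and both are stringified for the results dict. The data, column names and fold count below are made up; only the RFECV attributes mirror the diff.

import pandas as pd
from sklearn.datasets import make_classification
from sklearn.feature_selection import RFECV
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import StratifiedKFold

rs = {'random_state': 42}                              # assumed convention
X, y = make_classification(n_samples=100, n_features=6, n_informative=3, **rs)
all_features = [f'f{i}' for i in range(6)]
X_df = pd.DataFrame(X, columns=all_features)

fs = RFECV(DecisionTreeClassifier(**rs),
           cv=StratifiedKFold(n_splits=5, shuffle=True, **rs),
           scoring='matthews_corrcoef').fit(X_df, y)

sel_features = X_df.columns[fs.get_support()]          # names of kept features
fs_res_arrayf      = str(list(fs.get_support()))       # boolean mask as string
fs_res_array_rankf = str(list(fs.ranking_))            # ranks as string
n_sf = int(fs.n_features_)                             # number of selected features
print(sel_features.tolist(), n_sf)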