Added and ran the hyperparameter script for all the different classifiers, but could not successfully run feature selection and hyperparameter tuning together

Tanushree Tunstall 2022-05-20 08:09:24 +01:00
parent 74af5ef890
commit 37bda41f44
18 changed files with 131 additions and 142 deletions
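The point the commit message flags, running feature selection and hyperparameter tuning together, is usually handled by putting the selector and the classifier into one Pipeline so that GridSearchCV tunes both at once. The sketch below is a minimal, self-contained illustration of that pattern under assumed names and values (SelectKBest, the step names, the toy data and the grid); it is not the script from this commit, which wraps the classifier in a 'clf' step with a swappable estimator.

import numpy as np
from sklearn.datasets import make_classification
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Toy data standing in for the real training split
X, y = make_classification(n_samples = 200, n_features = 20, random_state = 42)

pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('fs', SelectKBest(score_func = f_classif)),    # feature selection step
    ('clf', LogisticRegression(random_state = 42))  # classifier step
])

param_grid = {
    'fs__k': [5, 10, 'all'],                        # number of selected features is tuned too
    'clf__C': np.logspace(0, 4, 10),
    'clf__penalty': ['l2'],
    'clf__solver': ['lbfgs']
}

gscv = GridSearchCV(pipe, param_grid
                    , scoring = 'f1'
                    , cv = StratifiedKFold(n_splits = 10, shuffle = True, random_state = 42)
                    , n_jobs = 10)
gscv.fit(X, y)
print(gscv.best_params_)

The selector is tuned through the same double-underscore naming ('fs__k') that the grids below use for the classifier step.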

@@ -34,12 +34,7 @@ from xgboost import XGBClassifier
rs = {'random_state': 42}
njobs = {'n_jobs': 10}
#%% Get train-test split and scoring functions
# X_train, X_test, y_train, y_test = train_test_split(num_df_wtgt[numerical_FN]
# , num_df_wtgt['mutation_class']
# , test_size = 0.33
# , random_state = 2
# , shuffle = True
# , stratify = num_df_wtgt['mutation_class'])
y.to_frame().value_counts().plot(kind = 'bar')
blind_test_df['dst_mode'].to_frame().value_counts().plot(kind = 'bar')
@@ -90,22 +85,22 @@ parameters = [
'clf__estimator__max_iter': list(range(100,800,100)),
'clf__estimator__solver': ['saga']
},
# {
# 'clf__estimator': [LogisticRegression(**rs)],
# #'clf__estimator__C': [0.001, 0.01, 0.1, 1, 10, 100, 1000],
# 'clf__estimator__C': np.logspace(0, 4, 10),
# 'clf__estimator__penalty': ['l2', 'none'],
# 'clf__estimator__max_iter': list(range(100,800,100)),
# 'clf__estimator__solver': ['newton-cg', 'lbfgs', 'sag']
# },
# {
# 'clf__estimator': [LogisticRegression(**rs)],
# #'clf__estimator__C': [0.001, 0.01, 0.1, 1, 10, 100, 1000],
# 'clf__estimator__C': np.logspace(0, 4, 10),
# 'clf__estimator__penalty': ['l1', 'l2'],
# 'clf__estimator__max_iter': list(range(100,800,100)),
# 'clf__estimator__solver': ['liblinear']
# }
{
'clf__estimator': [LogisticRegression(**rs)],
#'clf__estimator__C': [0.001, 0.01, 0.1, 1, 10, 100, 1000],
'clf__estimator__C': np.logspace(0, 4, 10),
'clf__estimator__penalty': ['l2', 'none'],
'clf__estimator__max_iter': list(range(100,800,100)),
'clf__estimator__solver': ['newton-cg', 'lbfgs', 'sag']
},
{
'clf__estimator': [LogisticRegression(**rs)],
#'clf__estimator__C': [0.001, 0.01, 0.1, 1, 10, 100, 1000],
'clf__estimator__C': np.logspace(0, 4, 10),
'clf__estimator__penalty': ['l1', 'l2'],
'clf__estimator__max_iter': list(range(100,800,100)),
'clf__estimator__solver': ['liblinear']
}
]
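The same list-of-dicts layout extends to the other classifiers that this commit adds hyperparameter scripts for. As an illustration only, an extra entry for XGBClassifier (imported at the top of the file) might look like the sketch below; the grid values are assumptions, not values from this commit, and rs / njobs are the dicts defined above.

from xgboost import XGBClassifier

# Illustrative extra grid entry in the same 'clf__estimator' format
parameters_xgb = [
    {
        'clf__estimator': [XGBClassifier(**rs, **njobs)],
        'clf__estimator__n_estimators': [100, 300, 500],
        'clf__estimator__max_depth': [4, 6, 8],
        'clf__estimator__learning_rate': [0.01, 0.1]
    }
]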
@@ -120,7 +115,8 @@ gscv_lr = GridSearchCV(pipeline
, parameters
#, scoring = 'f1', refit = 'f1'
, scoring = mcc_score_fn, refit = 'mcc'
, cv = skf_cv
#, cv = skf_cv
, cv = rskf_cv
, **njobs
, return_train_score = False
, verbose = 3)
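The mcc_score_fn and rskf_cv objects passed above are presumably defined earlier in the script, outside this hunk. A minimal sketch of typical definitions, with assumed fold and repeat counts, would be:

from sklearn.metrics import make_scorer, matthews_corrcoef
from sklearn.model_selection import StratifiedKFold, RepeatedStratifiedKFold

# Scorer dict so that refit = 'mcc' can select on the MCC column of the CV results
mcc_score_fn = {'mcc': make_scorer(matthews_corrcoef)}

# Plain vs repeated stratified K-fold; this hunk swaps skf_cv for rskf_cv
skf_cv  = StratifiedKFold(n_splits = 10, shuffle = True, random_state = 42)
rskf_cv = RepeatedStratifiedKFold(n_splits = 10, n_repeats = 3, random_state = 42)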
@@ -138,7 +134,6 @@ print('\nMean test score from fit results:', round(np.nanmean(gscv_lr_fit_be_res
###############################################################################
######################################
# Blind test
######################################
@@ -186,7 +181,7 @@ print(lr_bts_df)
# d3
# Create df with best model params
model_params = pd.Series(['best_model_params', list(gscv_lr_fit_be_mod.items() )])
model_params = pd.Series(['best_model_params', list(gscv_lr_fit_be_mod.items())])
model_params_df = model_params.to_frame()
model_params_df
model_params_df.columns = ['Logistic_Regression']
@@ -209,3 +204,4 @@ lr_df
print(confusion_matrix(y_bts, test_predict))
cm = confusion_matrix(y_bts, test_predict)
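Beyond printing the raw confusion matrix, the blind-test block typically reports a few summary metrics from the same y_bts / test_predict pair. A short sketch (not code from this commit) of what could follow:

from sklearn.metrics import accuracy_score, f1_score, matthews_corrcoef, classification_report

# Assumes a binary target, as for dst_mode above
print('Blind test accuracy:', round(accuracy_score(y_bts, test_predict), 2))
print('Blind test F1 score:', round(f1_score(y_bts, test_predict), 2))
print('Blind test MCC:', round(matthews_corrcoef(y_bts, test_predict), 2))
print(classification_report(y_bts, test_predict))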