Added and ran the hyperparameter tuning script for all the different classifiers, but could not successfully run feature selection and hyperparameter tuning together
This commit is contained in:
parent
74af5ef890
commit
37bda41f44
18 changed files with 131 additions and 142 deletions
46
UQ_LR_p1.py
46
UQ_LR_p1.py
|
@ -34,12 +34,7 @@ from xgboost import XGBClassifier
|
|||
rs = {'random_state': 42}
|
||||
njobs = {'n_jobs': 10}
|
||||
#%% Get train-test split and scoring functions
|
||||
# X_train, X_test, y_train, y_test = train_test_split(num_df_wtgt[numerical_FN]
|
||||
# , num_df_wtgt['mutation_class']
|
||||
# , test_size = 0.33
|
||||
# , random_state = 2
|
||||
# , shuffle = True
|
||||
# , stratify = num_df_wtgt['mutation_class'])
|
||||
|
||||
|
||||
y.to_frame().value_counts().plot(kind = 'bar')
|
||||
blind_test_df['dst_mode'].to_frame().value_counts().plot(kind = 'bar')
|
||||
|
@ -90,22 +85,22 @@ parameters = [
|
|||
'clf__estimator__max_iter': list(range(100,800,100)),
|
||||
'clf__estimator__solver': ['saga']
|
||||
},
|
||||
# {
|
||||
# 'clf__estimator': [LogisticRegression(**rs)],
|
||||
# #'clf__estimator__C': [0.001, 0.01, 0.1, 1, 10, 100, 1000],
|
||||
# 'clf__estimator__C': np.logspace(0, 4, 10),
|
||||
# 'clf__estimator__penalty': ['l2', 'none'],
|
||||
# 'clf__estimator__max_iter': list(range(100,800,100)),
|
||||
# 'clf__estimator__solver': ['newton-cg', 'lbfgs', 'sag']
|
||||
# },
|
||||
# {
|
||||
# 'clf__estimator': [LogisticRegression(**rs)],
|
||||
# #'clf__estimator__C': [0.001, 0.01, 0.1, 1, 10, 100, 1000],
|
||||
# 'clf__estimator__C': np.logspace(0, 4, 10),
|
||||
# 'clf__estimator__penalty': ['l1', 'l2'],
|
||||
# 'clf__estimator__max_iter': list(range(100,800,100)),
|
||||
# 'clf__estimator__solver': ['liblinear']
|
||||
# }
|
||||
{
|
||||
'clf__estimator': [LogisticRegression(**rs)],
|
||||
#'clf__estimator__C': [0.001, 0.01, 0.1, 1, 10, 100, 1000],
|
||||
'clf__estimator__C': np.logspace(0, 4, 10),
|
||||
'clf__estimator__penalty': ['l2', 'none'],
|
||||
'clf__estimator__max_iter': list(range(100,800,100)),
|
||||
'clf__estimator__solver': ['newton-cg', 'lbfgs', 'sag']
|
||||
},
|
||||
{
|
||||
'clf__estimator': [LogisticRegression(**rs)],
|
||||
#'clf__estimator__C': [0.001, 0.01, 0.1, 1, 10, 100, 1000],
|
||||
'clf__estimator__C': np.logspace(0, 4, 10),
|
||||
'clf__estimator__penalty': ['l1', 'l2'],
|
||||
'clf__estimator__max_iter': list(range(100,800,100)),
|
||||
'clf__estimator__solver': ['liblinear']
|
||||
}
|
||||
|
||||
]
|
||||
|
||||
|
@ -120,7 +115,8 @@ gscv_lr = GridSearchCV(pipeline
|
|||
, parameters
|
||||
#, scoring = 'f1', refit = 'f1'
|
||||
, scoring = mcc_score_fn, refit = 'mcc'
|
||||
, cv = skf_cv
|
||||
#, cv = skf_cv
|
||||
, cv = rskf_cv
|
||||
, **njobs
|
||||
, return_train_score = False
|
||||
, verbose = 3)
|
||||
|
@ -138,7 +134,6 @@ print('\nMean test score from fit results:', round(np.nanmean(gscv_lr_fit_be_res
|
|||
|
||||
###############################################################################
|
||||
|
||||
|
||||
######################################
|
||||
# Blind test
|
||||
######################################
|
||||
|
@ -186,7 +181,7 @@ print(lr_bts_df)
|
|||
# d3
|
||||
|
||||
# Create df with best model params
|
||||
model_params = pd.Series(['best_model_params', list(gscv_lr_fit_be_mod.items() )])
|
||||
model_params = pd.Series(['best_model_params', list(gscv_lr_fit_be_mod.items())])
|
||||
model_params_df = model_params.to_frame()
|
||||
model_params_df
|
||||
model_params_df.columns = ['Logistic_Regression']
|
||||
|
@ -209,3 +204,4 @@ lr_df
|
|||
print(confusion_matrix(y_bts, test_predict))
|
||||
|
||||
cm = confusion_matrix(y_bts, test_predict)
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue