added all classification algorithms params for gridsearch

Tanushree Tunstall 2022-03-21 13:51:20 +00:00
parent d012542435
commit 0c4f1e1e5f
8 changed files with 503 additions and 110 deletions

@@ -37,7 +37,7 @@ class ClfSwitcher(BaseEstimator):
     #def recall_score(self, X, y):
     #    return self.estimator.recall_score(X, y)
 #%% Custom GridSearch: IntraModel[orig]
-def grid_search2(input_df, target, skf_cv, var_type = ['numerical', 'categorical','mixed']) :
+def grid_search(input_df, target, sel_cv, var_type = ['numerical', 'categorical','mixed']) :
     pipeline1 = Pipeline((
         ('pre', MinMaxScaler())
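For context, ClfSwitcher in the hunk header is the usual estimator-switching wrapper that lets GridSearchCV swap whole classifiers as a tunable parameter. A minimal sketch of that pattern, assuming the conventional constructor (the file's actual signature is not shown here):

from sklearn.base import BaseEstimator
from sklearn.linear_model import LogisticRegression

class ClfSwitcher(BaseEstimator):
    # the 'estimator' parameter is itself tunable, so a grid can swap models
    def __init__(self, estimator=LogisticRegression()):
        self.estimator = estimator

    def fit(self, X, y=None, **kwargs):
        self.estimator.fit(X, y)
        return self

    def predict(self, X):
        return self.estimator.predict(X)

    def score(self, X, y):
        return self.estimator.score(X, y)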
@@ -73,7 +73,7 @@ def grid_search2(input_df, target, skf_cv, var_type = ['numerical', 'categorical
     for i in range(len(pars)):
         print('IIIII===>', i)
         gs = GridSearchCV(pips[i], pars[i]
-                          , cv = skf_cv
+                          , cv = sel_cv
                           , **scoring_refit
                           #, refit=False
                           , **njobs
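The loop above walks two parallel lists, one pipeline and one parameter grid per model, and runs a GridSearchCV for each. A self-contained sketch of that pattern; the data, pipelines, and grids below are illustrative stand-ins, not the file's own:

from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler

X, y = make_classification(n_samples=200, n_features=10, random_state=42)
sel_cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# one pipeline and one grid per model, kept in parallel lists
pips = [
    Pipeline([('pre', MinMaxScaler()),
              ('clf', LogisticRegression(max_iter=1000))]),
    Pipeline([('pre', MinMaxScaler()),
              ('clf', RandomForestClassifier(random_state=42))]),
]
pars = [
    {'clf__C': [0.1, 1, 10]},
    {'clf__n_estimators': [100, 300], 'clf__max_depth': [4, None]},
]

for i in range(len(pars)):
    gs = GridSearchCV(pips[i], pars[i], cv=sel_cv,
                      scoring='recall', refit=True, n_jobs=-1)
    gs.fit(X, y)
    print('\nBest model:', gs.best_params_)
    print('\nBest score:', gs.best_score_)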
@@ -82,9 +82,21 @@ def grid_search2(input_df, target, skf_cv, var_type = ['numerical', 'categorical
     print ("finished Gridsearch")
     print ('\nBest model:', gs.best_params_)
     print ('\nBest score:', gs.best_score_)
-#%% Custom grid_search: Intra-Model [with return]
+# TODO: add
+# # summarize results
+# print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
+# means = grid_result.cv_results_['mean_test_score']
+# stds = grid_result.cv_results_['std_test_score']
+# params = grid_result.cv_results_['params']
+# for mean, stdev, param in zip(means, stds, params):
+#     print("%f (%f) with: %r" % (mean, stdev, param))
+# CALL: grid_search [orig]
+grid_search()
+# #%% Custom grid_search: Intra-Model [with return]
 def grid_search(input_df, target
-                , skf_cv
+                , sel_cv
                 , chosen_scoreD #scoring_refit
                 #, var_type = ['numerical', 'categorical','mixed']
                 ):
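The TODO block in this hunk already sketches the usual cv_results_ summary; a runnable version, reusing the fitted gs from the sketch further up (grid_result in the TODO corresponds to any fitted search object):

# summarize every candidate, not just the winner
print("Best: %f using %s" % (gs.best_score_, gs.best_params_))
means = gs.cv_results_['mean_test_score']
stds = gs.cv_results_['std_test_score']
params = gs.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))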
@@ -128,7 +140,7 @@ def grid_search(input_df, target
     print("\nStarting Gridsearch for model:", model_name, i)
     gs = GridSearchCV(all_pipelines[i], all_parameters[i]
-                      , cv = skf_cv
+                      , cv = sel_cv
                       #, **scoring_refit
                       #, refit=False
                       , **chosen_scoreD
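Here **chosen_scoreD unpacks a plain dict into GridSearchCV keyword arguments, so the caller decides scoring and refit. A minimal illustration, reusing pips, pars, and sel_cv from the sketch above (the dict values are assumptions, not the file's):

# equivalent to GridSearchCV(..., scoring='recall', refit=True)
chosen_scoreD = {'scoring': 'recall', 'refit': True}
gs = GridSearchCV(pips[0], pars[0], cv=sel_cv, **chosen_scoreD)
gs.fit(X, y)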
@@ -150,6 +162,9 @@ def grid_search(input_df, target
     out[model_name].update(chosen_scoreD.copy())
     out[model_name].update({'best_score': gs.best_score_}.copy())
     return(out)
+# TODO:
+# print, or inspect, each model's mean test score and sd; sometimes they are identical and your best model just picks one!
 #%% call CUSTOM grid_search: INTRA model [with return]
 # call
 chosen_score = {'scoring': 'recall'
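One way to act on the TODO above: after fitting, compare the top mean test scores and flag ties, since GridSearchCV silently picks one winner among them. A hedged sketch for the single-metric case, reusing a fitted gs; the exact tolerance is an assumption:

import numpy as np

# single-metric searches store scores under 'mean_test_score'
means = gs.cv_results_['mean_test_score']
best = np.nanmax(means)
n_tied = int(np.isclose(means, best).sum())
if n_tied > 1:
    print(f"{n_tied} parameter sets tie on the best mean test score "
          f"({best:.4f}); best_params_ is an arbitrary pick among them.")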
@@ -158,7 +173,6 @@ mcc_score_fn = {'chosen_scoreD': {'scoring': {'mcc': make_scorer(matthews_corrco
                ,'refit': 'mcc'}
                }
                }
 intra_models = grid_search(X, y
-                           , skf_cv = skf_cv
                            , chosen_scoreD= chosen_score
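mcc_score_fn above routes Matthews correlation into the search as a named scorer with refit='mcc'. Spelled out directly against GridSearchCV, reusing pips, pars, and sel_cv from the earlier sketch:

from sklearn.metrics import make_scorer, matthews_corrcoef

# with a dict of scorers, refit must name the metric that picks best_params_
chosen_scoreD = {'scoring': {'mcc': make_scorer(matthews_corrcoef)},
                 'refit': 'mcc'}
gs_mcc = GridSearchCV(pips[0], pars[0], cv=sel_cv, **chosen_scoreD)
gs_mcc.fit(X, y)
print('Best MCC:', gs_mcc.best_score_)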