added all classification algorithms params for gridsearch

Tanushree Tunstall 2022-03-21 13:51:20 +00:00
parent d012542435
commit 0c4f1e1e5f
8 changed files with 503 additions and 110 deletions

@@ -37,7 +37,7 @@ class ClfSwitcher(BaseEstimator):
     #def recall_score(self, X, y):
     #    return self.estimator.recall_score(X, y)
 #%% Custom GridSearch: IntraModel[orig]
-def grid_search2(input_df, target, skf_cv, var_type = ['numerical', 'categorical','mixed']) :
+def grid_search(input_df, target, sel_cv, var_type = ['numerical', 'categorical','mixed']) :
     pipeline1 = Pipeline((
         ('pre', MinMaxScaler())
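For context, ClfSwitcher in the hunk header is the usual estimator-switching wrapper that lets GridSearchCV swap whole classifiers as a tunable parameter. A minimal sketch of that pattern, assuming the conventional constructor (the file's actual signature is not shown here):

from sklearn.base import BaseEstimator
from sklearn.linear_model import LogisticRegression

class ClfSwitcher(BaseEstimator):
    # the 'estimator' parameter is itself tunable, so a grid can swap models
    def __init__(self, estimator=LogisticRegression()):
        self.estimator = estimator

    def fit(self, X, y=None, **kwargs):
        self.estimator.fit(X, y)
        return self

    def predict(self, X):
        return self.estimator.predict(X)

    def score(self, X, y):
        return self.estimator.score(X, y)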
@@ -73,7 +73,7 @@ def grid_search2(input_df, target, skf_cv, var_type = ['numerical', 'categorical
     for i in range(len(pars)):
         print('IIIII===>', i)
         gs = GridSearchCV(pips[i], pars[i]
-                          , cv = skf_cv
+                          , cv = sel_cv
                           , **scoring_refit
                           #, refit=False
                           , **njobs
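The loop above walks two parallel lists, one pipeline and one parameter grid per model, and runs a GridSearchCV for each. A self-contained sketch of that pattern; the data, pipelines, and grids below are illustrative stand-ins, not the file's own:

from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler

X, y = make_classification(n_samples=200, n_features=10, random_state=42)
sel_cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# one pipeline and one grid per model, kept in parallel lists
pips = [
    Pipeline([('pre', MinMaxScaler()),
              ('clf', LogisticRegression(max_iter=1000))]),
    Pipeline([('pre', MinMaxScaler()),
              ('clf', RandomForestClassifier(random_state=42))]),
]
pars = [
    {'clf__C': [0.1, 1, 10]},
    {'clf__n_estimators': [100, 300], 'clf__max_depth': [4, None]},
]

for i in range(len(pars)):
    gs = GridSearchCV(pips[i], pars[i], cv=sel_cv,
                      scoring='recall', refit=True, n_jobs=-1)
    gs.fit(X, y)
    print('\nBest model:', gs.best_params_)
    print('\nBest score:', gs.best_score_)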
@@ -82,9 +82,21 @@ def grid_search2(input_df, target, skf_cv, var_type = ['numerical', 'categorical
     print ("finished Gridsearch")
     print ('\nBest model:', gs.best_params_)
     print ('\nBest score:', gs.best_score_)
-#%% Custom grid_search: Intra-Model [with return]
+# TODO: add
+# # summarize results
+# print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
+# means = grid_result.cv_results_['mean_test_score']
+# stds = grid_result.cv_results_['std_test_score']
+# params = grid_result.cv_results_['params']
+# for mean, stdev, param in zip(means, stds, params):
+#     print("%f (%f) with: %r" % (mean, stdev, param))
+# CALL: grid_search [orig]
+grid_search()
+# #%% Custom grid_search: Intra-Model [with return]
 def grid_search(input_df, target
-                , skf_cv
+                , sel_cv
                 , chosen_scoreD #scoring_refit
                 #, var_type = ['numerical', 'categorical','mixed']
                 ):
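The TODO block in this hunk already sketches the usual cv_results_ summary; a runnable version, reusing the fitted gs from the sketch further up (grid_result in the TODO corresponds to any fitted search object):

# summarize every candidate, not just the winner
print("Best: %f using %s" % (gs.best_score_, gs.best_params_))
means = gs.cv_results_['mean_test_score']
stds = gs.cv_results_['std_test_score']
params = gs.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))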
@@ -128,7 +140,7 @@ def grid_search(input_df, target
     print("\nStarting Gridsearch for model:", model_name, i)
     gs = GridSearchCV(all_pipelines[i], all_parameters[i]
-                      , cv = skf_cv
+                      , cv = sel_cv
                       #, **scoring_refit
                       #, refit=False
                       , **chosen_scoreD
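Here **chosen_scoreD unpacks a plain dict into GridSearchCV keyword arguments, so the caller decides scoring and refit. A minimal illustration, reusing pips, pars, and sel_cv from the sketch above (the dict values are assumptions, not the file's):

# equivalent to GridSearchCV(..., scoring='recall', refit=True)
chosen_scoreD = {'scoring': 'recall', 'refit': True}
gs = GridSearchCV(pips[0], pars[0], cv=sel_cv, **chosen_scoreD)
gs.fit(X, y)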
@@ -150,6 +162,9 @@ def grid_search(input_df, target
     out[model_name].update(chosen_scoreD.copy())
     out[model_name].update({'best_score': gs.best_score_}.copy())
     return(out)
+# TODO:
+# print, or inspect, each model's mean test score and sd; sometimes they are identical and your best model just picks one!
 #%% call CUSTOM grid_search: INTRA model [with return]
 # call
 chosen_score = {'scoring': 'recall'
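One way to act on the TODO above: after fitting, compare the top mean test scores and flag ties, since GridSearchCV silently picks one winner among them. A hedged sketch for the single-metric case, reusing a fitted gs; the exact tolerance is an assumption:

import numpy as np

# single-metric searches store scores under 'mean_test_score'
means = gs.cv_results_['mean_test_score']
best = np.nanmax(means)
n_tied = int(np.isclose(means, best).sum())
if n_tied > 1:
    print(f"{n_tied} parameter sets tie on the best mean test score "
          f"({best:.4f}); best_params_ is an arbitrary pick among them.")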
@@ -158,7 +173,6 @@ mcc_score_fn = {'chosen_scoreD': {'scoring': {'mcc': make_scorer(matthews_corrco
                ,'refit': 'mcc'}
                }
                }
 intra_models = grid_search(X, y
-                           , skf_cv = skf_cv
                            , chosen_scoreD= chosen_score
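mcc_score_fn above routes Matthews correlation into the search as a named scorer with refit='mcc'. Spelled out directly against GridSearchCV, reusing pips, pars, and sel_cv from the earlier sketch:

from sklearn.metrics import make_scorer, matthews_corrcoef

# with a dict of scorers, refit must name the metric that picks best_params_
chosen_scoreD = {'scoring': {'mcc': make_scorer(matthews_corrcoef)},
                 'refit': 'mcc'}
gs_mcc = GridSearchCV(pips[0], pars[0], cv=sel_cv, **chosen_scoreD)
gs_mcc.fit(X, y)
print('Best MCC:', gs_mcc.best_score_)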