Added and ran the hyperparameter script for all the different classifiers, but could not successfully run feature selection and hyperparameter tuning together

Tanushree Tunstall 2022-05-20 08:09:24 +01:00
parent 74af5ef890
commit 37bda41f44
18 changed files with 131 additions and 142 deletions
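The point the commit message flags, running feature selection and hyperparameter tuning together, is usually handled by putting the selector and the classifier into one Pipeline so that GridSearchCV tunes both at once. The sketch below is a minimal, self-contained illustration of that pattern under assumed names and values (SelectKBest, the step names, the toy data and the grid); it is not the script from this commit, which wraps the classifier in a 'clf' step with a swappable estimator.

import numpy as np
from sklearn.datasets import make_classification
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Toy data standing in for the real training split
X, y = make_classification(n_samples = 200, n_features = 20, random_state = 42)

pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('fs', SelectKBest(score_func = f_classif)),    # feature selection step
    ('clf', LogisticRegression(random_state = 42))  # classifier step
])

param_grid = {
    'fs__k': [5, 10, 'all'],                        # number of selected features is tuned too
    'clf__C': np.logspace(0, 4, 10),
    'clf__penalty': ['l2'],
    'clf__solver': ['lbfgs']
}

gscv = GridSearchCV(pipe, param_grid
                    , scoring = 'f1'
                    , cv = StratifiedKFold(n_splits = 10, shuffle = True, random_state = 42)
                    , n_jobs = 10)
gscv.fit(X, y)
print(gscv.best_params_)

The selector is tuned through the same double-underscore naming ('fs__k') that the grids below use for the classifier step.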

@@ -34,12 +34,7 @@ from xgboost import XGBClassifier
rs = {'random_state': 42}
njobs = {'n_jobs': 10}
#%% Get train-test split and scoring functions
# X_train, X_test, y_train, y_test = train_test_split(num_df_wtgt[numerical_FN]
# , num_df_wtgt['mutation_class']
# , test_size = 0.33
# , random_state = 2
# , shuffle = True
# , stratify = num_df_wtgt['mutation_class'])
y.to_frame().value_counts().plot(kind = 'bar')
blind_test_df['dst_mode'].to_frame().value_counts().plot(kind = 'bar')
@@ -90,22 +85,22 @@ parameters = [
'clf__estimator__max_iter': list(range(100,800,100)),
'clf__estimator__solver': ['saga']
},
# {
# 'clf__estimator': [LogisticRegression(**rs)],
# #'clf__estimator__C': [0.001, 0.01, 0.1, 1, 10, 100, 1000],
# 'clf__estimator__C': np.logspace(0, 4, 10),
# 'clf__estimator__penalty': ['l2', 'none'],
# 'clf__estimator__max_iter': list(range(100,800,100)),
# 'clf__estimator__solver': ['newton-cg', 'lbfgs', 'sag']
# },
# {
# 'clf__estimator': [LogisticRegression(**rs)],
# #'clf__estimator__C': [0.001, 0.01, 0.1, 1, 10, 100, 1000],
# 'clf__estimator__C': np.logspace(0, 4, 10),
# 'clf__estimator__penalty': ['l1', 'l2'],
# 'clf__estimator__max_iter': list(range(100,800,100)),
# 'clf__estimator__solver': ['liblinear']
# }
{
'clf__estimator': [LogisticRegression(**rs)],
#'clf__estimator__C': [0.001, 0.01, 0.1, 1, 10, 100, 1000],
'clf__estimator__C': np.logspace(0, 4, 10),
'clf__estimator__penalty': ['l2', 'none'],
'clf__estimator__max_iter': list(range(100,800,100)),
'clf__estimator__solver': ['newton-cg', 'lbfgs', 'sag']
},
{
'clf__estimator': [LogisticRegression(**rs)],
#'clf__estimator__C': [0.001, 0.01, 0.1, 1, 10, 100, 1000],
'clf__estimator__C': np.logspace(0, 4, 10),
'clf__estimator__penalty': ['l1', 'l2'],
'clf__estimator__max_iter': list(range(100,800,100)),
'clf__estimator__solver': ['liblinear']
}
]
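The same list-of-dicts layout extends to the other classifiers that this commit adds hyperparameter scripts for. As an illustration only, an extra entry for XGBClassifier (imported at the top of the file) might look like the sketch below; the grid values are assumptions, not values from this commit, and rs / njobs are the dicts defined above.

from xgboost import XGBClassifier

# Illustrative extra grid entry in the same 'clf__estimator' format
parameters_xgb = [
    {
        'clf__estimator': [XGBClassifier(**rs, **njobs)],
        'clf__estimator__n_estimators': [100, 300, 500],
        'clf__estimator__max_depth': [4, 6, 8],
        'clf__estimator__learning_rate': [0.01, 0.1]
    }
]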
@@ -120,7 +115,8 @@ gscv_lr = GridSearchCV(pipeline
, parameters
#, scoring = 'f1', refit = 'f1'
, scoring = mcc_score_fn, refit = 'mcc'
, cv = skf_cv
#, cv = skf_cv
, cv = rskf_cv
, **njobs
, return_train_score = False
, verbose = 3)
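The mcc_score_fn and rskf_cv objects passed above are presumably defined earlier in the script, outside this hunk. A minimal sketch of typical definitions, with assumed fold and repeat counts, would be:

from sklearn.metrics import make_scorer, matthews_corrcoef
from sklearn.model_selection import StratifiedKFold, RepeatedStratifiedKFold

# Scorer dict so that refit = 'mcc' can select on the MCC column of the CV results
mcc_score_fn = {'mcc': make_scorer(matthews_corrcoef)}

# Plain vs repeated stratified K-fold; this hunk swaps skf_cv for rskf_cv
skf_cv  = StratifiedKFold(n_splits = 10, shuffle = True, random_state = 42)
rskf_cv = RepeatedStratifiedKFold(n_splits = 10, n_repeats = 3, random_state = 42)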
@@ -138,7 +134,6 @@ print('\nMean test score from fit results:', round(np.nanmean(gscv_lr_fit_be_res
###############################################################################
######################################
# Blind test
######################################
@@ -186,7 +181,7 @@ print(lr_bts_df)
# d3
# Create df with best model params
model_params = pd.Series(['best_model_params', list(gscv_lr_fit_be_mod.items() )])
model_params = pd.Series(['best_model_params', list(gscv_lr_fit_be_mod.items())])
model_params_df = model_params.to_frame()
model_params_df
model_params_df.columns = ['Logistic_Regression']
@@ -209,3 +204,4 @@ lr_df
print(confusion_matrix(y_bts, test_predict))
cm = confusion_matrix(y_bts, test_predict)
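Beyond printing the raw confusion matrix, the blind-test block typically reports a few summary metrics from the same y_bts / test_predict pair. A short sketch (not code from this commit) of what could follow:

from sklearn.metrics import accuracy_score, f1_score, matthews_corrcoef, classification_report

# Assumes a binary target, as for dst_mode above
print('Blind test accuracy:', round(accuracy_score(y_bts, test_predict), 2))
print('Blind test F1 score:', round(f1_score(y_bts, test_predict), 2))
print('Blind test MCC:', round(matthews_corrcoef(y_bts, test_predict), 2))
print(classification_report(y_bts, test_predict))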