Finally made the feature selection (fs) work both within the class and without it

2022-05-21 13:30:04 +01:00 · 2022-05-21 13:30:04 +01:00 · 39cd7b4259
commit 39cd7b4259
parent 4a9e9dfedf
3 changed files with 95 additions and 110 deletions
--- a/uq_ml_models/UQ_LR_FS.py
+++ b/uq_ml_models/UQ_LR_FS.py
@@ -12,85 +12,6 @@ Created on Tue Mar 15 11:09:50 2022

@author: tanu
 """
-#%% Import libs
-rs = {'random_state': 42}
-njobs = {'n_jobs': 10}
-
-scoring_fn =  ({'accuracy'      : make_scorer(accuracy_score)
-                 , 'fscore'     : make_scorer(f1_score)
-                 , 'mcc'        : make_scorer(matthews_corrcoef)
-                 ,  'precision' : make_scorer(precision_score)
-                 ,  'recall'    : make_scorer(recall_score)
-                 ,  'roc_auc'   : make_scorer(roc_auc_score)
-                 ,  'jaccard'   : make_scorer(jaccard_score)
-            })    
-
-mcc_score_fn = {'mcc': make_scorer(matthews_corrcoef)}
-jacc_score_fn = {'jcc': make_scorer(jaccard_score)}
-#%% Get data
-y.to_frame().value_counts().plot(kind = 'bar')
-blind_test_df['dst_mode'].to_frame().value_counts().plot(kind = 'bar')
-
-# %% Logistic Regression + FS + hyperparameter
-# https://www.tomasbeuzen.com/post/scikit-learn-gridsearch-pipelines/
-# from sklearn.feature_selection import SelectKBest, mutual_info_classif
-
-# # Create pipeline
-# pipe = Pipeline([
-#     ('pre', MinMaxScaler())
-#     , ('fs', RFECV( LogisticRegression(**rs), cv = skf_cv, scoring = 'matthews_corrcoef', **njobs,verbose = 3))
-#     #, ('fs', SelectKBest(mutual_info_classif, k=5))
-#     , ('clf', LogisticRegression(**rs))
-# ])
-
-# # Create search space
-# param_grid = [{'fs__step': [1]},
-
-#     {
-#         'clf': [LogisticRegression(**rs)],
-#         #'clf__C': [0.001, 0.01, 0.1, 1, 10, 100, 1000],
-#         'clf__C': np.logspace(0, 4, 10),
-#         'clf__penalty': ['none', 'l1', 'l2', 'elasticnet'],
-#         'clf__max_iter': list(range(100,800,100)),
-#         'clf__solver': ['saga']
-#     },
-#     {
-#         'clf': [LogisticRegression(**rs)],
-#         #'clf__C': [0.001, 0.01, 0.1, 1, 10, 100, 1000],
-#         'clf__C': np.logspace(0, 4, 10),
-#         'clf__penalty': ['l2', 'none'],
-#         'clf__max_iter': list(range(100,800,100)),
-#         'clf__solver': ['newton-cg', 'lbfgs', 'sag']
-#     }, 
-#     {
-#         'clf': [LogisticRegression(**rs)],
-#         #'clf__C': [0.001, 0.01, 0.1, 1, 10, 100, 1000],
-#         'clf__C': np.logspace(0, 4, 10),
-#         'clf__penalty': ['l1', 'l2'],
-#         'clf__max_iter': list(range(100,800,100)),
-#         'clf__solver': ['liblinear']
-#     }]
-
-# # Run Grid search
-# gscv_fs_lr = GridSearchCV(pipe
-#                           , param_grid
-#                           , cv = skf_cv
-#                           , scoring = mcc_score_fn, refit = 'mcc'
-#                           , verbose = 3)
-
-# gscv_fs_lr_fit = gscv_fs_lr.fit(X, y)
-# gscv_fs_lr_fit_be_mod = gscv_fs_lr_fit.best_params_
-# gscv_fs_lr_fit_be_res = gscv_fs_lr_fit.cv_results_
-
-# print('Best model:\n', gscv_fs_lr_fit_be_mod)
-# print('Best models score:\n', gscv_fs_lr_fit.best_score_, ':' , round(gscv_fs_lr_fit.best_score_, 2))
-
-# #print('\nMean test score from fit results:', round(mean(gscv_fs_lr_fit_be_res['mean_test_mcc']),2))
-# print('\nMean test score from fit results:', round(np.nanmean(gscv_fs_lr_fit_be_res['mean_test_mcc']),2))
-
-##############################################################################
-#MANUAL
-
 #%% Logistic Regression + hyperparam + FS: BaseEstimator: ClfSwitcher()
 model_lr = LogisticRegression(**rs)
 model_rfecv = RFECV(estimator = model_lr
@@ -99,15 +20,6 @@ model_rfecv = RFECV(estimator = model_lr
                    , scoring = 'matthews_corrcoef'
                    )

-# model_rfecv = SequentialFeatureSelector(estimator = model_lr
-#                                           , n_features_to_select = 'auto'
-#                                           , tol = None
-# #                                         , cv = 10
-#                                           , cv = rskf_cv
-# #                                          , direction ='backward'
-#                                           , direction ='forward'
-#                                           , **njobs)
-
 param_grid2 = [
    {
        #'clf__estimator': [LogisticRegression(**rs)],
@@ -155,14 +67,15 @@ pipeline = Pipeline([('pre', MinMaxScaler())
  
 # Fit
 lr_fs_fit = pipeline.fit(X,y)
-lr_fs_fit_be_mod = lr_fs_fit.best_params_
-lr_fs_fit_be_res = lr_fs_fit.cv_results_
+#lr_fs_fit_be_mod = lr_fs_fit.best_params_
+#lr_fs_fit_be_res = lr_fs_fit.cv_results_
+dir(lr_fs_fit)

 print('Best model:\n', lr_fs_fit_be_mod)
 print('Best models score:\n', lr_fs_fit.best_score_, ':' , round(lr_fs_fit.best_score_, 2))

 pipeline.predict(X_bts)
-lr_fs.predict(X_bts)
+lr_fs_fit.predict(X_bts)

 test_predict = pipeline.predict(X_bts)
 print(test_predict)
@@ -238,4 +151,4 @@ lr_df

 #FIXME: tidy the index of the formatted df

-###############################################################################
+###############################################################################