added scripts for FS including test call, etc

2022-06-23 14:53:01 +01:00 · 2022-06-23 14:53:01 +01:00 · 5dea35f97c
commit 5dea35f97c
parent 8fe0048328
3 changed files with 575 additions and 0 deletions
--- a/scripts/ml/scrMult_CALL.py
+++ b/scripts/ml/scrMult_CALL.py
@ -0,0 +1,119 @@
+fs_test = RFECV(DecisionTreeClassifier(**rs) 
+             , cv =  StratifiedKFold(n_splits = 10, shuffle = True,**rs)
+             , scoring = 'matthews_corrcoef')
+
+models = [('Logistic Regression'       , LogisticRegression(**rs) )]
+          #, ('Logistic RegressionCV'     , LogisticRegressionCV(**rs) )]
+
+
+for m in models:
+    print(m)
+print('\n================================================================\n')
+
+index = 1
+for model_name, model_fn in models:
+    print('\nRunning classifier:', index
+          , '\nModel_name:'               , model_name
+          , '\nModel func:'               , model_fn)
+          #, '\nList of models:', models)
+    index = index+1
+
+fs2 = RFECV(model_fn
+            , cv = skf_cv
+            , scoring = 'matthews_corrcoef')
+
+from sklearn.datasets import make_friedman1
+from sklearn.datasets import load_iris
+
+X_eg, y_eg = load_iris(return_X_y=True)
+#X_eg, y_eg = make_friedman1(n_samples=50, n_features=10, random_state=0)
+fs2.fit(X_eg,y_eg)
+fs2.support_
+fs2.ranking_
+###############################################################################
+# LR
+
+a_fs = fsgs(input_df = X
+     , target = y
+     #, param_gridLd = [{'fs__min_features_to_select' : []}]
+     , blind_test_df = X_bts
+     , blind_test_target = y_bts
+     #, estimator = RandomForestClassifier(**rs, **njobs, bootstrap = True, oob_score = True)
+     , estimator = LogisticRegression(**rs)
+     , use_fs = False # set True to use DT as a RFECV estimator
+     , var_type = 'mixed')
+
+a_fs.keys()
+a_fsDF  = pd.DataFrame(a_fs.items()) # LR
+a_fsDF2 = pd.DataFrame(a_fs2.items()) # use_FS= True
+a_fsDF3 = pd.DataFrame(a_fs3.items()) # RF
+
+# this one
+a_fs0 = fsgs(input_df = X
+         , target = y
+         , param_gridLd = [{'fs__min_features_to_select' : [1]}]
+         , blind_test_df = X_bts
+         , blind_test_target = y_bts
+         , estimator = LogisticRegression(**rs)
+         , use_fs = False # uses estimator as the RFECV parameter for fs. Set to TRUE if you want to supply custom_fs as shown below
+         , custom_fs = RFECV(DecisionTreeClassifier(**rs) , cv =  skf_cv, scoring = 'matthews_corrcoef')
+         , cv_method =  skf_cv
+         , var_type = 'mixed'
+         )
+###############################################
+
+
+
+##############################################################################
+# my function CALL
+#import fsgs from UQ_FS_fn
+
+# RFECV by default uses the estimator provided; to supply a custom fs model instead, set use_fs = True and pass custom_fs
+a_fs = fsgs(input_df = X
+         , target = y
+         , param_gridLd = [{'fs__min_features_to_select' : [1]}]
+         , blind_test_df = X_bts
+         , blind_test_target = y_bts
+         , estimator = LogisticRegression(**rs)
+         #, use_fs = False # uses estimator as the RFECV parameter for fs. Set to TRUE if you want to supply custom_fs as shown below
+         , use_fs = True, custom_fs = RFECV(DecisionTreeClassifier(**rs) , cv =  skf_cv, scoring = 'matthews_corrcoef')
+         , cv_method =  skf_cv
+         , var_type = 'mixed'
+         )
+
+a_fs.keys()
+a_fs2.keys()
+a_fs3.keys()
+
+
+a_fsDF = pd.DataFrame(a_fs.items()) # LR
+a_fsDF.columns = ['parameter', 'param_value']
+
+a_fs2DF2 = pd.DataFrame(a_fs2.items()) # use_FS= True
+a_fs2DF2.columns = ['parameter', 'param_value']
+
+a_fsDF3 = pd.DataFrame(a_fs3.items()) # RF
+
+##############
+a_mask = a_fs['fs_res_array']
+a_fsDF.loc[a_fsDF['parameter'] == 'fs_res_array']
+
+mod_selF = a_fs2DF2.loc[a_fs2DF2['parameter'] == 'sel_features_names']; mod_selF # mask built from the same frame it filters (was a_fsDF)
+mod_selFT = mod_selF.T
+
+# subset keys
+#keys_to_extract = ['model_name', 'fs_method', 'sel_features_names', 'all_feature_names', 'fs_res_array']
+keys_to_extract = ['fs_method', 'sel_features_names']
+a_subset = {key: a_fs2[key] for key in keys_to_extract}
+a_subsetDF =  pd.DataFrame(a_subset); a_subsetDF
+
+mod_fs_method = a_fs2['fs_method']
+fs_namef = re.search(r'estimator=(\w+)', mod_fs_method) # bound to the name read on the next line (was fs_name -> NameError)
+fs_namefN = fs_namef.group(1)
+print('\nFS method:', fs_namefN)
+
+fsDF = a_subsetDF[['sel_features_names']];fsDF
+fsDF.columns = [fs_namefN+'_FS']
+fsDF.columns; fsDF
+###############################
+