LSHTM_analysis/scripts/ml/scrMult_CALL.py

# Feature selector used for testing: RFECV (presumably sklearn's recursive
# feature elimination with cross-validation -- confirm against the imports)
# wrapped around a decision tree, scored by Matthews correlation coefficient
# over 10 shuffled, stratified folds. `rs` is unpacked as keyword arguments
# into both the tree and the splitter.
fs_test = RFECV(
    DecisionTreeClassifier(**rs),
    cv=StratifiedKFold(n_splits=10, shuffle=True, **rs),
    scoring='matthews_corrcoef',
)

# (display name, estimator) pairs to iterate over below.
models = [
    ('Logistic Regression', LogisticRegression(**rs)),
    # ('Logistic RegressionCV', LogisticRegressionCV(**rs)),
]


# Show every configured (name, estimator) pair before running anything.
for m in models:
    print(m)
print('\n================================================================\n')

# Announce each classifier with a 1-based running number.
# `model_name` and `model_fn` remain bound to the last pair after the loop;
# the code that follows relies on `model_fn`.
for index, (model_name, model_fn) in enumerate(models, start=1):
    print('\nRunning classifier:', index
          , '\nModel_name:'               , model_name
          , '\nModel func:'               , model_fn)
          #, '\nList of models:', models)

from sklearn.datasets import load_iris
from sklearn.datasets import make_friedman1

# Toy-data check of RFECV: wrap `model_fn` (the estimator left bound by the
# preceding loop over `models`) and score with Matthews correlation
# coefficient using the `skf_cv` splitter.
fs2 = RFECV(model_fn, cv=skf_cv, scoring='matthews_corrcoef')

# Fit on the iris dataset; the friedman1 alternative is kept for reference.
X_eg, y_eg = load_iris(return_X_y=True)
# X_eg, y_eg = make_friedman1(n_samples=50, n_features=10, random_state=0)
fs2.fit(X_eg, y_eg)

# Bare expressions: they only display a value when run interactively.
fs2.support_
fs2.ranking_
###############################################################################
# LR

# Logistic-regression run of fsgs on (X, y) with (X_bts, y_bts) passed as the
# blind-test data; use_fs is off and no parameter grid is passed.
a_fs = fsgs(
    input_df=X,
    target=y,
    # param_gridLd=[{'fs__min_features_to_select': []}],
    blind_test_df=X_bts,
    blind_test_target=y_bts,
    # estimator=RandomForestClassifier(**rs, **njobs, bootstrap=True, oob_score=True),
    estimator=LogisticRegression(**rs),
    use_fs=False,  # set True to use DT as a RFECV estimator
    var_type='mixed',
)

a_fs.keys()

# One row per (key, value) item of each result.
# NOTE(review): a_fs2 and a_fs3 are not assigned anywhere in the visible part
# of this script -- presumably left over from earlier interactive runs; confirm.
a_fsDF = pd.DataFrame(a_fs.items())    # LR
a_fsDF2 = pd.DataFrame(a_fs2.items())  # use_FS= True
a_fsDF3 = pd.DataFrame(a_fs3.items())  # RF

# this one
# Logistic-regression run with an explicit one-entry parameter grid and the
# `skf_cv` splitter. A decision-tree RFECV is passed as custom_fs, but
# use_fs is False here.
a_fs0 = fsgs(
    input_df=X,
    target=y,
    param_gridLd=[{'fs__min_features_to_select': [1]}],
    blind_test_df=X_bts,
    blind_test_target=y_bts,
    estimator=LogisticRegression(**rs),
    # use_fs=False uses estimator as the RFECV parameter for fs.
    # Set to True if you want to supply custom_fs as shown below.
    use_fs=False,
    custom_fs=RFECV(DecisionTreeClassifier(**rs), cv=skf_cv, scoring='matthews_corrcoef'),
    cv_method=skf_cv,
    var_type='mixed',
)
###############################################


##############################################################################
# my function CALL
#import fsgs from UQ_FS_fn

# RFECV by default uses the estimator provided; to supply a different feature-selection model, set use_fs = True and pass it as custom_fs.
# Logistic-regression run with use_fs switched on and a decision-tree RFECV
# (Matthews correlation coefficient, `skf_cv` splitter) passed as custom_fs.
a_fs = fsgs(
    input_df=X,
    target=y,
    param_gridLd=[{'fs__min_features_to_select': [1]}],
    blind_test_df=X_bts,
    blind_test_target=y_bts,
    estimator=LogisticRegression(**rs),
    # use_fs=False uses estimator as the RFECV parameter for fs.
    # Set to True if you want to supply custom_fs as shown below.
    use_fs=True,
    custom_fs=RFECV(DecisionTreeClassifier(**rs), cv=skf_cv, scoring='matthews_corrcoef'),
    cv_method=skf_cv,
    var_type='mixed',
)

# Inspect the keys of each result (bare expressions: they only display a
# value when run interactively).
a_fs.keys()
a_fs2.keys()
a_fs3.keys()

# Turn each result into a table with one row per (key, value) item.
# The first two get named columns.
a_fsDF = pd.DataFrame(a_fs.items())  # LR
a_fsDF.columns = ['parameter', 'param_value']

a_fs2DF2 = pd.DataFrame(a_fs2.items())  # use_FS= True
a_fs2DF2.columns = ['parameter', 'param_value']

# This table keeps pandas' default column labels.
a_fsDF3 = pd.DataFrame(a_fs3.items())  # RF

##############
# 'fs_res_array' entry of the a_fs result, and the matching row of its
# parameter table (bare expression: only displays when run interactively).
a_mask = a_fs['fs_res_array']
a_fsDF.loc[a_fsDF['parameter'] == 'fs_res_array']

# Row of the a_fs2 parameter table holding the selected feature names.
# The boolean mask is built from a_fs2DF2 itself so that it always lines up
# with the frame being indexed.
mod_selF = a_fs2DF2.loc[a_fs2DF2['parameter'] == 'sel_features_names']
mod_selF
mod_selFT = mod_selF.T

# subset keys
#keys_to_extract = ['model_name', 'fs_method', 'sel_features_names', 'all_feature_names', 'fs_res_array']
keys_to_extract = ['fs_method', 'sel_features_names']
a_subset = {key: a_fs2[key] for key in keys_to_extract}
a_subsetDF = pd.DataFrame(a_subset)
a_subsetDF

# Pull the word following 'estimator=' out of the feature-selection method
# description (assumes a_fs2['fs_method'] is a string -- confirm in fsgs).
mod_fs_method = a_fs2['fs_method']
fs_name = re.search(r'estimator=(\w+)', mod_fs_method)
if fs_name is None:
    # Fail with a readable message instead of an AttributeError on None.
    raise ValueError("No 'estimator=<name>' found in fs_method: %r" % (mod_fs_method,))
fs_namefN = fs_name.group(1)
print('\nFS method:', fs_namefN)

# Single-column table of the selected features, labelled '<estimator>_FS'.
fsDF = a_subsetDF[['sel_features_names']]
fsDF
fsDF.columns = [fs_namefN+'_FS']
fsDF.columns
fsDF
###############################