# LSHTM_analysis/scripts/ml/scrMult_CALL.py
#
# (file-view metadata, kept as a comment so the script parses:
#  116 lines, 4 KiB, Python, executable file)
# Baseline feature selector: RFECV wrapping a decision tree, with stratified
# 10-fold CV and Matthews correlation coefficient as the selection score.
fs_test = RFECV(DecisionTreeClassifier(**rs)
                , cv = StratifiedKFold(n_splits = 10, shuffle = True, **rs)
                , scoring = 'matthews_corrcoef')

# Candidate classifiers to push through RFECV: (display name, estimator) pairs.
models = [('Logistic Regression' , LogisticRegression(**rs) )]
#, ('Logistic RegressionCV' , LogisticRegressionCV(**rs) )]

# NOTE(review): loop-body indentation was lost in the original paste;
# `print(m)` is the body of this loop.
for m in models:
    print(m)

print('\n================================================================\n')

index = 1
# For each candidate (name, estimator) pair, announce it and build an RFECV
# selector around it. NOTE(review): loop-body indentation was lost in the
# original paste and is reconstructed here; confirm `fs2` was intended to be
# (re)assigned inside the loop.
for model_name, model_fn in models:
    print('\nRunning classifier:', index
          , '\nModel_name:' , model_name
          , '\nModel func:' , model_fn)
    #, '\nList of models:', models)
    index = index + 1

    # RFECV using the candidate model itself as the elimination estimator.
    fs2 = RFECV(model_fn
                , cv = skf_cv
                , scoring = 'matthews_corrcoef')

# Smoke-test the (last) selector on a small built-in dataset.
# NOTE(review): imports belong at the top of the file; kept here to preserve
# the script's exploratory flow.
from sklearn.datasets import make_friedman1
from sklearn.datasets import load_iris

X_eg, y_eg = load_iris(return_X_y=True)
#X_eg, y_eg = make_friedman1(n_samples=50, n_features=10, random_state=0)
fs2.fit(X_eg, y_eg)
fs2.support_   # boolean mask of selected features
fs2.ranking_   # feature ranking: 1 == selected
###############################################################################
# LR
# Grid-search + feature-selection run with logistic regression as the
# estimator. use_fs = False, so (per the inline note) fsgs uses the estimator
# itself inside RFECV rather than a custom selector.
a_fs = fsgs(input_df = X
, target = y
#, param_gridLd = [{'fs__min_features_to_select' : []}]
, blind_test_df = X_bts
, blind_test_target = y_bts
#, estimator = RandomForestClassifier(**rs, **njobs, bootstrap = True, oob_score = True)
, estimator = LogisticRegression(**rs)
, use_fs = False # set True to use DT as a RFECV estimator
, var_type = 'mixed')
# a_fs is dict-like (tabulated via .items() below).
a_fs.keys()
# NOTE(review): a_fs2 and a_fs3 are not defined in this chunk — presumably
# results of earlier runs (use_fs=True and RF, per the comments); confirm
# they exist before this point or these two lines will raise NameError.
a_fsDF = pd.DataFrame(a_fs.items()) # LR
a_fsDF2 = pd.DataFrame(a_fs2.items()) # use_FS= True
a_fsDF3 = pd.DataFrame(a_fs3.items()) # RF
# Grid-search + feature-selection run ("this one"): logistic regression with
# a one-point grid over min_features_to_select. use_fs=False, so fsgs wraps
# the estimator itself in RFECV; the custom_fs selector passed below is
# supplied but not used on this run.
a_fs0 = fsgs(
    input_df=X,
    target=y,
    param_gridLd=[{'fs__min_features_to_select' : [1]}],
    blind_test_df=X_bts,
    blind_test_target=y_bts,
    estimator=LogisticRegression(**rs),
    use_fs=False,  # set True to use the custom_fs selector instead
    custom_fs=RFECV(DecisionTreeClassifier(**rs), cv=skf_cv,
                    scoring='matthews_corrcoef'),
    cv_method=skf_cv,
    var_type='mixed',
)
###############################################
##############################################################################
# my function CALL
#import fsgs from UQ_FS_fn
# fsgs runs RFECV with the supplied estimator by default; here use_fs=True
# swaps in the custom selector (decision-tree RFECV, MCC-scored) instead.
a_fs = fsgs(
    input_df=X,
    target=y,
    param_gridLd=[{'fs__min_features_to_select' : [1]}],
    blind_test_df=X_bts,
    blind_test_target=y_bts,
    estimator=LogisticRegression(**rs),
    use_fs=True,
    custom_fs=RFECV(DecisionTreeClassifier(**rs), cv=skf_cv,
                    scoring='matthews_corrcoef'),
    cv_method=skf_cv,
    var_type='mixed',
)
# Tabulate the fsgs result dicts and extract the feature-selection method name.
# NOTE(review): a_fs2 / a_fs3 are not defined in this chunk — presumably
# earlier runs; confirm they exist or these lines raise NameError.
a_fs.keys()
a_fs2.keys()
a_fs3.keys()

a_fsDF = pd.DataFrame(a_fs.items()) # LR
a_fsDF.columns = ['parameter', 'param_value']
a_fs2DF2 = pd.DataFrame(a_fs2.items()) # use_FS= True
a_fs2DF2.columns = ['parameter', 'param_value']
a_fsDF3 = pd.DataFrame(a_fs3.items()) # RF
##############
# RFECV support mask for the LR run.
a_mask = a_fs['fs_res_array']
a_fsDF.loc[a_fsDF['parameter'] == 'fs_res_array']
# NOTE(review): this mask is built from a_fsDF but indexes a_fs2DF2 — it only
# works if both frames share the same key order; confirm that is intended.
mod_selF = a_fs2DF2.loc[a_fsDF['parameter'] == 'sel_features_names']; mod_selF
mod_selFT = mod_selF.T

# subset keys
#keys_to_extract = ['model_name', 'fs_method', 'sel_features_names', 'all_feature_names', 'fs_res_array']
keys_to_extract = ['fs_method', 'sel_features_names']
a_subset = {key: a_fs2[key] for key in keys_to_extract}
a_subsetDF = pd.DataFrame(a_subset); a_subsetDF

# Pull the estimator class name out of the fs_method repr string,
# e.g. "RFECV(estimator=DecisionTreeClassifier(...)" -> "DecisionTreeClassifier".
mod_fs_method = a_fs2['fs_method']
fs_name = re.search(r'estimator=(\w+)', mod_fs_method)  # raw string for the regex
# BUG FIX: the original read `fs_namef.group(1)`, but no `fs_namef` exists —
# the match object is `fs_name` (NameError). NOTE(review): .group() also
# raises AttributeError if the pattern does not match; add a guard if needed.
fs_namefN = fs_name.group(1)
print('\nFS method:', fs_namefN)

fsDF = a_subsetDF[['sel_features_names']]; fsDF
fsDF.columns = [fs_namefN+'_FS']
fsDF.columns; fsDF
###############################