119 lines
4 KiB
Python
Executable file
119 lines
4 KiB
Python
Executable file
# Feature selector: recursive feature elimination with cross-validation (RFECV)
# around a decision tree, scored with the Matthews correlation coefficient over
# 10 shuffled, stratified folds.
# NOTE(review): `rs` is defined outside this excerpt and is unpacked as keyword
# arguments -- presumably {'random_state': <seed>}; confirm.
fs_test = RFECV(
    estimator=DecisionTreeClassifier(**rs),
    cv=StratifiedKFold(n_splits=10, shuffle=True, **rs),
    scoring='matthews_corrcoef',
)
|
|
|
|
# (display name, estimator) pairs that the loops below iterate over.
models = [
    ('Logistic Regression', LogisticRegression(**rs)),
    # ('Logistic RegressionCV', LogisticRegressionCV(**rs)),
]

# Echo every registered pair, followed by a separator rule.
# NOTE(review): the indentation of this script was lost; the separator is
# printed once after the listing here. With the single entry above this is
# identical to printing it per entry -- confirm the intended placement.
for m in models:
    print(m)
print('\n================================================================\n')
|
|
|
|
# Walk the model list, announce each entry with a 1-based running counter and
# wrap its estimator in an RFECV selector scored by Matthews correlation.
# NOTE(review): the indentation of this script was lost; `fs2` is built inside
# the loop here, so the last model in `models` wins. With the current
# single-entry list this is identical to building it after the loop -- confirm.
# `skf_cv` is defined outside this excerpt.
index = 1
for model_name, model_fn in models:
    print(
        '\nRunning classifier:', index,
        '\nModel_name:', model_name,
        '\nModel func:', model_fn,
    )
    # , '\nList of models:', models)
    index += 1

    fs2 = RFECV(estimator=model_fn, cv=skf_cv, scoring='matthews_corrcoef')
|
|
|
|
# Smoke-test the `fs2` selector on a toy data set.
from sklearn.datasets import load_iris, make_friedman1

# Iris features/labels returned as an (X, y) pair.
X_eg, y_eg = load_iris(return_X_y=True)
# Alternative toy data set, currently disabled:
# X_eg, y_eg = make_friedman1(n_samples=50, n_features=10, random_state=0)
fs2.fit(X_eg, y_eg)

# Bare expressions: they only display a value when run interactively.
fs2.support_  # per scikit-learn docs: mask of the selected features
fs2.ranking_  # per scikit-learn docs: feature ranking
|
|
###############################################################################
|
|
# LR: fsgs run with LogisticRegression as the estimator
|
|
|
|
# Call the project helper `fsgs` (defined outside this excerpt) with logistic
# regression as the estimator; the result is later treated as a dict
# (`.keys()` / `.items()`).
# NOTE(review): X, y, X_bts and y_bts come from outside this excerpt --
# presumably the training data and a held-out "blind test" split; confirm.
a_fs = fsgs(
    input_df=X,
    target=y,
    # param_gridLd=[{'fs__min_features_to_select': []}],
    blind_test_df=X_bts,
    blind_test_target=y_bts,
    # estimator=RandomForestClassifier(**rs, **njobs, bootstrap=True, oob_score=True),
    estimator=LogisticRegression(**rs),
    use_fs=False,  # set True to use DT as a RFECV estimator
    var_type='mixed',
)
|
|
|
|
# Bare expression: lists the result keys when run interactively.
a_fs.keys()

# One two-column table (key, value) per fsgs result dict.
# NOTE(review): a_fs2 and a_fs3 are not assigned anywhere in this excerpt; they
# must already exist (e.g. from earlier runs) for these lines to work.
a_fsDF = pd.DataFrame(a_fs.items())    # LR
a_fsDF2 = pd.DataFrame(a_fs2.items())  # use_FS= True
a_fsDF3 = pd.DataFrame(a_fs3.items())  # RF
|
|
|
|
# this one: fsgs with an explicit 'fs__min_features_to_select' grid and cv_method = skf_cv
|
|
# fsgs run with an explicit parameter grid and CV splitter, logistic regression
# as the estimator.
# NOTE(review): custom_fs is passed although use_fs is False; going by the
# inline comment it is presumably ignored in that case -- confirm in fsgs.
a_fs0 = fsgs(
    input_df=X,
    target=y,
    param_gridLd=[{'fs__min_features_to_select': [1]}],
    blind_test_df=X_bts,
    blind_test_target=y_bts,
    estimator=LogisticRegression(**rs),
    # uses estimator as the RFECV parameter for fs. Set to TRUE if you want to
    # supply custom_fs as shown below
    use_fs=False,
    custom_fs=RFECV(DecisionTreeClassifier(**rs), cv=skf_cv, scoring='matthews_corrcoef'),
    cv_method=skf_cv,
    var_type='mixed',
)
|
|
###############################################
|
|
|
|
|
|
|
|
##############################################################################
|
|
# my function CALL
|
|
#from UQ_FS_fn import fsgs
|
|
|
|
# RFECV by default uses the estimator provided; a custom fs model can be supplied by setting use_fs = True together with custom_fs
|
|
# fsgs run that supplies its own feature selector: use_fs is True and custom_fs
# is an RFECV around a decision tree, while logistic regression stays the
# estimator.
a_fs = fsgs(
    input_df=X,
    target=y,
    param_gridLd=[{'fs__min_features_to_select': [1]}],
    blind_test_df=X_bts,
    blind_test_target=y_bts,
    estimator=LogisticRegression(**rs),
    # use_fs=False,  # uses estimator as the RFECV parameter for fs. Set to TRUE
    #                # if you want to supply custom_fs as shown below
    use_fs=True,
    custom_fs=RFECV(DecisionTreeClassifier(**rs), cv=skf_cv, scoring='matthews_corrcoef'),
    cv_method=skf_cv,
    var_type='mixed',
)
|
|
|
|
# Bare expressions: list the keys of each result dict when run interactively.
# NOTE(review): a_fs2 and a_fs3 are not assigned anywhere in this excerpt.
a_fs.keys()
a_fs2.keys()
a_fs3.keys()

# Result dicts as two-column tables; the first two get labelled columns.
a_fsDF = pd.DataFrame(a_fs.items())  # LR
a_fsDF.columns = ['parameter', 'param_value']

a_fs2DF2 = pd.DataFrame(a_fs2.items())  # use_FS= True
a_fs2DF2.columns = ['parameter', 'param_value']

# Left with pandas' default column labels (no rename in the original either).
a_fsDF3 = pd.DataFrame(a_fs3.items())  # RF
|
|
|
|
##############
|
|
# Pull individual entries out of the fsgs results.
# NOTE(review): 'fs_res_array' is presumably the feature-selection mask, going
# by the variable name -- confirm against fsgs.
a_mask = a_fs['fs_res_array']

# Bare expression: displays the 'fs_res_array' row when run interactively.
a_fsDF.loc[a_fsDF['parameter'] == 'fs_res_array']

# Row of the second run's table holding the selected feature names.
# The filter is built from a_fs2DF2's own 'parameter' column. The original
# built it from a_fsDF, which only picks the right row when both tables list
# their keys in exactly the same order.
mod_selF = a_fs2DF2.loc[a_fs2DF2['parameter'] == 'sel_features_names']
mod_selF  # bare expression: interactive display only

# Transposed view of that row.
mod_selFT = mod_selF.T
|
|
|
|
# subset keys
|
|
#keys_to_extract = ['model_name', 'fs_method', 'sel_features_names', 'all_feature_names', 'fs_res_array']
|
|
# Restrict the second run's result dict to the entries of interest and tabulate
# them.
# Wider selection, currently disabled:
# keys_to_extract = ['model_name', 'fs_method', 'sel_features_names', 'all_feature_names', 'fs_res_array']
keys_to_extract = ['fs_method', 'sel_features_names']
a_subset = {k: a_fs2[k] for k in keys_to_extract}
a_subsetDF = pd.DataFrame(a_subset)
a_subsetDF  # bare expression: interactive display only
|
|
|
|
# Derive a short label for the feature-selection method from its textual
# description: the word that follows 'estimator='.
mod_fs_method = a_fs2['fs_method']

# Raw string so that \w reaches the regex engine without an invalid-escape
# warning.
fs_name = re.search(r'estimator=(\w+)', mod_fs_method)

# Read the capture from the match object assigned just above. The original
# referenced an undefined name `fs_namef` here, which raises NameError.
# re.search returns None when nothing matches, so this line raises
# AttributeError in that case.
fs_namefN = fs_name.group(1)
print('\nFS method:', fs_namefN)
|
|
|
|
# Single-column table of the selected feature names, with the column relabelled
# as '<fs method name>_FS'.
fsDF = a_subsetDF[['sel_features_names']]
fsDF  # bare expression: interactive display only
fsDF.columns = [f'{fs_namefN}_FS']

# Bare expressions: interactive display of the new labels and the table.
fsDF.columns
fsDF
|
|
###############################
|
|
|