diff --git a/scripts/ml/combined_model/cm_logo_skf.py b/scripts/ml/combined_model/cm_logo_skf.py index 0ad72a2..ca81675 100755 --- a/scripts/ml/combined_model/cm_logo_skf.py +++ b/scripts/ml/combined_model/cm_logo_skf.py @@ -172,26 +172,25 @@ def CMLogoSkf(cm_input_df cm_bts_y = cm_test_df.loc[:, target_var] print('\nTEST data dim:' , cm_bts_X.shape - , '\nTEST Target dim:', cm_bts_y.shape) + , '\nTEST Target dim:' , cm_bts_y.shape) print("Running Multiple models on LOGO with SKF") #%%:Running Multiple models on LOGO with SKF cD3_v2 = MultModelsCl_logo_skf(input_df = cm_X , target = cm_y - #, group = 'none' , sel_cv = skf_cv - - , blind_test_df = cm_bts_X - , blind_test_target = cm_bts_y - - , tts_split_type = tts_split_type - + , tts_split_type = tts_split_type , resampling_type = 'none' # default + #, group = 'none' + , add_cm = True , add_yn = True , var_type = 'mixed' - + , scale_numeric = ['min_max'] , run_blind_test = True + , blind_test_df = cm_bts_X + , blind_test_target = cm_bts_y + , return_formatted_output = True , random_state = 42 , n_jobs = os.cpu_count() # the number of jobs should equal the number of CPU cores diff --git a/scripts/ml/combined_model/running_ml_scripts.txt b/scripts/ml/combined_model/running_ml_scripts.txt index 3f04ff8..5c3793c 100644 --- a/scripts/ml/combined_model/running_ml_scripts.txt +++ b/scripts/ml/combined_model/running_ml_scripts.txt @@ -1,7 +1,6 @@ ######################################################################## - +# COMBINED Model ######################################################################## -time ./cm_logo_skf.py 2>&1 | tee log_cm_skf.txt - +time ./cm_logo_skf.py 2>&1 | tee cm_skf-$(date --iso).log diff --git a/scripts/ml/ml_functions/MultClfs_logo_skf.py b/scripts/ml/ml_functions/MultClfs_logo_skf.py index e2fc932..73e8b3a 100755 --- a/scripts/ml/ml_functions/MultClfs_logo_skf.py +++ b/scripts/ml/ml_functions/MultClfs_logo_skf.py @@ -90,10 +90,11 @@ scoring_fn = ({ 'mcc' : make_scorer(matthews_corrcoef) , 'roc_auc' : make_scorer(roc_auc_score) , 'jcc' : make_scorer(jaccard_score) }) - + +# for sel_cv #skf_cv = StratifiedKFold(n_splits = 10 # #, shuffle = False, random_state= None) -# , shuffle = True,**rs) +# , shuffle = True, **rs) #rskf_cv = RepeatedStratifiedKFold(n_splits = 10 # , n_repeats = 3 @@ -188,15 +189,7 @@ def MultModelsCl_logo_skf(input_df #%% Func globals rs = {'random_state': random_state} njobs = {'n_jobs': n_jobs} - - skf_cv = StratifiedKFold(n_splits = 10 - #, shuffle = False, random_state= None) - , shuffle = True,**rs) - rskf_cv = RepeatedStratifiedKFold(n_splits = 10 - , n_repeats = 3 - , **rs) - logo = LeaveOneGroupOut() # select CV type: # if group == None: