diff --git a/scripts/ml/combined_model/cm_logo_skf.py b/scripts/ml/combined_model/cm_logo_skf.py index 0748466..4efa0f3 100755 --- a/scripts/ml/combined_model/cm_logo_skf.py +++ b/scripts/ml/combined_model/cm_logo_skf.py @@ -174,29 +174,29 @@ def CMLogoSkf(cm_input_df , '\nTEST Target dim:', cm_bts_y.shape) print("Running Multiple models on LOGO with SKF") - # #%%:Running Multiple models on LOGO with SKF - # cD3_v2 = MultModelsCl_logo_skf(input_df = cm_X - # , target = cm_y - # #, group = 'none' - # , sel_cv = skf_cv + #%%:Running Multiple models on LOGO with SKF + cD3_v2 = MultModelsCl_logo_skf(input_df = cm_X + , target = cm_y + #, group = 'none' + , sel_cv = skf_cv - # , blind_test_df = cm_bts_X - # , blind_test_target = cm_bts_y + , blind_test_df = cm_bts_X + , blind_test_target = cm_bts_y - # , tts_split_type = tts_split_type + , tts_split_type = tts_split_type - # , resampling_type = 'none' # default - # , add_cm = True - # , add_yn = True - # , var_type = 'mixed' + , resampling_type = 'none' # default + , add_cm = True + , add_yn = True + , var_type = 'mixed' - # , run_blind_test = True - # , return_formatted_output = True - # , random_state = 42 - # , n_jobs = os.cpu_count() # the number of jobs should equal the number of CPU cores - # ) + , run_blind_test = True + , return_formatted_output = True + , random_state = 42 + , n_jobs = os.cpu_count() # the number of jobs should equal the number of CPU cores + ) - # cD3_v2.to_csv(outFile) + cD3_v2.to_csv(outFile) #%% RUN #=============== diff --git a/scripts/ml/ml_functions/MultClfs.py b/scripts/ml/ml_functions/MultClfs.py index 522ef20..d3b684a 100755 --- a/scripts/ml/ml_functions/MultClfs.py +++ b/scripts/ml/ml_functions/MultClfs.py @@ -215,7 +215,7 @@ def MultModelsCl(input_df, target, skf_cv , ('Naive Bayes' , BernoulliNB() ) , ('Passive Aggresive' , PassiveAggressiveClassifier(**rs, **njobs) ) , ('QDA' , QuadraticDiscriminantAnalysis() ) - , ('Random Forest' , RandomForestClassifier(**rs, n_estimators = 1000 ) ) + , ('Random Forest' , RandomForestClassifier(**rs, n_estimators = 1000, **njobs ) ) , ('Random Forest2' , RandomForestClassifier(min_samples_leaf = 5 , n_estimators = 1000 , bootstrap = True diff --git a/scripts/ml/ml_functions/SplitTTS.py b/scripts/ml/ml_functions/SplitTTS.py index a2de72b..70e47ea 100644 --- a/scripts/ml/ml_functions/SplitTTS.py +++ b/scripts/ml/ml_functions/SplitTTS.py @@ -40,7 +40,7 @@ import argparse import re homedir = os.path.expanduser("~") #%% GLOBALS -rs = {'random_state': 42} +#rs = {'random_state': 42} njobs = {'n_jobs': os.cpu_count() } # the number of jobs should equal the number of CPU cores #%% Define split_tts function ################################################# @@ -51,7 +51,10 @@ def split_tts(ml_input_data , dst_colname = 'dst'# determine how to subset the actual vs reverse data , target_colname = 'dst_mode' , include_gene_name = True - , k_smote = 5): + , k_smote = 5 + , random_state = 42): + + rs = {'random_state': random_state} outDict = {}