Added random_state parameter to the split_tts function

2022-07-05 14:15:43 +01:00 · 2022-07-05 14:15:43 +01:00 · 53c229f480
commit 53c229f480
parent e5f882841e
3 changed files with 24 additions and 21 deletions
--- a/scripts/ml/combined_model/cm_logo_skf.py
+++ b/scripts/ml/combined_model/cm_logo_skf.py
@ -174,29 +174,29 @@ def CMLogoSkf(cm_input_df
              , '\nTEST Target dim:', cm_bts_y.shape)
        print("Running Multiple models on LOGO with SKF")
-        # #%%:Running Multiple models on LOGO with SKF
+        #%%:Running Multiple models on LOGO with SKF
-        # cD3_v2 = MultModelsCl_logo_skf(input_df = cm_X
+        cD3_v2 = MultModelsCl_logo_skf(input_df = cm_X
-        #                 , target = cm_y
+                        , target = cm_y
-        #                 #, group = 'none'
+                        #, group = 'none'
-        #                 , sel_cv = skf_cv
+                        , sel_cv = skf_cv
-        #                 , blind_test_df = cm_bts_X
+                        , blind_test_df = cm_bts_X
-        #                 , blind_test_target = cm_bts_y
+                        , blind_test_target = cm_bts_y
-        #                 , tts_split_type = tts_split_type
+                        , tts_split_type = tts_split_type
-        #                 , resampling_type = 'none' # default
+                        , resampling_type = 'none' # default
-        #                 , add_cm = True 
+                        , add_cm = True 
-        #                 , add_yn = True 
+                        , add_yn = True 
-        #                 , var_type = 'mixed'
+                        , var_type = 'mixed'
-        #                 , run_blind_test = True
+                        , run_blind_test = True
-        #                 , return_formatted_output = True
+                        , return_formatted_output = True
-        #                 , random_state = 42
+                        , random_state = 42
-        #                 , n_jobs = os.cpu_count() # the number of jobs should equal the number of CPU cores
+                        , n_jobs = os.cpu_count() # the number of jobs should equal the number of CPU cores
-        #                 )
+                        )
-        # cD3_v2.to_csv(outFile)
+        cD3_v2.to_csv(outFile)
 #%%  RUN
 #===============
--- a/scripts/ml/ml_functions/MultClfs.py
+++ b/scripts/ml/ml_functions/MultClfs.py
@ -215,7 +215,7 @@ def MultModelsCl(input_df, target, skf_cv
               , ('Naive Bayes'               , BernoulliNB() )
               , ('Passive Aggresive'         , PassiveAggressiveClassifier(**rs, **njobs) )
               , ('QDA'                       , QuadraticDiscriminantAnalysis() )
-               , ('Random Forest'             , RandomForestClassifier(**rs, n_estimators = 1000 ) ) 
+               , ('Random Forest'             , RandomForestClassifier(**rs, n_estimators = 1000, **njobs ) ) 
               , ('Random Forest2'            , RandomForestClassifier(min_samples_leaf = 5
                                                                       , n_estimators     = 1000
                                                                       , bootstrap        = True
--- a/scripts/ml/ml_functions/SplitTTS.py
+++ b/scripts/ml/ml_functions/SplitTTS.py
@ -40,7 +40,7 @@ import argparse
 import re
 homedir = os.path.expanduser("~")
 #%% GLOBALS
-rs = {'random_state': 42}
+#rs = {'random_state': 42}
 njobs = {'n_jobs': os.cpu_count() } # the number of jobs should equal the number of CPU cores
 #%% Define split_tts function #################################################
@ -51,7 +51,10 @@ def split_tts(ml_input_data
              , dst_colname    = 'dst'# determine how to subset the actual vs reverse data
              , target_colname = 'dst_mode'
              , include_gene_name = True
-              , k_smote = 5):
+              , k_smote = 5
              , random_state = 42):
    rs = {'random_state': random_state}
    outDict = {}