Added random_state parameter to the split_tts function

This commit is contained in:
Tanushree Tunstall 2022-07-05 14:15:43 +01:00
parent e5f882841e
commit 53c229f480
3 changed files with 24 additions and 21 deletions

View file

@ -174,29 +174,29 @@ def CMLogoSkf(cm_input_df
, '\nTEST Target dim:', cm_bts_y.shape)
print("Running Multiple models on LOGO with SKF")
# #%%:Running Multiple models on LOGO with SKF
# cD3_v2 = MultModelsCl_logo_skf(input_df = cm_X
# , target = cm_y
# #, group = 'none'
# , sel_cv = skf_cv
#%%:Running Multiple models on LOGO with SKF
cD3_v2 = MultModelsCl_logo_skf(input_df = cm_X
, target = cm_y
#, group = 'none'
, sel_cv = skf_cv
# , blind_test_df = cm_bts_X
# , blind_test_target = cm_bts_y
, blind_test_df = cm_bts_X
, blind_test_target = cm_bts_y
# , tts_split_type = tts_split_type
, tts_split_type = tts_split_type
# , resampling_type = 'none' # default
# , add_cm = True
# , add_yn = True
# , var_type = 'mixed'
, resampling_type = 'none' # default
, add_cm = True
, add_yn = True
, var_type = 'mixed'
# , run_blind_test = True
# , return_formatted_output = True
# , random_state = 42
# , n_jobs = os.cpu_count() # the number of jobs should equal the number of CPU cores
# )
, run_blind_test = True
, return_formatted_output = True
, random_state = 42
, n_jobs = os.cpu_count() # the number of jobs should equal the number of CPU cores
)
# cD3_v2.to_csv(outFile)
cD3_v2.to_csv(outFile)
#%% RUN
#===============

View file

@ -215,7 +215,7 @@ def MultModelsCl(input_df, target, skf_cv
, ('Naive Bayes' , BernoulliNB() )
, ('Passive Aggresive' , PassiveAggressiveClassifier(**rs, **njobs) )
, ('QDA' , QuadraticDiscriminantAnalysis() )
, ('Random Forest' , RandomForestClassifier(**rs, n_estimators = 1000 ) )
, ('Random Forest' , RandomForestClassifier(**rs, n_estimators = 1000, **njobs ) )
, ('Random Forest2' , RandomForestClassifier(min_samples_leaf = 5
, n_estimators = 1000
, bootstrap = True

View file

@ -40,7 +40,7 @@ import argparse
import re
homedir = os.path.expanduser("~")
#%% GLOBALS
rs = {'random_state': 42}
#rs = {'random_state': 42}
njobs = {'n_jobs': os.cpu_count() } # the number of jobs should equal the number of CPU cores
#%% Define split_tts function #################################################
@ -51,7 +51,10 @@ def split_tts(ml_input_data
, dst_colname = 'dst'# determine how to subset the actual vs reverse data
, target_colname = 'dst_mode'
, include_gene_name = True
, k_smote = 5):
, k_smote = 5
, random_state = 42):
rs = {'random_state': random_state}
outDict = {}