Added random_state parameter to the split_tts function

This commit is contained in:
Tanushree Tunstall 2022-07-05 14:15:43 +01:00
parent e5f882841e
commit 53c229f480
3 changed files with 24 additions and 21 deletions

View file

@ -174,29 +174,29 @@ def CMLogoSkf(cm_input_df
, '\nTEST Target dim:', cm_bts_y.shape) , '\nTEST Target dim:', cm_bts_y.shape)
print("Running Multiple models on LOGO with SKF") print("Running Multiple models on LOGO with SKF")
# #%%:Running Multiple models on LOGO with SKF #%%:Running Multiple models on LOGO with SKF
# cD3_v2 = MultModelsCl_logo_skf(input_df = cm_X cD3_v2 = MultModelsCl_logo_skf(input_df = cm_X
# , target = cm_y , target = cm_y
# #, group = 'none' #, group = 'none'
# , sel_cv = skf_cv , sel_cv = skf_cv
# , blind_test_df = cm_bts_X , blind_test_df = cm_bts_X
# , blind_test_target = cm_bts_y , blind_test_target = cm_bts_y
# , tts_split_type = tts_split_type , tts_split_type = tts_split_type
# , resampling_type = 'none' # default , resampling_type = 'none' # default
# , add_cm = True , add_cm = True
# , add_yn = True , add_yn = True
# , var_type = 'mixed' , var_type = 'mixed'
# , run_blind_test = True , run_blind_test = True
# , return_formatted_output = True , return_formatted_output = True
# , random_state = 42 , random_state = 42
# , n_jobs = os.cpu_count() # the number of jobs should equal the number of CPU cores , n_jobs = os.cpu_count() # the number of jobs should equal the number of CPU cores
# ) )
# cD3_v2.to_csv(outFile) cD3_v2.to_csv(outFile)
#%% RUN #%% RUN
#=============== #===============

View file

@ -215,7 +215,7 @@ def MultModelsCl(input_df, target, skf_cv
, ('Naive Bayes' , BernoulliNB() ) , ('Naive Bayes' , BernoulliNB() )
, ('Passive Aggresive' , PassiveAggressiveClassifier(**rs, **njobs) ) , ('Passive Aggresive' , PassiveAggressiveClassifier(**rs, **njobs) )
, ('QDA' , QuadraticDiscriminantAnalysis() ) , ('QDA' , QuadraticDiscriminantAnalysis() )
, ('Random Forest' , RandomForestClassifier(**rs, n_estimators = 1000 ) ) , ('Random Forest' , RandomForestClassifier(**rs, n_estimators = 1000, **njobs ) )
, ('Random Forest2' , RandomForestClassifier(min_samples_leaf = 5 , ('Random Forest2' , RandomForestClassifier(min_samples_leaf = 5
, n_estimators = 1000 , n_estimators = 1000
, bootstrap = True , bootstrap = True

View file

@ -40,7 +40,7 @@ import argparse
import re import re
homedir = os.path.expanduser("~") homedir = os.path.expanduser("~")
#%% GLOBALS #%% GLOBALS
rs = {'random_state': 42} #rs = {'random_state': 42}
njobs = {'n_jobs': os.cpu_count() } # the number of jobs should equal the number of CPU cores njobs = {'n_jobs': os.cpu_count() } # the number of jobs should equal the number of CPU cores
#%% Define split_tts function ################################################# #%% Define split_tts function #################################################
@ -51,7 +51,10 @@ def split_tts(ml_input_data
, dst_colname = 'dst'# determine how to subset the actual vs reverse data , dst_colname = 'dst'# determine how to subset the actual vs reverse data
, target_colname = 'dst_mode' , target_colname = 'dst_mode'
, include_gene_name = True , include_gene_name = True
, k_smote = 5): , k_smote = 5
, random_state = 42):
rs = {'random_state': random_state}
outDict = {} outDict = {}