diff --git a/scripts/ml/ml_iterator.py b/scripts/ml/ml_iterator.py
index 8ebb88c..7ea16fb 100755
--- a/scripts/ml/ml_iterator.py
+++ b/scripts/ml/ml_iterator.py
@@ -24,6 +24,14 @@ from MultClfs import *
 from GetMLData import *
 from SplitTTS import *
 
+skf_cv = StratifiedKFold(n_splits = 10
+                         #, shuffle = False, random_state= None)
+                         , shuffle = True,**rs)
+
+#rskf_cv = RepeatedStratifiedKFold(n_splits = 10
+#                                  , n_repeats = 3
+#                                  , **rs)
+
 # param dict for getmldata()
 gene_model_paramD = {'data_combined_model' : False
                      , 'use_or' : False
@@ -89,14 +97,21 @@ for gene, drug in ml_gene_drugD.items():
     mmDD = {}
     for k, v in paramD.items():
         scoresD = MultModelsCl(**paramD[k]
-                               , tts_split_type = split_type
                                , sel_cv = skf_cv
-                               , blind_test_df = tempD['X_bts']
-                               , blind_test_target = tempD['y_bts']
-                               , scale_numeric = ['min_max']
+                               , tts_split_type = split_type
+
                                , add_cm = True
                                , add_yn = True
-                               , return_formatted_output = True)
+
+                               , scale_numeric = ['min_max']
+
+                               , run_blind_test = True
+                               , blind_test_df = tempD['X_bts']
+                               , blind_test_target = tempD['y_bts']
+                               , return_formatted_output = True
+                               , random_state = 42
+                               , n_jobs = os.cpu_count()
+                               )
         mmDD[k] = scoresD
 
     # Extracting the dfs from within the dict and concatenating to output as one df