diff --git a/scripts/ml/ml_functions/MultClfs.py b/scripts/ml/ml_functions/MultClfs.py index 6c8e7f8..c687159 100755 --- a/scripts/ml/ml_functions/MultClfs.py +++ b/scripts/ml/ml_functions/MultClfs.py @@ -146,12 +146,12 @@ def MultModelsCl(input_df, target, skf_cv , blind_test_df , blind_test_target , tts_split_type - , run_blind_test = True , resampling_type = 'none' # default , add_cm = True # adds confusion matrix based on cross_val_predict , add_yn = True # adds target var class numbers , var_type = ['numerical', 'categorical','mixed'] + , run_blind_test = True , return_formatted_output = True): ''' @@ -344,27 +344,13 @@ def MultModelsCl(input_df, target, skf_cv mm_skf_scoresD[model_name]['bts_roc_auc'] = round(roc_auc_score(blind_test_target, bts_predict),2) mm_skf_scoresD[model_name]['bts_jcc'] = round(jaccard_score(blind_test_target, bts_predict),2) #mm_skf_scoresD[model_name]['diff_mcc'] = train_test_diff_MCC -#%% - # ADD more info: meta data related to input and blind and resampling - - # target numbers: training - yc1 = Counter(target) - yc1_ratio = yc1[0]/yc1[1] - - # target numbers: test - yc2 = Counter(blind_test_target) - yc2_ratio = yc2[0]/yc2[1] - - mm_skf_scoresD[model_name]['resampling'] = resampling_type - - mm_skf_scoresD[model_name]['n_training_size'] = len(input_df) - mm_skf_scoresD[model_name]['n_trainingY_ratio'] = round(yc1_ratio, 2) - - mm_skf_scoresD[model_name]['n_test_size'] = len(blind_test_df) - mm_skf_scoresD[model_name]['n_testY_ratio'] = round(yc2_ratio,2) - mm_skf_scoresD[model_name]['n_features'] = len(input_df.columns) - mm_skf_scoresD[model_name]['tts_split'] = tts_split_type - + + #ADD: target numbers for bts + yc2 = Counter(blind_test_target) + yc2_ratio = yc2[0]/yc2[1] + mm_skf_scoresD[model_name]['n_test_size'] = len(blind_test_df) + mm_skf_scoresD[model_name]['n_testY_ratio']= round(yc2_ratio,2) + #return(mm_skf_scoresD) #============================ # Process the dict to have WF diff --git a/scripts/ml/ml_iterator.py 
b/scripts/ml/ml_iterator.py index a9be2b4..e6ea9d2 100755 --- a/scripts/ml/ml_iterator.py +++ b/scripts/ml/ml_iterator.py @@ -45,7 +45,12 @@ for gene, drug in ml_gene_drugD.items(): print ('\nGene:', gene , '\nDrug:', drug) gene_low = gene.lower() - gene_dataD[gene_low] = getmldata(gene, drug) + gene_dataD[gene_low] = getmldata(gene, drug + , data_combined_model = False # this means it doesn't include 'gene_name' as a feature as a single gene-target shouldn't have it. + , use_or = False + , omit_all_genomic_features = False + , write_maskfile = False + , write_outfile = False) for split_type in split_types: for data_type in split_data_types: