fixed indentation

2022-07-01 11:38:59 +01:00 · 2022-07-01 11:38:59 +01:00 · 0494765c9b
commit 0494765c9b
parent 7eef463915
2 changed files with 14 additions and 23 deletions
--- a/scripts/ml/ml_functions/MultClfs.py
+++ b/scripts/ml/ml_functions/MultClfs.py
@ -146,12 +146,12 @@ def MultModelsCl(input_df, target, skf_cv
                       , blind_test_df
                       , blind_test_target
                       , tts_split_type 
-                       , run_blind_test = True

                       , resampling_type = 'none' # default
                       , add_cm = True # adds confusion matrix based on cross_val_predict
                       , add_yn = True  # adds target var class numbers
                       , var_type = ['numerical', 'categorical','mixed']
+                       , run_blind_test = True
                       , return_formatted_output = True):

    '''
@ -344,27 +344,13 @@ def MultModelsCl(input_df, target, skf_cv
           mm_skf_scoresD[model_name]['bts_roc_auc']   = round(roc_auc_score(blind_test_target, bts_predict),2)
           mm_skf_scoresD[model_name]['bts_jcc']       = round(jaccard_score(blind_test_target, bts_predict),2)
           #mm_skf_scoresD[model_name]['diff_mcc']      = train_test_diff_MCC
-#%%
-        # ADD more info: meta data related to input and blind and resampling
-    
-        # target numbers: training
-        yc1           = Counter(target)
-        yc1_ratio     = yc1[0]/yc1[1]
-    
-        # target numbers: test
-        yc2       = Counter(blind_test_target)
-        yc2_ratio = yc2[0]/yc2[1]
-    
-        mm_skf_scoresD[model_name]['resampling']        = resampling_type
-        
-        mm_skf_scoresD[model_name]['n_training_size']   = len(input_df)
-        mm_skf_scoresD[model_name]['n_trainingY_ratio'] = round(yc1_ratio, 2)
-       
-        mm_skf_scoresD[model_name]['n_test_size']     = len(blind_test_df)
-        mm_skf_scoresD[model_name]['n_testY_ratio']   = round(yc2_ratio,2)
-        mm_skf_scoresD[model_name]['n_features']      = len(input_df.columns)
-        mm_skf_scoresD[model_name]['tts_split']       = tts_split_type
-
+          
+           #ADD: target numbers for bts
+           yc2                                        = Counter(blind_test_target)
+           yc2_ratio                                  = yc2[0]/yc2[1]  
+           mm_skf_scoresD[model_name]['n_test_size']  = len(blind_test_df)
+           mm_skf_scoresD[model_name]['n_testY_ratio']= round(yc2_ratio,2)
+           
    #return(mm_skf_scoresD)
    #============================
    # Process the dict to have WF
--- a/scripts/ml/ml_iterator.py
+++ b/scripts/ml/ml_iterator.py
@ -45,7 +45,12 @@ for gene, drug in ml_gene_drugD.items():
    print ('\nGene:', gene
           , '\nDrug:', drug)
    gene_low = gene.lower()
-    gene_dataD[gene_low] = getmldata(gene, drug)
+    gene_dataD[gene_low] = getmldata(gene, drug
+              , data_combined_model = False # this means it doesn't include 'gene_name' as a feature, as a single gene-target shouldn't have it.
+              , use_or = False
+              , omit_all_genomic_features = False
+              , write_maskfile = False
+              , write_outfile = False)

    for split_type in split_types:
        for data_type in split_data_types: