fixed indentation

This commit is contained in:
Tanushree Tunstall 2022-07-01 11:38:59 +01:00
parent 7eef463915
commit 0494765c9b
2 changed files with 14 additions and 23 deletions

View file

@ -146,12 +146,12 @@ def MultModelsCl(input_df, target, skf_cv
, blind_test_df
, blind_test_target
, tts_split_type
, run_blind_test = True
, resampling_type = 'none' # default
, add_cm = True # adds confusion matrix based on cross_val_predict
, add_yn = True # adds target var class numbers
, var_type = ['numerical', 'categorical','mixed']
, run_blind_test = True
, return_formatted_output = True):
'''
@ -344,27 +344,13 @@ def MultModelsCl(input_df, target, skf_cv
mm_skf_scoresD[model_name]['bts_roc_auc'] = round(roc_auc_score(blind_test_target, bts_predict),2)
mm_skf_scoresD[model_name]['bts_jcc'] = round(jaccard_score(blind_test_target, bts_predict),2)
#mm_skf_scoresD[model_name]['diff_mcc'] = train_test_diff_MCC
#%%
# ADD more info: meta data related to input and blind and resampling
# target numbers: training
yc1 = Counter(target)
yc1_ratio = yc1[0]/yc1[1]
# target numbers: test
yc2 = Counter(blind_test_target)
yc2_ratio = yc2[0]/yc2[1]
mm_skf_scoresD[model_name]['resampling'] = resampling_type
mm_skf_scoresD[model_name]['n_training_size'] = len(input_df)
mm_skf_scoresD[model_name]['n_trainingY_ratio'] = round(yc1_ratio, 2)
mm_skf_scoresD[model_name]['n_test_size'] = len(blind_test_df)
mm_skf_scoresD[model_name]['n_testY_ratio'] = round(yc2_ratio,2)
mm_skf_scoresD[model_name]['n_features'] = len(input_df.columns)
mm_skf_scoresD[model_name]['tts_split'] = tts_split_type
#ADD: target numbers for bts
yc2 = Counter(blind_test_target)
yc2_ratio = yc2[0]/yc2[1]
mm_skf_scoresD[model_name]['n_test_size'] = len(blind_test_df)
mm_skf_scoresD[model_name]['n_testY_ratio']= round(yc2_ratio,2)
#return(mm_skf_scoresD)
#============================
# Process the dict to have WF

View file

@ -45,7 +45,12 @@ for gene, drug in ml_gene_drugD.items():
print ('\nGene:', gene
, '\nDrug:', drug)
gene_low = gene.lower()
gene_dataD[gene_low] = getmldata(gene, drug)
gene_dataD[gene_low] = getmldata(gene, drug
, data_combined_model = False # this means it doesn't include 'gene_name' as a feature, as a single gene-target shouldn't have it.
, use_or = False
, omit_all_genomic_features = False
, write_maskfile = False
, write_outfile = False)
for split_type in split_types:
for data_type in split_data_types: