added FS to MultClfs.py and modified data for different splits for consistency

This commit is contained in:
Tanushree Tunstall 2022-06-24 20:35:53 +01:00
parent edb7aebd6a
commit e2bc384155
12 changed files with 1585 additions and 994 deletions

View file

@ -55,9 +55,8 @@ OutFile_suffix = '7030'
# Results for this run are written under the 'ml/tts_7030/' sub-directory of outdir.
outdir_ml = outdir + 'ml/tts_7030/'
print('\nOutput directory:', outdir_ml)
# Path of the CSV that receives the results DF. outFile_wf is assigned exactly
# once: a second assignment directly after the first would make the first dead.
# NOTE(review): there is no '_' between 'noOR' and OutFile_suffix, so the file
# name ends in '..._baselineC_noOR<suffix>.csv' -- confirm this is intended
# before changing it, since other scripts may read this exact path.
outFile_wf = outdir_ml + gene.lower() + '_baselineC_noOR' + OutFile_suffix + '.csv'
#%% Running models ############################################################
print('\n#####################################################################\n'
, '\nStarting--> Running ML analysis: Baseline modes (No FS)'
@ -92,10 +91,24 @@ paramD = {
, 'resampling_type' : 'rouC'}
}
# Initial run to get the dict containing CV, BT and metadata DFs
mmD = {}
##==============================================================================
## Alternative run (disabled): dict of raw outputs, i.e. without the formatted CV/BT DFs (return_formatted_output = False)
## mmD = {}
## for k, v in paramD.items():
## # print(mmD[k])
## scores_7030D = MultModelsCl(**paramD[k]
## , tts_split_type = tts_split_7030
## , skf_cv = skf_cv
## , blind_test_df = X_bts
## , blind_test_target = y_bts
## , add_cm = True
## , add_yn = True
## , return_formatted_output = False)
## mmD[k] = scores_7030D
##==============================================================================
## Initial run to get the dict of dicts for each sampling type containing CV, BT and metadata DFs
mmDD = {}
for k, v in paramD.items():
# print(mmD[k])
scores_7030D = MultModelsCl(**paramD[k]
, tts_split_type = tts_split_7030
, skf_cv = skf_cv
@ -104,23 +117,25 @@ for k, v in paramD.items():
, add_cm = True
, add_yn = True
, return_formatted_output = True)
mmD[k] = scores_7030D
mmDD[k] = scores_7030D
# Concatenate the MultModelsCl outputs stored in mmDD (one entry per paramD key)
# into a single DF. pd.concat accepts the mapping directly, so a single call is
# enough; wrapping it in a loop over the same dict only repeats the identical
# concatenation once per key. ignore_index = True discards the dict keys and
# the original row labels in favour of a fresh 0..n-1 index.
out_wf_7030 = pd.concat(mmDD, ignore_index = True)
# Sorted copy: 'resampling' and 'source_data' ascending, 'MCC' descending, so
# the highest MCC comes first within each resampling/source_data group.
# inplace = False leaves out_wf_7030 itself unsorted.
out_wf_7030f = out_wf_7030.sort_values(by = ['resampling', 'source_data', 'MCC'], ascending = [True, True, False], inplace = False)
# Completion report: gene, drug, output path and the dimensions of both the
# concatenated DF and its sorted copy. The pieces are gathered in a tuple and
# unpacked into print(), which separates its arguments with single spaces.
end_msg_parts = (
    '\n######################################################################',
    '\nEnd--> Successfully generated output DF for Multiple classifiers (baseline models)',
    '\nGene:', gene.lower(),
    '\nDrug:', drug,
    '\noutput file:', outFile_wf,
    '\nDim of output:', out_wf_7030.shape,
    '\nDim of output:', out_wf_7030f.shape,
    '\n######################################################################',
)
print(*end_msg_parts)
###############################################################################
#====================
# Write output file
#====================
#out_wf_7030.to_csv(outFile_wf, index = False)
# Write the sorted results DF to outFile_wf as CSV without the row index,
# then report the path that was written.
out_wf_7030f.to_csv(path_or_buf = outFile_wf, index = False)
print('\nFile successfully written:', outFile_wf)
###############################################################################