Added FS to MultClfs.py and modified the data for the different splits for consistency
This commit is contained in:
parent
edb7aebd6a
commit
e2bc384155
12 changed files with 1585 additions and 994 deletions
|
@ -55,9 +55,8 @@ OutFile_suffix = '7030'
|
|||
outdir_ml = outdir + 'ml/tts_7030/'
|
||||
print('\nOutput directory:', outdir_ml)
|
||||
|
||||
outFile_wf = outdir_ml + gene.lower() + '_baselineC_' + OutFile_suffix + '.csv'
|
||||
#outFile_lf = outdir_ml + gene.lower() + '_baselineC_ext_' + OutFile_suffix + '.csv'
|
||||
|
||||
#outFile_wf = outdir_ml + gene.lower() + '_baselineC_' + OutFile_suffix + '.csv'
|
||||
outFile_wf = outdir_ml + gene.lower() + '_baselineC_noOR' + OutFile_suffix + '.csv'
|
||||
#%% Running models ############################################################
|
||||
print('\n#####################################################################\n'
|
||||
, '\nStarting--> Running ML analysis: Baseline modes (No FS)'
|
||||
|
@ -92,10 +91,24 @@ paramD = {
|
|||
, 'resampling_type' : 'rouC'}
|
||||
}
|
||||
|
||||
# Initial run to get the dict containing CV, BT and metadata DFs
|
||||
mmD = {}
|
||||
##==============================================================================
|
||||
## Dict holding the unformatted output (no formatted CV/BT df), i.e. return_formatted_output = False
|
||||
## mmD = {}
|
||||
## for k, v in paramD.items():
|
||||
## # print(mmD[k])
|
||||
## scores_7030D = MultModelsCl(**paramD[k]
|
||||
## , tts_split_type = tts_split_7030
|
||||
## , skf_cv = skf_cv
|
||||
## , blind_test_df = X_bts
|
||||
## , blind_test_target = y_bts
|
||||
## , add_cm = True
|
||||
## , add_yn = True
|
||||
## , return_formatted_output = False)
|
||||
## mmD[k] = scores_7030D
|
||||
##==============================================================================
|
||||
## Initial run to get a dict of dicts, one per resampling type, each containing the CV, BT and metadata DFs
|
||||
mmDD = {}
|
||||
for k, v in paramD.items():
|
||||
# print(mmD[k])
|
||||
scores_7030D = MultModelsCl(**paramD[k]
|
||||
, tts_split_type = tts_split_7030
|
||||
, skf_cv = skf_cv
|
||||
|
@ -104,23 +117,25 @@ for k, v in paramD.items():
|
|||
, add_cm = True
|
||||
, add_yn = True
|
||||
, return_formatted_output = True)
|
||||
mmD[k] = scores_7030D
|
||||
mmDD[k] = scores_7030D
|
||||
|
||||
# Extracting the dfs from within the dict and concatenating to output as one df
|
||||
for k, v in mmD.items():
|
||||
out_wf_7030 = pd.concat(mmD, ignore_index = True)
|
||||
for k, v in mmDD.items():
|
||||
out_wf_7030 = pd.concat(mmDD, ignore_index = True)
|
||||
|
||||
out_wf_7030f = out_wf_7030.sort_values(by = ['resampling', 'source_data', 'MCC'], ascending = [True, True, False], inplace = False)
|
||||
|
||||
print('\n######################################################################'
|
||||
, '\nEnd--> Successfully generated output DF for Multiple classifiers (baseline models)'
|
||||
, '\nGene:', gene.lower()
|
||||
, '\nDrug:', drug
|
||||
, '\noutput file:', outFile_wf
|
||||
, '\nDim of output:', out_wf_7030.shape
|
||||
, '\nDim of output:', out_wf_7030f.shape
|
||||
, '\n######################################################################')
|
||||
###############################################################################
|
||||
#====================
|
||||
# Write output file
|
||||
#====================
|
||||
#out_wf_7030.to_csv(outFile_wf, index = False)
|
||||
out_wf_7030f.to_csv(outFile_wf, index = False)
|
||||
print('\nFile successfully written:', outFile_wf)
|
||||
###############################################################################
|
Loading…
Add table
Add a link
Reference in a new issue