optimised run_7030.py to generate output from dict now that the processfunction and parameter dicts have been added
This commit is contained in:
parent
7dc7e25016
commit
b37a950fec
12 changed files with 180 additions and 128408 deletions
|
@ -9,6 +9,8 @@ Created on Mon Jun 20 13:05:23 2022
|
|||
import re
|
||||
import argparse
|
||||
import os, sys
|
||||
import collections
|
||||
|
||||
# gene = 'pncA'
|
||||
# drug = 'pyrazinamide'
|
||||
#total_mtblineage_uc = 8
|
||||
|
@ -25,6 +27,7 @@ import os, sys
|
|||
###############################################################################
|
||||
homedir = os.path.expanduser("~")
|
||||
sys.path.append(homedir + '/git/LSHTM_analysis/scripts/ml')
|
||||
|
||||
###############################################################################
|
||||
#==================
|
||||
# Import data
|
||||
|
@ -54,79 +57,70 @@ outdir_ml = outdir + 'ml/tts_7030/'
|
|||
print('\nOutput directory:', outdir_ml)
|
||||
|
||||
outFile_wf = outdir_ml + gene.lower() + '_baselineC_' + OutFile_suffix + '.csv'
|
||||
outFile_lf = outdir_ml + gene.lower() + '_baselineC_ext_' + OutFile_suffix + '.csv'
|
||||
#outFile_lf = outdir_ml + gene.lower() + '_baselineC_ext_' + OutFile_suffix + '.csv'
|
||||
|
||||
#%% Running models ############################################################
|
||||
print('\n#####################################################################\n'
|
||||
, '\nRunning ML analysis: feature groups '
|
||||
, '\nStarting--> Running ML analysis: Baseline modes (No FS)'
|
||||
, '\nGene name:', gene
|
||||
, '\nDrug name:', drug)
|
||||
, '\nDrug name:', drug
|
||||
, '\n#####################################################################\n')
|
||||
|
||||
fooD = {'baseline_paramD': {
|
||||
'input_df': X
|
||||
, 'target': y
|
||||
, 'var_type': 'mixed'
|
||||
, 'resampling_type': 'none'}
|
||||
,
|
||||
'smnc_paramD': {'input_df': X_smnc
|
||||
, 'target': y_smnc
|
||||
, 'var_type': 'mixed'
|
||||
, 'resampling_type': 'smnc'}
|
||||
}
|
||||
paramD = {
|
||||
'baseline_paramD': { 'input_df' : X
|
||||
, 'target' : y
|
||||
, 'var_type' : 'mixed'
|
||||
, 'resampling_type': 'none'}
|
||||
|
||||
, 'smnc_paramD': { 'input_df' : X_smnc
|
||||
, 'target' : y_smnc
|
||||
, 'var_type' : 'mixed'
|
||||
, 'resampling_type' : 'smnc'}
|
||||
|
||||
, 'ros_paramD': { 'input_df' : X_ros
|
||||
, 'target' : y_ros
|
||||
, 'var_type' : 'mixed'
|
||||
, 'resampling_type' : 'ros'}
|
||||
|
||||
barD = {}
|
||||
for k, v in fooD.items():
|
||||
#print(k)
|
||||
print(fooD[k])
|
||||
scores_7030D = MultModelsCl(**fooD[k]
|
||||
, 'rus_paramD' : { 'input_df' : X_rus
|
||||
, 'target' : y_rus
|
||||
, 'var_type' : 'mixed'
|
||||
, 'resampling_type' : 'rus'}
|
||||
|
||||
, 'rouC_paramD' : { 'input_df' : X_rouC
|
||||
, 'target' : y_rouC
|
||||
, 'var_type' : 'mixed'
|
||||
, 'resampling_type' : 'rouC'}
|
||||
}
|
||||
|
||||
# Initial run to get the dict containing CV, BT and metadata DFs
|
||||
mmD = {}
|
||||
for k, v in paramD.items():
|
||||
# print(fooD[k])
|
||||
scores_7030D = MultModelsCl(**paramD[k]
|
||||
, tts_split_type = tts_split_7030
|
||||
, skf_cv = skf_cv
|
||||
, blind_test_df = X_bts
|
||||
, blind_test_target = y_bts
|
||||
, add_cm = True
|
||||
, add_yn = True)
|
||||
barD[k] = scores_7030D
|
||||
, add_yn = True
|
||||
, return_formatted_output = True)
|
||||
mmD[k] = scores_7030D
|
||||
|
||||
|
||||
ros_paramD = {input_df = X_ros
|
||||
, target = y_ros
|
||||
, var_type = 'mixed'
|
||||
, resampling_type = 'smnc'}
|
||||
|
||||
|
||||
rus_paramD = {input_df = X_rus
|
||||
, target = y_rus
|
||||
, var_type = 'mixed'
|
||||
, resampling_type = 'rus'}
|
||||
|
||||
|
||||
rouC_paramD = {input_df = X_rouC
|
||||
, target = y_rouC
|
||||
, var_type = 'mixed'
|
||||
, resampling_type = 'rouC'}
|
||||
|
||||
|
||||
|
||||
|
||||
#====
|
||||
scores_7030D = MultModelsCl(**rouC_paramD
|
||||
, tts_split_type = tts_split_7030
|
||||
, skf_cv = skf_cv
|
||||
, blind_test_df = X_bts
|
||||
, blind_test_target = y_bts
|
||||
, add_cm = True
|
||||
, add_yn = True)
|
||||
|
||||
###############################################################################
|
||||
###############################################################################
|
||||
#%% COMBINING all dfs: WF and LF
|
||||
# https://stackoverflow.com/questions/39862654/pandas-concat-of-multiple-data-frames-using-only-common-columns
|
||||
|
||||
|
||||
for k, v in mmD.items():
|
||||
out_wf_7030 = pd.concat(mmD, ignore_index = True)
|
||||
|
||||
print('\n######################################################################'
|
||||
, '\nEnd--> Successfully generated output DF for Multiple classifiers (baseline models)'
|
||||
, '\nGene:', gene.lower()
|
||||
, '\nDrug:', drug
|
||||
, '\noutput file:', outFile_wf
|
||||
, '\nDim of output:', out_wf_7030.shape
|
||||
, '\n######################################################################')
|
||||
###############################################################################
|
||||
#====================
|
||||
# Write output file
|
||||
#====================
|
||||
#combined_baseline_wf.to_csv(outFile_wf, index = False)
|
||||
#print('\nFile successfully written:', outFile_wf)
|
||||
out_wf_7030.to_csv(outFile_wf, index = False)
|
||||
print('\nFile successfully written:', outFile_wf)
|
||||
###############################################################################
|
Loading…
Add table
Add a link
Reference in a new issue