132 lines
No EOL
4 KiB
Python
132 lines
No EOL
4 KiB
Python
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
"""
|
|
Created on Mon Jun 20 13:05:23 2022
|
|
|
|
@author: tanu
|
|
"""
|
|
#%%Imports ####################################################################
|
|
import re
|
|
import argparse
|
|
import os, sys
|
|
# gene = 'pncA'
|
|
# drug = 'pyrazinamide'
|
|
#total_mtblineage_uc = 8
|
|
###############################################################################
|
|
#%% command line args: case sensitive
|
|
# arg_parser = argparse.ArgumentParser()
|
|
# arg_parser.add_argument('-d', '--drug', help = 'drug name', default = '')
|
|
# arg_parser.add_argument('-g', '--gene', help = 'gene name', default = '')
|
|
# args = arg_parser.parse_args()
|
|
|
|
# drug = args.drug
|
|
# gene = args.gene
|
|
|
|
###############################################################################
|
|
# Add the project's ML scripts directory (under the user's home directory) to
# the import search path so the project-local star-imports below can resolve.
homedir = os.path.expanduser("~")
ml_scripts_dir = homedir + '/git/LSHTM_analysis/scripts/ml'
# Only register the directory once: re-running this cell in the same
# interpreter session would otherwise append a duplicate entry every time.
if ml_scripts_dir not in sys.path:
    sys.path.append(ml_scripts_dir)
|
|
###############################################################################
|
|
#==================
|
|
# Import data
|
|
#==================
|
|
from ml_data_7030 import *
|
|
# Presumably configures ml_data_7030 for this gene/drug pair; the star-import
# is repeated right after this call, presumably to pick up the names that
# setvars() (re)defines -- TODO confirm against ml_data_7030.
# NOTE(review): neither `gene` nor `drug` is assigned in this file as shown
# (the assignments and the argparse block above are commented out), so both
# must already be in scope here, e.g. via the star-import -- confirm.
setvars(gene,drug)
|
|
from ml_data_7030 import *
|
|
|
|
# from YC run_all_ML: run locally
|
|
#from UQ_yc_RunAllClfs import run_all_ML
|
|
|
|
#====================
|
|
# Import ML functions
|
|
#====================
|
|
from MultClfs import *
|
|
|
|
#==================
|
|
# other vars
|
|
#==================
|
|
tts_split_7030 = '70_30'  # passed to MultModelsCl as tts_split_type
OutFile_suffix = '7030'   # trailing tag in the output file names below

#==================
# Specify outdir
#==================
# `outdir` is not assigned in this file; presumably it is provided by the
# ml_data_7030 star-import -- confirm.
outdir_ml = ''.join((outdir, 'ml/tts_7030/'))
print('\nOutput directory:', outdir_ml)

# Output CSV paths: <outdir_ml><gene, lower-cased>_baselineC[_ext]_<suffix>.csv
outFile_wf = f'{outdir_ml}{gene.lower()}_baselineC_{OutFile_suffix}.csv'
outFile_lf = f'{outdir_ml}{gene.lower()}_baselineC_ext_{OutFile_suffix}.csv'
|
|
|
|
#%% Running models ############################################################
|
|
# Announce the run: a separator line, then the gene and drug being analysed.
print(
    '\n#####################################################################\n',
    '\nRunning ML analysis: feature groups ',
    '\nGene name:', gene,
    '\nDrug name:', drug,
)
|
|
|
|
# Keyword-argument sets for MultModelsCl, keyed by run label: one for the
# data as-is (resampling_type 'none') and one for the smnc-resampled data.
fooD = {
    'baseline_paramD': {
        'input_df': X,
        'target': y,
        'var_type': 'mixed',
        'resampling_type': 'none',
    },
    'smnc_paramD': {
        'input_df': X_smnc,
        'target': y_smnc,
        'var_type': 'mixed',
        'resampling_type': 'smnc',
    },
}

# Run each parameter set through MultModelsCl with the same split, CV and
# blind-test settings, and collect the returned scores under the run label.
barD = {}
for run_label, run_params in fooD.items():
    print(run_params)
    scores_7030D = MultModelsCl(
        **run_params,
        tts_split_type=tts_split_7030,
        skf_cv=skf_cv,
        blind_test_df=X_bts,
        blind_test_target=y_bts,
        add_cm=True,
        add_yn=True,
    )
    barD[run_label] = scores_7030D
|
|
|
|
|
|
# Keyword-argument sets for MultModelsCl for the ros, rus and rouC resampled
# data. Keys are string literals (same layout as the entries of fooD) so each
# dict can be unpacked with ** into the call.
ros_paramD = {
    'input_df': X_ros,
    'target': y_ros,
    'var_type': 'mixed',
    # Labelled after its own data like every other set (was mislabelled 'smnc').
    'resampling_type': 'ros',
}

rus_paramD = {
    'input_df': X_rus,
    'target': y_rus,
    'var_type': 'mixed',
    'resampling_type': 'rus',
}

rouC_paramD = {
    'input_df': X_rouC,
    'target': y_rouC,
    'var_type': 'mixed',
    'resampling_type': 'rouC',
}
|
|
|
|
|
|
|
|
|
|
#====
|
|
# Single run on the rouC-resampled data, with the same split, CV and
# blind-test settings as the other runs. The result is only bound to
# scores_7030D here; nothing in this file stores or writes it afterwards.
scores_7030D = MultModelsCl(
    **rouC_paramD,
    tts_split_type=tts_split_7030,
    skf_cv=skf_cv,
    blind_test_df=X_bts,
    blind_test_target=y_bts,
    add_cm=True,
    add_yn=True,
)
|
|
|
|
###############################################################################
|
|
###############################################################################
|
|
#%% COMBINING all dfs: WF and LF
|
|
# https://stackoverflow.com/questions/39862654/pandas-concat-of-multiple-data-frames-using-only-common-columns
|
|
|
|
|
|
###############################################################################
|
|
#====================
|
|
# Write output file
|
|
#====================
|
|
#combined_baseline_wf.to_csv(outFile_wf, index = False)
|
|
#print('\nFile successfully written:', outFile_wf)
|
|
############################################################################### |