#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Jun 20 13:05:23 2022

@author: tanu

Run the baseline multiple-classifier analysis ("Baseline modes (No FS)") for
one gene/drug pair using the '70_30' split setting.

MultModelsCl() is called once per entry of `paramD` (resampling types 'none',
'smnc', 'ros', 'rus' and 'rouC'), each time with the blind-test data
(X_bts, y_bts). The per-entry results are collected in `mmD` and then
concatenated into a single object, `out_wf_7030`.
"""
#%%Imports ####################################################################
import re
import argparse
import os
import sys

# NOTE(review): `gene` and `drug` are never assigned in this file (the
# assignments and the argparse block below are commented out). They must
# already exist in the namespace -- e.g. provided by the star import of
# ml_data_7030 or by an interactive session -- otherwise setvars(gene, drug)
# raises NameError. Confirm how this script is meant to be launched.
# gene = 'pncA'
# drug = 'pyrazinamide'
#total_mtblineage_uc = 8

###############################################################################
#%% command line args: case sensitive
# arg_parser = argparse.ArgumentParser()
# arg_parser.add_argument('-d', '--drug', help = 'drug name', default = '')
# arg_parser.add_argument('-g', '--gene', help = 'gene name', default = '')
# args = arg_parser.parse_args()

# drug = args.drug
# gene = args.gene
###############################################################################
homedir = os.path.expanduser("~")
# Make the project's ML helper modules importable.
sys.path.append(homedir + '/git/LSHTM_analysis/scripts/ml')
###############################################################################
#==================
# Import data
#==================
# The first star import makes setvars() available. The module is star-imported
# again after the call -- presumably so that names (re)bound inside
# ml_data_7030 by setvars() (X, y, X_bts, skf_cv, outdir, ...) become visible
# here. TODO confirm against ml_data_7030.
from ml_data_7030 import *
setvars(gene, drug)
from ml_data_7030 import *

# from YC run_all_ML: run locally
#from UQ_yc_RunAllClfs import run_all_ML

#====================
# Import ML functions
#====================
from MultClfs import *

#==================
# other vars
#==================
tts_split_7030 = '70_30'
OutFile_suffix = '7030'

#==================
# Specify outdir
#==================
outdir_ml = outdir + 'ml/tts_7030/'
print('\nOutput directory:', outdir_ml)
outFile_wf = outdir_ml + gene.lower() + '_baselineC_' + OutFile_suffix + '.csv'
#outFile_lf = outdir_ml + gene.lower() + '_baselineC_ext_' + OutFile_suffix + '.csv'

#%% Running models ############################################################
print('\n#####################################################################\n'
      , '\nStarting--> Running ML analysis: Baseline modes (No FS)'
      , '\nGene name:', gene
      , '\nDrug name:', drug
      , '\n#####################################################################\n')

# One keyword-argument set per resampling variant; each is unpacked into
# MultModelsCl() below.
paramD = {
    'baseline_paramD': {
        'input_df': X,
        'target': y,
        'var_type': 'mixed',
        'resampling_type': 'none',
    },
    'smnc_paramD': {
        'input_df': X_smnc,
        'target': y_smnc,
        'var_type': 'mixed',
        'resampling_type': 'smnc',
    },
    'ros_paramD': {
        'input_df': X_ros,
        'target': y_ros,
        'var_type': 'mixed',
        'resampling_type': 'ros',
    },
    'rus_paramD': {
        'input_df': X_rus,
        'target': y_rus,
        'var_type': 'mixed',
        'resampling_type': 'rus',
    },
    'rouC_paramD': {
        'input_df': X_rouC,
        'target': y_rouC,
        'var_type': 'mixed',
        'resampling_type': 'rouC',
    },
}

# Initial run to get the dict containing CV, BT and metadata DFs
mmD = {}
for param_name, params in paramD.items():
    scores_7030D = MultModelsCl(**params
                                , tts_split_type = tts_split_7030
                                , skf_cv = skf_cv
                                , blind_test_df = X_bts
                                , blind_test_target = y_bts
                                , add_cm = True
                                , add_yn = True
                                , return_formatted_output = True)
    mmD[param_name] = scores_7030D

# Concatenate every per-variant result into one output object. pd.concat()
# accepts the whole mapping at once, so a single call is enough (the result
# is the same as repeating the call once per key). `pd` is not imported in
# this file; it is expected to arrive via the star imports above.
out_wf_7030 = pd.concat(mmD, ignore_index = True)

print('\n######################################################################'
      , '\nEnd--> Successfully generated output DF for Multiple classifiers (baseline models)'
      , '\nGene:', gene.lower()
      , '\nDrug:', drug
      , '\noutput file:', outFile_wf
      , '\nDim of output:', out_wf_7030.shape
      , '\n######################################################################')
###############################################################################
#====================
# Write output file
#====================
# The write is currently disabled, so report that honestly instead of
# claiming success. Re-enable the to_csv() line (and restore a success
# message) when the CSV should actually be produced.
#out_wf_7030.to_csv(outFile_wf, index = False)
print('\nOutput file NOT written (to_csv call is commented out):', outFile_wf)
###############################################################################