#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Jun 20 13:05:23 2022

@author: tanu

Run the baseline multiple-classifier analysis (no feature selection) for one
gene/drug pair with the 'cd_80_20' split type, once per resampling strategy,
and write the combined scores to a single CSV file.

Usage:
    <script> -d <drug> -g <gene>      (arguments are case sensitive)

Output:
    <outdir>ml/tts_cd_8020/<gene lowercased>_baselineC_noOR_cd_8020.csv
"""
#%% Imports ###################################################################
import re
import argparse
import os
import sys

# Explicit import: 'pd' was previously only available as a side effect of the
# star import from ml_data_cd_8020 below.
import pandas as pd

# Example values:
# gene = 'pncA'
# drug = 'pyrazinamide'
# total_mtblineage_uc = 8

###############################################################################
#%% command line args: case sensitive
arg_parser = argparse.ArgumentParser()
arg_parser.add_argument('-d', '--drug', help = 'drug name', default = '')
arg_parser.add_argument('-g', '--gene', help = 'gene name', default = '')
args = arg_parser.parse_args()

drug = args.drug
gene = args.gene

###############################################################################
# Make the project's ML helper modules importable.
homedir = os.path.expanduser("~")
sys.path.append(homedir + '/git/LSHTM_analysis/scripts/ml')

###############################################################################
#==================
# Import data
#==================
# NOTE(review): the module is star-imported twice on purpose. The first import
# brings setvars() into scope; the second presumably picks up the module-level
# variables that setvars(gene, drug) creates (X, y, X_bts, y_bts, skf_cv,
# outdir, ...). Confirm against ml_data_cd_8020 before simplifying.
from ml_data_cd_8020 import *
setvars(gene, drug)
from ml_data_cd_8020 import *

# from YC run_all_ML: run locally
#from UQ_yc_RunAllClfs import run_all_ML

#====================
# Import ML functions
#====================
from MultClfs import *

#==================
# other vars
#==================
tts_split_cd_8020 = 'cd_80_20'
OutFile_suffix = '_cd_8020'

#==================
# Specify outdir
#==================
outdir_ml = outdir + 'ml/tts_cd_8020/'
print('\nOutput directory:', outdir_ml)

# Create the output directory up front so that a missing directory does not
# make the final to_csv() fail after all the models have already been run.
os.makedirs(outdir_ml, exist_ok = True)

#outFile_wf = outdir_ml + gene.lower() + '_baselineC_' + OutFile_suffix + '.csv'
outFile_wf = outdir_ml + gene.lower() + '_baselineC_noOR' + OutFile_suffix + '.csv'

#%% Running models ############################################################
print('\n#####################################################################\n'
      , '\nStarting--> Running ML analysis: Baseline modes (No FS)'
      , '\nGene name:', gene
      , '\nDrug name:', drug
      , '\n#####################################################################\n')

# One entry per resampling strategy; each value holds the keyword arguments
# passed to MultModelsCl() for that strategy.
paramD = {
    'baseline_paramD': {
        'input_df'          : X
        , 'target'          : y
        , 'var_type'        : 'mixed'
        , 'resampling_type' : 'none'}
    , 'smnc_paramD': {
        'input_df'          : X_smnc
        , 'target'          : y_smnc
        , 'var_type'        : 'mixed'
        , 'resampling_type' : 'smnc'}
    , 'ros_paramD': {
        'input_df'          : X_ros
        , 'target'          : y_ros
        , 'var_type'        : 'mixed'
        , 'resampling_type' : 'ros'}
    , 'rus_paramD': {
        'input_df'          : X_rus
        , 'target'          : y_rus
        , 'var_type'        : 'mixed'
        , 'resampling_type' : 'rus'}
    , 'rouC_paramD': {
        'input_df'          : X_rouC
        , 'target'          : y_rouC
        , 'var_type'        : 'mixed'
        , 'resampling_type' : 'rouC'}
}

##==============================================================================
## Dict with no CV BT formatted df
## mmD = {}
## for k, v in paramD.items():
##     # print(mmD[k])
##     scores_cd_8020D = MultModelsCl(**paramD[k]
##                                    , tts_split_type = tts_split_cd_8020
##                                    , skf_cv = skf_cv
##                                    , blind_test_df = X_bts
##                                    , blind_test_target = y_bts
##                                    , add_cm = True
##                                    , add_yn = True
##                                    , return_formatted_output = False)
##     mmD[k] = scores_cd_8020D
##==============================================================================

## Initial run to get the dict of dicts for each sampling type containing CV, BT and metadata DFs
mmDD = {}
for sampling_name, sampling_params in paramD.items():
    scores_cd_8020D = MultModelsCl(**sampling_params
                                   , tts_split_type = tts_split_cd_8020
                                   , skf_cv = skf_cv
                                   , blind_test_df = X_bts
                                   , blind_test_target = y_bts
                                   , add_cm = True
                                   , add_yn = True
                                   , return_formatted_output = True)
    mmDD[sampling_name] = scores_cd_8020D

# Concatenate the per-sampling results into one df. A single call handles the
# whole dict; the dict keys are discarded because ignore_index = True.
# NOTE(review): assumes MultModelsCl returns a DataFrame when
# return_formatted_output = True - confirm in MultClfs.
out_wf_cd_8020 = pd.concat(mmDD, ignore_index = True)

# Best MCC first within each (resampling, source_data) group.
out_wf_cd_8020f = out_wf_cd_8020.sort_values(by = ['resampling', 'source_data', 'MCC']
                                             , ascending = [True, True, False])

print('\n######################################################################'
      , '\nEnd--> Successfully generated output DF for Multiple classifiers (baseline models)'
      , '\nGene:', gene.lower()
      , '\nDrug:', drug
      , '\noutput file:', outFile_wf
      , '\nDim of output:', out_wf_cd_8020f.shape
      , '\n######################################################################')

###############################################################################
#====================
# Write output file
#====================
out_wf_cd_8020f.to_csv(outFile_wf, index = False)
print('\nFile successfully written:', outFile_wf)
###############################################################################