#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Jun 20 13:05:23 2022

@author: tanu

Run the multi-classifier ML analysis on the 70/30 train/test split data,
once per resampling strategy, and collect the resulting score objects.
"""
#%%Imports ####################################################################
import re
import argparse
import os
import sys

# gene = 'pncA'
# drug = 'pyrazinamide'
#total_mtblineage_uc = 8

###############################################################################
#%% command line args: case sensitive
# arg_parser = argparse.ArgumentParser()
# arg_parser.add_argument('-d', '--drug', help = 'drug name', default = '')
# arg_parser.add_argument('-g', '--gene', help = 'gene name', default = '')
# args = arg_parser.parse_args()

# drug = args.drug
# gene = args.gene
###############################################################################
homedir = os.path.expanduser("~")
sys.path.append(homedir + '/git/LSHTM_analysis/scripts/ml')

###############################################################################
#==================
# Import data
#==================
# NOTE(review): `gene` and `drug` are not assigned anywhere in this script
# (both the hard-coded values and the argparse block above are commented out).
# They must therefore be provided by the star import below, otherwise
# setvars() raises NameError -- confirm, or re-enable one of the blocks above.
from ml_data_7030 import *
setvars(gene, drug)
# Second star import is deliberate: presumably setvars() creates the data
# objects (X, y, X_bts, ...) as module globals of ml_data_7030, so they only
# become visible here after re-importing -- TODO confirm.
from ml_data_7030 import *

# from YC run_all_ML: run locally
#from UQ_yc_RunAllClfs import run_all_ML

#====================
# Import ML functions
#====================
from MultClfs import *

#==================
# other vars
#==================
tts_split_7030 = '70_30'
OutFile_suffix = '7030'

#==================
# Specify outdir
#==================
outdir_ml = outdir + 'ml/tts_7030/'
print('\nOutput directory:', outdir_ml)

outFile_wf = outdir_ml + gene.lower() + '_baselineC_' + OutFile_suffix + '.csv'
outFile_lf = outdir_ml + gene.lower() + '_baselineC_ext_' + OutFile_suffix + '.csv'

#%% Running models ############################################################
print('\n#####################################################################\n'
      , '\nRunning ML analysis: feature groups '
      , '\nGene name:', gene
      , '\nDrug name:', drug)

# Per-strategy arguments for MultModelsCl, keyed by a descriptive name.
fooD = {
    'baseline_paramD': {
        'input_df': X,
        'target': y,
        'var_type': 'mixed',
        'resampling_type': 'none',
    },
    'smnc_paramD': {
        'input_df': X_smnc,
        'target': y_smnc,
        'var_type': 'mixed',
        'resampling_type': 'smnc',
    },
}

# Keyword arguments that are identical for every MultModelsCl call below.
shared_kwargsD = {
    'tts_split_type': tts_split_7030,
    'skf_cv': skf_cv,
    'blind_test_df': X_bts,
    'blind_test_target': y_bts,
    'add_cm': True,
    'add_yn': True,
}

# Scores for each entry of fooD, stored under the same key.
barD = {}
for k, v in fooD.items():
    #print(k)
    print(v)
    scores_7030D = MultModelsCl(**v, **shared_kwargsD)
    barD[k] = scores_7030D

# Remaining resampling strategies. These were previously written with `=`
# inside braces, which is not valid dict syntax and stopped the file from
# being parsed at all.
ros_paramD = {
    'input_df': X_ros,
    'target': y_ros,
    'var_type': 'mixed',
    # Was 'smnc' (copy-paste slip from the SMOTE-NC entry); this entry holds
    # the X_ros/y_ros data, so it is labelled accordingly.
    'resampling_type': 'ros',
}

rus_paramD = {
    'input_df': X_rus,
    'target': y_rus,
    'var_type': 'mixed',
    'resampling_type': 'rus',
}

rouC_paramD = {
    'input_df': X_rouC,
    'target': y_rouC,
    'var_type': 'mixed',
    'resampling_type': 'rouC',
}

#====
# Only the rouC parameters are run here; ros_paramD and rus_paramD are defined
# but not passed to MultModelsCl. The result overwrites scores_7030D and is
# not added to barD.
scores_7030D = MultModelsCl(**rouC_paramD, **shared_kwargsD)

###############################################################################
###############################################################################
#%% COMBINING all dfs: WF and LF
# https://stackoverflow.com/questions/39862654/pandas-concat-of-multiple-data-frames-using-only-common-columns
###############################################################################
#====================
# Write output file
#====================
#combined_baseline_wf.to_csv(outFile_wf, index = False)
#print('\nFile successfully written:', outFile_wf)
###############################################################################