LSHTM_analysis/scripts/ml/test_MultClfs.py

98 lines
3 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Jun 24 11:07:05 2022
@author: tanu
"""
import re
import argparse
###############################################################################
# gene = 'pncA'
# drug = 'pyrazinamide'
#total_mtblineage_uc = 8
#%% command line args: case sensitive
# Command-line interface. Both options are optional and their values are
# case sensitive. With no arguments the script runs on the pncA gene with
# the drug pyrazinamide.
arg_parser = argparse.ArgumentParser()
# pyrazinamide is the drug and pncA is the gene; each default must sit on
# its matching option so that setvars(gene, drug) receives them correctly.
arg_parser.add_argument('-d', '--drug', help = 'drug name', default = 'pyrazinamide')
arg_parser.add_argument('-g', '--gene', help = 'gene name', default = 'pncA')
args = arg_parser.parse_args()
drug = args.drug
gene = args.gene
###############################################################################
###############################################################################
#==================
# Import data
#==================
# First star-import: makes setvars() available in this namespace (it is not
# defined or imported anywhere else in this script).
from ml_data_7030 import *
# NOTE(review): presumably setvars() builds the gene/drug-specific data and
# stores it as module-level variables inside ml_data_7030 - confirm there.
setvars(gene,drug)
# Second star-import: a star-import copies names at the moment it runs, so it
# is repeated here to pick up anything ml_data_7030 bound during setvars().
# NOTE(review): names used further down (outdir, X_smnc, y_smnc, skf_cv,
# X_bts, y_bts) are assumed to arrive through this import - confirm.
from ml_data_7030 import *
# from YC run_all_ML: run locally
#from UQ_yc_RunAllClfs import run_all_ML
#====================
# Import ML function
#====================
from MultClfs import *
#==================
# other vars
#==================
# Labels for the 70/30 split: one is passed to MultModelsCl as the split
# type, the other is the suffix intended for output file names.
OutFile_suffix = '7030'
tts_split_7030 = '70_30'
#==================
# Specify outdir
#==================
# Results for this split go in the 'ml/tts_7030/' subfolder of outdir.
outdir_ml = ''.join((outdir, 'ml/tts_7030/'))
print('\nOutput directory:', outdir_ml)
#outFile_wf = outdir_ml + gene.lower() + '_baselineC_' + OutFile_suffix + '.csv'
#outFile_lf = outdir_ml + gene.lower() + '_baselineC_ext_' + OutFile_suffix + '.csv'
###############################################################################
# Run banner: the pieces are handed to print() as separate arguments, so they
# are joined with print's default single-space separator.
_run_banner = (
    '\n#####################################################################\n',
    '\nRunning ML analysis: Multiple models',
    '\nGene name:', gene,
    '\nDrug name:', drug,
)
print(*_run_banner)
###############################################################################
#%% Test MultModelsCl with and without formatted output
# Both runs use exactly the same data and settings and differ only in
# `return_formatted_output`, so the common keyword arguments are built once.
_mm_shared_kwargs = dict(
    input_df = X_smnc,
    target = y_smnc,
    var_type = 'mixed',
    tts_split_type = tts_split_7030,
    resampling_type = 'smnc',
    skf_cv = skf_cv,
    blind_test_df = X_bts,
    blind_test_target = y_bts,
    add_cm = True,
    add_yn = True,
)

#================
# MultModelsCl: without formatted output
#================
mmD = MultModelsCl(return_formatted_output = False, **_mm_shared_kwargs)

#================
# MultModelsCl: WITH formatted output
#================
mmDF = MultModelsCl(return_formatted_output = True, **_mm_shared_kwargs)
#=================
# test function
#=================
# Format the raw result of the MultModelsCl call made with
# return_formatted_output = False, which this script binds to `mmD`.
# NOTE(review): assumes ProcessMultModelCl takes that unformatted result as
# its first argument - confirm against MultClfs.
ProcessMultModelCl(mmD)