#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Jun 24 11:07:05 2022

@author: tanu

Smoke-test script for the multiple-classifier helpers in ``MultClfs``.

Provenance: scripts/ml/test_MultClfs.py, added in commit
96f4e7085a29148c52ebbe5bce09c288bae6759f (Tanushree Tunstall,
Fri, 24 Jun 2022 13:26:42 +0100): "added test_MultClfs.py to test the
functions now in a single script".

What the script does, in order:
  1. Parses ``--drug`` / ``--gene`` from the command line (case sensitive).
  2. Calls ``setvars(gene, drug)`` from ``ml_data_7030`` and star-imports
     that module to obtain the data objects used below
     (``X_smnc``, ``y_smnc``, ``X_bts``, ``y_bts``, ``skf_cv``, ``outdir``).
  3. Calls ``MultModelsCl`` twice on the same inputs, once with
     ``return_formatted_output = False`` and once with ``True``.
  4. Calls ``ProcessMultModelCl`` on the unformatted result.

Usage:
    ./test_MultClfs.py --drug pyrazinamide --gene pncA
"""
import re
import argparse
###############################################################################
# Reference values (these are also the command-line defaults below):
# gene = 'pncA'
# drug = 'pyrazinamide'
#total_mtblineage_uc = 8

#%% command line args: case sensitive
arg_parser = argparse.ArgumentParser()
# Defaults match the reference gene/drug pair above: 'pyrazinamide' is the
# drug and 'pncA' is the gene.
arg_parser.add_argument('-d', '--drug', help='drug name', default='pyrazinamide')
arg_parser.add_argument('-g', '--gene', help='gene name', default='pncA')
args = arg_parser.parse_args()

drug = args.drug
gene = args.gene

###############################################################################
#==================
# Import data
#==================
from ml_data_7030 import *
setvars(gene, drug)
# NOTE(review): the star import is deliberately repeated after setvars();
# presumably setvars() populates module-level names in ml_data_7030 that only
# become visible here on a fresh star import -- confirm against ml_data_7030.
from ml_data_7030 import *

# from YC run_all_ML: run locally
#from UQ_yc_RunAllClfs import run_all_ML

#====================
# Import ML function
#====================
from MultClfs import *

#==================
# other vars
#==================
tts_split_7030 = '70_30'
OutFile_suffix = '7030'

#==================
# Specify outdir
#==================
# `outdir` is expected to come from the ml_data_7030 star import.
outdir_ml = outdir + 'ml/tts_7030/'
print('\nOutput directory:', outdir_ml)

#outFile_wf = outdir_ml + gene.lower() + '_baselineC_' + OutFile_suffix + '.csv'
#outFile_lf = outdir_ml + gene.lower() + '_baselineC_ext_' + OutFile_suffix + '.csv'

###############################################################################
print('\n#####################################################################\n'
      , '\nRunning ML analysis: Multiple models'
      , '\nGene name:', gene
      , '\nDrug name:', drug)

###############################################################################
#%% Test MultModelsCl with and without formatted output
# Both calls use exactly the same inputs; only `return_formatted_output`
# differs, so the shared arguments are written once.
mm_common_args = dict(
    input_df=X_smnc,
    target=y_smnc,
    var_type='mixed',
    tts_split_type=tts_split_7030,
    resampling_type='smnc',
    skf_cv=skf_cv,
    blind_test_df=X_bts,
    blind_test_target=y_bts,
    add_cm=True,
    add_yn=True,
)

#================
# MultModelsCl: without formatted output
#================
mmD = MultModelsCl(**mm_common_args, return_formatted_output=False)

#================
# MultModelsCl: WITH formatted output
#================
mmDF = MultModelsCl(**mm_common_args, return_formatted_output=True)

#=================
# test function
#=================
# output from function call
# NOTE(review): this previously passed `smnc_scores_mmD`, a name this script
# never assigns. `mmD` (the unformatted MultModelsCl output) is what the
# comment above describes -- confirm no star import was meant to supply the
# old name.
ProcessMultModelCl(mmD)