From 7dc7e25016bcc112113a91d5c27d60930b544204 Mon Sep 17 00:00:00 2001
From: Tanushree Tunstall <tanu@tunstall.in>
Date: Fri, 24 Jun 2022 13:41:07 +0100
Subject: [PATCH] appended to sys.path to allow local imports

---
 scripts/ml/run_7030_LOOP.py | 129 +++++-------------------------------
 scripts/ml/test_MultClfs.py |   3 +-
 2 files changed, 17 insertions(+), 115 deletions(-)

diff --git a/scripts/ml/run_7030_LOOP.py b/scripts/ml/run_7030_LOOP.py
index 0665906..1b12a38 100644
--- a/scripts/ml/run_7030_LOOP.py
+++ b/scripts/ml/run_7030_LOOP.py
@@ -5,24 +5,26 @@ Created on Mon Jun 20 13:05:23 2022
 
 @author: tanu
 """
+#%%Imports ####################################################################
 import re
 import argparse
-###############################################################################
+import os, sys
 # gene  = 'pncA'
 # drug  = 'pyrazinamide'
 #total_mtblineage_uc = 8
-
+###############################################################################
 #%% command line args: case sensitive
-arg_parser = argparse.ArgumentParser()
-arg_parser.add_argument('-d', '--drug', help = 'drug name', default = '')
-arg_parser.add_argument('-g', '--gene', help = 'gene name', default = '') 
-args = arg_parser.parse_args()
+# arg_parser = argparse.ArgumentParser()
+# arg_parser.add_argument('-d', '--drug', help = 'drug name', default = '')
+# arg_parser.add_argument('-g', '--gene', help = 'gene name', default = '') 
+# args = arg_parser.parse_args()
 
-drug    = args.drug
-gene    = args.gene
+# drug    = args.drug
+# gene    = args.gene
 
 ###############################################################################
-
+homedir = os.path.expanduser("~")
+sys.path.append(homedir + '/git/LSHTM_analysis/scripts/ml')
 ###############################################################################
 #==================
 # Import data
@@ -35,16 +37,16 @@ from ml_data_7030 import *
 #from UQ_yc_RunAllClfs import run_all_ML
 
 #====================
-# Import ML function 
+# Import ML functions 
 #====================
-# TT run all ML clfs: baseline model
-from MultModelsCl import MultModelsCl
+from MultClfs import *
 
 #==================
 # other vars
 #==================
 tts_split_7030    = '70_30'
 OutFile_suffix  = '7030'
+
 #==================
 # Specify outdir 
 #==================
@@ -54,57 +56,12 @@ print('\nOutput directory:', outdir_ml)
 outFile_wf = outdir_ml + gene.lower() + '_baselineC_' + OutFile_suffix + '.csv'
 outFile_lf = outdir_ml + gene.lower() + '_baselineC_ext_' + OutFile_suffix + '.csv'
 
-###############################################################################
-score_type_ordermapD = { 'mcc'      : 1
-                   , 'fscore'       : 2
-                   , 'jcc'          : 3
-                   , 'precision'    : 4
-                   , 'recall'       : 5      
-                   , 'accuracy'     : 6  
-                   , 'roc_auc'      : 7
-                   , 'TN'           : 8
-                   , 'FP'           : 9
-                   , 'FN'           : 10
-                   , 'TP'           : 11  
-                   , 'trainingY_neg': 12  
-                   , 'trainingY_pos': 13    
-                   , 'blindY_neg'   : 14
-                   , 'blindY_pos'   : 15
-                   , 'fit_time'     : 16
-                   , 'score_time'   : 17
-                   }
-
-scoreCV_mapD = {'test_mcc'         : 'MCC'
-                , 'test_fscore'    : 'F1'
-                , 'test_precision' : 'Precision'
-                , 'test_recall'    : 'Recall'
-                , 'test_accuracy'  : 'Accuracy'
-                , 'test_roc_auc'   : 'ROC_AUC'
-                , 'test_jcc'       : 'JCC'
-                }
-
-scoreBT_mapD = {'bts_mcc'          : 'MCC'
-                , 'bts_fscore'     : 'F1'
-                , 'bts_precision'  : 'Precision'
-                , 'bts_recall'     : 'Recall'
-                , 'bts_accuracy'   : 'Accuracy'
-                , 'bts_roc_auc'    : 'ROC_AUC'
-                , 'bts_jcc'        : 'JCC'
-               }
-
-# # data dependent variables but NOT dependent on resampling
-# bts_size  = len(X_bts)
-# yc2       = Counter(y_bts)
-# yc2_ratio = yc2[0]/yc2[1]
-
-###############################################################################
+#%% Running models ############################################################
 print('\n#####################################################################\n'
       , '\nRunning ML analysis: feature groups '
       , '\nGene name:', gene
       , '\nDrug name:', drug)
 
-
-
 fooD = {'baseline_paramD': {
                    'input_df': X
                    , 'target': y
@@ -160,62 +117,6 @@ scores_7030D = MultModelsCl(**rouC_paramD
                     , add_cm = True 
                     , add_yn = True)
 
-###############################################################################
-#%% SMOTE NC: Smote Oversampling [Numerical + categorical]
-#================
-# Baseline
-# SMOTE NC: SMNC
-#================
-smnc_scores_mmD = MultModelsCl(input_df = X_smnc
-                    , target = y_smnc
-                    , var_type = 'mixed'
-                    , tts_split_type = tts_split_7030
-                    , resampling_type = 'smnc'
-                    , skf_cv = skf_cv
-                    , blind_test_df = X_bts
-                    , blind_test_target = y_bts
-                    , add_cm = True 
-                    , add_yn = True
-                    , return_formatted_output = True):
-)
-
-smnc_all_scores = pd.DataFrame(smnc_scores_mmD)
-rs_smnc = 'smnc'
-#------------------------
-#  WF: only CV and BTS
-#-----------------------
-smnc_allT = smnc_all_scores.T
-
-smnc_CV = smnc_allT.filter(regex='test_', axis = 1); smnc_CV.columns
-# map colnames for consistency to allow concatenting
-smnc_CV.columns = smnc_CV.columns.map(scoreCV_mapD); smnc_CV.columns
-smnc_CV['Data_source'] = 'CV'
-smnc_CV['Resampling']  = rs_smnc
-
-smnc_BT = smnc_allT.filter(regex='bts_', axis = 1); smnc_BT.columns
-# map colnames for consistency to allow concatenting
-smnc_BT.columns = smnc_BT.columns.map(scoreBT_mapD); smnc_BT.columns
-smnc_BT['Data_source'] = 'BT'
-smnc_BT['Resampling']  = rs_smnc
-
-# Write csv
-# smnc_CV.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
-# smnc_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
-# smnc_CV.to_csv(outdir_ml + gene.lower() + '_smnc_CV_allF.csv')
-# smnc_BT.to_csv(outdir_ml + gene.lower() + '_smnc_BT_allF.csv')
-
-# other data dependent variables
-training_size_smnc = len(X_smnc)
-n_features         = len(X_smnc.columns)
-yc1_smnc              = Counter(y_smnc)
-yc1_ratio_smnc        = yc1_smnc[0]/yc1_smnc[1]
-
-smnc_all['training_size']   = training_size_smnc
-smnc_all['trainingY_ratio'] = round(yc1_ratio_smnc,2)
-smnc_all['n_features']      = n_features
-
-###############################################################################
-
 ###############################################################################
 ###############################################################################
 #%% COMBINING all dfs: WF and LF
diff --git a/scripts/ml/test_MultClfs.py b/scripts/ml/test_MultClfs.py
index 38bd599..201233b 100644
--- a/scripts/ml/test_MultClfs.py
+++ b/scripts/ml/test_MultClfs.py
@@ -35,9 +35,10 @@ from ml_data_7030 import *
 #from UQ_yc_RunAllClfs import run_all_ML
 
 #====================
-# Import ML function 
+# Import ML functions 
 #====================
 from MultClfs import *
+
 #==================
 # other vars
 #==================