From 7dc7e25016bcc112113a91d5c27d60930b544204 Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Fri, 24 Jun 2022 13:41:07 +0100 Subject: [PATCH] appended sys.path to allow local imports --- scripts/ml/run_7030_LOOP.py | 129 +++++------------------------------- scripts/ml/test_MultClfs.py | 3 +- 2 files changed, 17 insertions(+), 115 deletions(-) diff --git a/scripts/ml/run_7030_LOOP.py b/scripts/ml/run_7030_LOOP.py index 0665906..1b12a38 100644 --- a/scripts/ml/run_7030_LOOP.py +++ b/scripts/ml/run_7030_LOOP.py @@ -5,24 +5,26 @@ Created on Mon Jun 20 13:05:23 2022 @author: tanu """ +#%%Imports #################################################################### import re import argparse -############################################################################### +import os, sys # gene = 'pncA' # drug = 'pyrazinamide' #total_mtblineage_uc = 8 - +############################################################################### #%% command line args: case sensitive -arg_parser = argparse.ArgumentParser() -arg_parser.add_argument('-d', '--drug', help = 'drug name', default = '') -arg_parser.add_argument('-g', '--gene', help = 'gene name', default = '') -args = arg_parser.parse_args() +# arg_parser = argparse.ArgumentParser() +# arg_parser.add_argument('-d', '--drug', help = 'drug name', default = '') +# arg_parser.add_argument('-g', '--gene', help = 'gene name', default = '') +# args = arg_parser.parse_args() -drug = args.drug -gene = args.gene +# drug = args.drug +# gene = args.gene ############################################################################### - +homedir = os.path.expanduser("~") +sys.path.append(homedir + '/git/LSHTM_analysis/scripts/ml') ############################################################################### #================== # Import data @@ -35,16 +37,16 @@ from ml_data_7030 import * #from UQ_yc_RunAllClfs import run_all_ML #==================== -# Import ML function +# Import ML functions #==================== -# TT run all ML 
clfs: baseline model -from MultModelsCl import MultModelsCl +from MultClfs import * #================== # other vars #================== tts_split_7030 = '70_30' OutFile_suffix = '7030' + #================== # Specify outdir #================== @@ -54,57 +56,12 @@ print('\nOutput directory:', outdir_ml) outFile_wf = outdir_ml + gene.lower() + '_baselineC_' + OutFile_suffix + '.csv' outFile_lf = outdir_ml + gene.lower() + '_baselineC_ext_' + OutFile_suffix + '.csv' -############################################################################### -score_type_ordermapD = { 'mcc' : 1 - , 'fscore' : 2 - , 'jcc' : 3 - , 'precision' : 4 - , 'recall' : 5 - , 'accuracy' : 6 - , 'roc_auc' : 7 - , 'TN' : 8 - , 'FP' : 9 - , 'FN' : 10 - , 'TP' : 11 - , 'trainingY_neg': 12 - , 'trainingY_pos': 13 - , 'blindY_neg' : 14 - , 'blindY_pos' : 15 - , 'fit_time' : 16 - , 'score_time' : 17 - } - -scoreCV_mapD = {'test_mcc' : 'MCC' - , 'test_fscore' : 'F1' - , 'test_precision' : 'Precision' - , 'test_recall' : 'Recall' - , 'test_accuracy' : 'Accuracy' - , 'test_roc_auc' : 'ROC_AUC' - , 'test_jcc' : 'JCC' - } - -scoreBT_mapD = {'bts_mcc' : 'MCC' - , 'bts_fscore' : 'F1' - , 'bts_precision' : 'Precision' - , 'bts_recall' : 'Recall' - , 'bts_accuracy' : 'Accuracy' - , 'bts_roc_auc' : 'ROC_AUC' - , 'bts_jcc' : 'JCC' - } - -# # data dependent variables but NOT dependent on resampling -# bts_size = len(X_bts) -# yc2 = Counter(y_bts) -# yc2_ratio = yc2[0]/yc2[1] - -############################################################################### +#%% Running models ############################################################ print('\n#####################################################################\n' , '\nRunning ML analysis: feature groups ' , '\nGene name:', gene , '\nDrug name:', drug) - - fooD = {'baseline_paramD': { 'input_df': X , 'target': y @@ -160,62 +117,6 @@ scores_7030D = MultModelsCl(**rouC_paramD , add_cm = True , add_yn = True) 
-############################################################################### -#%% SMOTE NC: Smote Oversampling [Numerical + categorical] -#================ -# Baseline -# SMOTE NC: SMNC -#================ -smnc_scores_mmD = MultModelsCl(input_df = X_smnc - , target = y_smnc - , var_type = 'mixed' - , tts_split_type = tts_split_7030 - , resampling_type = 'smnc' - , skf_cv = skf_cv - , blind_test_df = X_bts - , blind_test_target = y_bts - , add_cm = True - , add_yn = True - , return_formatted_output = True): -) - -smnc_all_scores = pd.DataFrame(smnc_scores_mmD) -rs_smnc = 'smnc' -#------------------------ -# WF: only CV and BTS -#----------------------- -smnc_allT = smnc_all_scores.T - -smnc_CV = smnc_allT.filter(regex='test_', axis = 1); smnc_CV.columns -# map colnames for consistency to allow concatenting -smnc_CV.columns = smnc_CV.columns.map(scoreCV_mapD); smnc_CV.columns -smnc_CV['Data_source'] = 'CV' -smnc_CV['Resampling'] = rs_smnc - -smnc_BT = smnc_allT.filter(regex='bts_', axis = 1); smnc_BT.columns -# map colnames for consistency to allow concatenting -smnc_BT.columns = smnc_BT.columns.map(scoreBT_mapD); smnc_BT.columns -smnc_BT['Data_source'] = 'BT' -smnc_BT['Resampling'] = rs_smnc - -# Write csv -# smnc_CV.sort_values(by = ['test_mcc'], ascending = False, inplace = True) -# smnc_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True) -# smnc_CV.to_csv(outdir_ml + gene.lower() + '_smnc_CV_allF.csv') -# smnc_BT.to_csv(outdir_ml + gene.lower() + '_smnc_BT_allF.csv') - -# other data dependent variables -training_size_smnc = len(X_smnc) -n_features = len(X_smnc.columns) -yc1_smnc = Counter(y_smnc) -yc1_ratio_smnc = yc1_smnc[0]/yc1_smnc[1] - -smnc_all['training_size'] = training_size_smnc -smnc_all['trainingY_ratio'] = round(yc1_ratio_smnc,2) -smnc_all['n_features'] = n_features - -############################################################################### - ############################################################################### 
############################################################################### #%% COMBINING all dfs: WF and LF diff --git a/scripts/ml/test_MultClfs.py b/scripts/ml/test_MultClfs.py index 38bd599..201233b 100644 --- a/scripts/ml/test_MultClfs.py +++ b/scripts/ml/test_MultClfs.py @@ -35,9 +35,10 @@ from ml_data_7030 import * #from UQ_yc_RunAllClfs import run_all_ML #==================== -# Import ML function +# Import ML functions #==================== from MultClfs import * + #================== # other vars #==================