From f761dd4479167b7cc33a34cb7ab5a853bd695645 Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Sun, 29 May 2022 08:04:12 +0100 Subject: [PATCH] added notes to run yc models with dissected features --- UQ_yc_RunAllClfs.py | 105 ++++---------------------------------------- pnca_config.py | 6 ++- 2 files changed, 13 insertions(+), 98 deletions(-) diff --git a/UQ_yc_RunAllClfs.py b/UQ_yc_RunAllClfs.py index 69d01c2..a76199e 100644 --- a/UQ_yc_RunAllClfs.py +++ b/UQ_yc_RunAllClfs.py @@ -1,3 +1,10 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Sun May 29 07:43:21 2022 + +@author: tanu +""" import pandas as pd import numpy as np import scipy as sp @@ -251,100 +258,4 @@ def run_all_ML(input_pd, target_label, blind_test_input_df, blind_test_target, p #return(result_pd) return(results_all) - - -#%% CALL function -#run_all_ML(input_pd=X, target_label=y, blind_test_input_df=X_bts, blind_test_target=y_bts, preprocess = True, var_type = 'mixed') -# Baseline_data - -YC_resD2 = run_all_ML(input_pd=X, target_label=y, blind_test_input_df=X_bts, blind_test_target=y_bts, preprocess = True, var_type = 'mixed') -CVResultsDF_baseline = YC_resD2['CrossValResultsDF'] -CVResultsDF_baseline.sort_values(by=['matthew'], ascending=False, inplace=True) -BTSResultsDF_baseline = YC_resD2['BlindTestResultsDF'] -BTSResultsDF_baseline.sort_values(by=['matthew'], ascending=False, inplace=True) - -# from sklearn.utils import all_estimators -# for name, algorithm in all_estimators(type_filter="classifier"): -# clf = algorithm() -# print('Name:', name, '\nAlgo:', clf) - -# Random Oversampling -YC_resD_ros = run_all_ML(input_pd=X_ros, target_label=y_ros, blind_test_input_df=X_bts, blind_test_target=y_bts, preprocess = True, var_type = 'mixed') -CVResultsDF_ros = YC_resD_ros['CrossValResultsDF'] -CVResultsDF_ros.sort_values(by=['matthew'], ascending=False, inplace=True) -BTSResultsDF_ros = YC_resD_ros['BlindTestResultsDF'] -BTSResultsDF_ros.sort_values(by=['matthew'], ascending=False, inplace=True) - -# Random Undersampling -YC_resD_rus = run_all_ML(input_pd=X_rus, target_label=y_rus, blind_test_input_df=X_bts, blind_test_target=y_bts, preprocess = True, var_type = 'mixed') -CVResultsDF_rus = YC_resD_rus['CrossValResultsDF'] -CVResultsDF_rus.sort_values(by=['matthew'], ascending=False, inplace=True) -BTSResultsDF_rus = YC_resD_rus['BlindTestResultsDF'] -BTSResultsDF_rus.sort_values(by=['matthew'], ascending=False, inplace=True) - -# Random Oversampling+Undersampling -YC_resD_rouC = run_all_ML(input_pd=X_rouC, target_label=y_rouC, blind_test_input_df=X_bts, blind_test_target=y_bts, preprocess = True, var_type = 'mixed') -CVResultsDF_rouC = YC_resD_rouC['CrossValResultsDF'] -CVResultsDF_rouC.sort_values(by=['matthew'], ascending=False, inplace=True) -BTSResultsDF_rouC = YC_resD_rouC['BlindTestResultsDF'] -BTSResultsDF_rouC.sort_values(by=['matthew'], ascending=False, inplace=True) - -# SMOTE NC -YC_resD_smnc = run_all_ML(input_pd=X_smnc, target_label=y_smnc, blind_test_input_df=X_bts, blind_test_target=y_bts, preprocess = True, var_type = 'mixed') -CVResultsDF_smnc = YC_resD_smnc['CrossValResultsDF'] -CVResultsDF_smnc.sort_values(by=['matthew'], ascending=False, inplace=True) -BTSResultsDF_smnc = YC_resD_smnc['BlindTestResultsDF'] -BTSResultsDF_smnc.sort_values(by=['matthew'], ascending=False, inplace=True) -############################################################################## -#============================================ -# BASELINE models with dissected featues -#============================================ -# Genomics -yC_gf = run_all_ML(input_pd=X[X_genomicFN], target_label=y, blind_test_input_df=X_bts[X_genomicFN], blind_test_target=y_bts, preprocess = True, var_type = 'mixed') -yc_gfCT_baseline= yC_gf['CrossValResultsDF'] -yc_gfCT_baseline.sort_values(by=['matthew'], ascending=False, inplace=True) -yc_gfBT_baseline = yC_gf['BlindTestResultsDF'] -yc_gfBT_baseline.sort_values(by=['matthew'], ascending=False, inplace=True) - -# Evolutionary -yC_ev = run_all_ML(input_pd=X[X_evolFN], target_label=y, blind_test_input_df=X_bts[X_evolFN], blind_test_target=y_bts, preprocess = True, var_type = 'mixed') -yc_evCT_baseline= yC_ev['CrossValResultsDF'] -yc_evCT_baseline.sort_values(by=['matthew'], ascending=False, inplace=True) -yc_evBT_baseline = yC_ev['BlindTestResultsDF'] -yc_evBT_baseline.sort_values(by=['matthew'], ascending=False, inplace=True) - -# strucF:All -yC_sfall = run_all_ML(input_pd=X[X_strFN], target_label=y, blind_test_input_df=X_bts[X_strFN], blind_test_target=y_bts, preprocess = True, var_type = 'mixed') -yc_sfallCT_baseline= yC_sfall['CrossValResultsDF'] -yc_sfallCT_baseline.sort_values(by=['matthew'], ascending=False, inplace=True) -yc_sfallBT_baseline = yC_sfall['BlindTestResultsDF'] -yc_sfallBT_baseline.sort_values(by=['matthew'], ascending=False, inplace=True) - -# strucF:Common ONLY -yC_sfco= run_all_ML(input_pd=X[common_cols_stabiltyN], target_label=y - , blind_test_input_df=X_bts[common_cols_stabiltyN] - , blind_test_target=y_bts, preprocess = True, var_type = 'mixed') -yc_sfcoCT_baseline= yC_sfco['CrossValResultsDF'] -yc_sfcoCT_baseline.sort_values(by=['matthew'], ascending=False, inplace=True) -yc_sfcoBT_baseline = yC_sfco['BlindTestResultsDF'] -yc_sfcoBT_baseline.sort_values(by=['matthew'], ascending=False, inplace=True) - -# strucF:common_stability + foldX_cols i.e interaction -yC_fxss= run_all_ML(input_pd=X[common_cols_stabiltyN+foldX_cols], target_label=y - , blind_test_input_df=X_bts[common_cols_stabiltyN+foldX_cols] - , blind_test_target=y_bts, preprocess = True, var_type = 'mixed') -yc_fxssCT_baseline= yC_fxss['CrossValResultsDF'] -yc_fxssCT_baseline.sort_values(by=['matthew'], ascending=False, inplace=True) -yc_fxssBT_baseline = yC_fxss['BlindTestResultsDF'] -yc_fxssBT_baseline.sort_values(by=['matthew'], ascending=False, inplace=True) - -# categorical -yC_cat= run_all_ML(input_pd=X[categorical_FN], target_label=y - , blind_test_input_df=X_bts[categorical_FN] - , blind_test_target=y_bts, preprocess = True, var_type = 'mixed') -yc_catCT_baseline= yC_cat['CrossValResultsDF'] -yc_catCT_baseline.sort_values(by=['matthew'], ascending=False, inplace=True) -yc_catBT_baseline = yC_cat['BlindTestResultsDF'] -yc_catBT_baseline.sort_values(by=['matthew'], ascending=False, inplace=True) - - + \ No newline at end of file diff --git a/pnca_config.py b/pnca_config.py index db080a4..794914e 100755 --- a/pnca_config.py +++ b/pnca_config.py @@ -12,7 +12,6 @@ gene = 'pncA' drug = 'pyrazinamide' #total_mtblineage_u = 8 - homedir = os.path.expanduser("~") os.chdir( homedir + '/git/ML_AI_training/') @@ -21,8 +20,13 @@ setvars(gene,drug) from UQ_ML_data import * # from YC run_all_ML: run locally +from UQ_yc_RunAllClfs import run_all_ML + +# TT run all ML clfs: baseline mode from UQ_MultModelsCl import MultModelsCl +#%%########################################################################### + print('\n#####################################################################\n') print('TESTING cmd:' , '\nGene name:', gene