added baseline config files for running v2 ml analysis

This commit is contained in:
Tanushree Tunstall 2022-06-17 14:14:26 +01:00
parent 05dd9698c4
commit 96d4e61dca
6 changed files with 989 additions and 13 deletions

View file

@ -22,9 +22,9 @@ os.chdir( homedir + '/git/ML_AI_training/')
#from UQ_ML_data import *
#---------------------------
from UQ_ML_data2 import *
from ml_data import *
setvars(gene,drug)
from UQ_ML_data2 import *
from ml_data import *
# from YC run_all_ML: run locally
#from UQ_yc_RunAllClfs import run_all_ML
@ -54,7 +54,7 @@ print('AAindex features (n):'
, len(X_aaindexFN)
, '\nThese are:\n'
, X_aaindexFN
, '\n================================================================\n')
, '\n================================================================\n')
print('Evolutionary features (n):'
, len(X_evolFN)
@ -81,6 +81,12 @@ else:
sys.exit('\nFail: Count of feature mismatch')
print('\n#####################################################################\n')
################################################################################
#==================
# Specify outdir
#==================
# Directory prefix used for the result CSVs written further down in this script.
outdir_ml = outdir + 'ml/v2/'
# DataFrame.to_csv does not create missing parent directories, so make sure the
# target directory exists before any result file is written.
os.makedirs(outdir_ml, exist_ok = True)
################################################################################
#==================
@ -103,8 +109,8 @@ baseline_BT = baseline_all.filter(like='bts_', axis=1)
baseline_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
# Write csv
baseline_CT.to_csv(outdir + 'ml/' + gene.lower() + '_baseline_CT_allF.csv')
baseline_BT.to_csv(outdir + 'ml/' + gene.lower() + '_baseline_BT_allF.csv')
baseline_CT.to_csv(outdir_ml + gene.lower() + '_baseline_CT_allF.csv')
baseline_BT.to_csv(outdir_ml + gene.lower() + '_baseline_BT_allF.csv')
#%% SMOTE NC: Oversampling [Numerical + categorical]
@ -124,8 +130,8 @@ smnc_BT = smnc_all.filter(like='bts_', axis=1)
smnc_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
# Write csv
smnc_CT.to_csv(outdir + 'ml/' + gene.lower() + '_smnc_CT_allF.csv')
smnc_BT.to_csv(outdir + 'ml/' + gene.lower() + '_smnc_BT_allF.csv')
smnc_CT.to_csv(outdir_ml + gene.lower() + '_smnc_CT_allF.csv')
smnc_BT.to_csv(outdir_ml + gene.lower() + '_smnc_BT_allF.csv')
#%% ROS: Numerical + categorical
mm_skf_scoresD3 = MultModelsCl(input_df = X_ros
@ -144,8 +150,8 @@ ros_BT = ros_all.filter(like='bts_', axis=1)
ros_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
# Write csv
ros_CT.to_csv(outdir + 'ml/' + gene.lower() + '_ros_CT_allF.csv')
ros_BT.to_csv(outdir + 'ml/' + gene.lower() + '_ros_BT_allF.csv')
ros_CT.to_csv(outdir_ml + gene.lower() + '_ros_CT_allF.csv')
ros_BT.to_csv(outdir_ml + gene.lower() + '_ros_BT_allF.csv')
#%% RUS: Numerical + categorical
mm_skf_scoresD4 = MultModelsCl(input_df = X_rus
@ -164,8 +170,8 @@ rus_BT = rus_all.filter(like='bts_' , axis=1)
rus_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
# Write csv
rus_CT.to_csv(outdir + 'ml/' + gene.lower() + '_rus_CT_allF.csv')
rus_BT.to_csv(outdir + 'ml/' + gene.lower() + '_rus_BT_allF.csv')
rus_CT.to_csv(outdir_ml + gene.lower() + '_rus_CT_allF.csv')
rus_BT.to_csv(outdir_ml + gene.lower() + '_rus_BT_allF.csv')
#%% ROS + RUS Combined: Numerical + categorical
mm_skf_scoresD8 = MultModelsCl(input_df = X_rouC
@ -184,5 +190,5 @@ rouC_BT = rouC_all.filter(like='bts_', axis=1)
rouC_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
# Write csv
rouC_CT.to_csv(outdir + 'ml/' + gene.lower() + '_rouC_CT_allF.csv')
rouC_BT.to_csv(outdir + 'ml/' + gene.lower() + '_rouC_BT_allF.csv')
rouC_CT.to_csv(outdir_ml + gene.lower() + '_rouC_CT_allF.csv')
rouC_BT.to_csv(outdir_ml + gene.lower() + '_rouC_BT_allF.csv')