Changed the directory from which functions are read (imported) in pnca_config.py

This commit is contained in:
Tanushree Tunstall 2022-06-17 16:37:07 +01:00
parent 96d4e61dca
commit e6d3692445
2 changed files with 10 additions and 14 deletions

View file

@ -423,9 +423,9 @@ def setvars(gene,drug):
#========================== #==========================
my_df_ml = my_df.copy() my_df_ml = my_df.copy()
#========================== #===============================
# BLIND test set # Training and BLIND test set
#========================== #===============================
# Separate blind test set # Separate blind test set
my_df_ml[drug].isna().sum() my_df_ml[drug].isna().sum()
@ -435,7 +435,7 @@ def setvars(gene,drug):
training_df = my_df_ml[my_df_ml[drug].notna()] training_df = my_df_ml[my_df_ml[drug].notna()]
training_df.shape training_df.shape
# Target1: dst # Target1: dst_mode
training_df[drug].value_counts() training_df[drug].value_counts()
training_df['dst_mode'].value_counts() training_df['dst_mode'].value_counts()
@ -514,15 +514,11 @@ def setvars(gene,drug):
print('\nTotal no. of features for aaindex:', len(X_aaindexFN)) print('\nTotal no. of features for aaindex:', len(X_aaindexFN))
#%% Construct numerical and categorical column names
# numerical feature names # numerical feature names
# numerical_FN = common_cols_stabiltyN + foldX_cols + X_strFN + X_evolFN + X_genomicFN
#numerical_FN = X_ssFN + X_evolFN + X_genomicFN
numerical_FN = X_ssFN + X_evolFN + X_genomicFN + X_aaindexFN numerical_FN = X_ssFN + X_evolFN + X_genomicFN + X_aaindexFN
#categorical feature names # categorical feature names
categorical_FN = ['ss_class' categorical_FN = ['ss_class'
# , 'wt_prop_water' # , 'wt_prop_water'
# , 'mut_prop_water' # , 'mut_prop_water'
@ -534,8 +530,8 @@ def setvars(gene,drug):
, 'electrostatics_change' , 'electrostatics_change'
, 'polarity_change' , 'polarity_change'
, 'water_change' , 'water_change'
#, 'drtype_mode_labels' # beware then you can't use it to predict [USED it for uq_v1] , 'drtype_mode_labels' # beware then you can't use it to predict [USED it for uq_v1, not v2]
, 'active_site' , 'active_site' #[didn't use it for uq_v1]
#, 'gene_name' # will be required for the combined stuff #, 'gene_name' # will be required for the combined stuff
] ]
#---------------------------------------------- #----------------------------------------------
@ -561,7 +557,7 @@ def setvars(gene,drug):
my_df_ml.groupby('mutationinformation')['ligand_distance'].apply(lambda x: (x>10)).value_counts() my_df_ml.groupby('mutationinformation')['ligand_distance'].apply(lambda x: (x>10)).value_counts()
my_df_ml.loc[(my_df_ml['ligand_distance'] > 10), cols_to_mask].value_counts() my_df_ml.loc[(my_df_ml['ligand_distance'] > 10), cols_to_mask].value_counts()
# mask the column ligand distance > 10 # mask the mcsm affinity related columns where ligand distance > 10
my_df_ml.loc[(my_df_ml['ligand_distance'] > 10), cols_to_mask] = 0 my_df_ml.loc[(my_df_ml['ligand_distance'] > 10), cols_to_mask] = 0
(my_df_ml['ligand_affinity_change'] == 0).sum() (my_df_ml['ligand_affinity_change'] == 0).sum()

View file

@ -13,7 +13,7 @@ drug = 'pyrazinamide'
#total_mtblineage_uc = 8 #total_mtblineage_uc = 8
homedir = os.path.expanduser("~") homedir = os.path.expanduser("~")
os.chdir( homedir + '/git/ML_AI_training/') os.chdir( homedir + '/git/LSHTM_analysis/scripts/ml/')
#--------------------------- #---------------------------
# Version 1: no AAindex # Version 1: no AAindex
@ -30,7 +30,7 @@ from ml_data import *
#from UQ_yc_RunAllClfs import run_all_ML #from UQ_yc_RunAllClfs import run_all_ML
# TT run all ML clfs: baseline mode # TT run all ML clfs: baseline mode
from UQ_MultModelsCl import MultModelsCl from MultModelsCl import MultModelsCl
#%%########################################################################### #%%###########################################################################