Changed the working directory used for importing functions in pnca_config.py
This commit is contained in:
parent
96d4e61dca
commit
e6d3692445
2 changed files with 10 additions and 14 deletions
|
@ -423,9 +423,9 @@ def setvars(gene,drug):
|
|||
#==========================
|
||||
my_df_ml = my_df.copy()
|
||||
|
||||
#==========================
|
||||
# BLIND test set
|
||||
#==========================
|
||||
#===============================
|
||||
# Training and BLIND test set
|
||||
#===============================
|
||||
# Separate blind test set
|
||||
my_df_ml[drug].isna().sum()
|
||||
|
||||
|
@ -435,7 +435,7 @@ def setvars(gene,drug):
|
|||
training_df = my_df_ml[my_df_ml[drug].notna()]
|
||||
training_df.shape
|
||||
|
||||
# Target1: dst
|
||||
# Target1: dst_mode
|
||||
training_df[drug].value_counts()
|
||||
training_df['dst_mode'].value_counts()
|
||||
|
||||
|
@ -514,15 +514,11 @@ def setvars(gene,drug):
|
|||
|
||||
print('\nTotal no. of features for aaindex:', len(X_aaindexFN))
|
||||
|
||||
#%% Construct numerical and categorical column names
|
||||
# numerical feature names
|
||||
# numerical_FN = common_cols_stabiltyN + foldX_cols + X_strFN + X_evolFN + X_genomicFN
|
||||
|
||||
#numerical_FN = X_ssFN + X_evolFN + X_genomicFN
|
||||
numerical_FN = X_ssFN + X_evolFN + X_genomicFN + X_aaindexFN
|
||||
|
||||
|
||||
#categorical feature names
|
||||
# categorical feature names
|
||||
categorical_FN = ['ss_class'
|
||||
# , 'wt_prop_water'
|
||||
# , 'mut_prop_water'
|
||||
|
@ -534,8 +530,8 @@ def setvars(gene,drug):
|
|||
, 'electrostatics_change'
|
||||
, 'polarity_change'
|
||||
, 'water_change'
|
||||
#, 'drtype_mode_labels' # beware then you can't use it to predict [USED it for uq_v1]
|
||||
, 'active_site'
|
||||
, 'drtype_mode_labels' # beware then you can't use it to predict [USED it for uq_v1, not v2]
|
||||
, 'active_site' #[didn't use it for uq_v1]
|
||||
#, 'gene_name' # will be required for the combined stuff
|
||||
]
|
||||
#----------------------------------------------
|
||||
|
@ -561,7 +557,7 @@ def setvars(gene,drug):
|
|||
my_df_ml.groupby('mutationinformation')['ligand_distance'].apply(lambda x: (x>10)).value_counts()
|
||||
my_df_ml.loc[(my_df_ml['ligand_distance'] > 10), cols_to_mask].value_counts()
|
||||
|
||||
# mask the column ligand distance > 10
|
||||
# mask the mcsm affinity related columns where ligand distance > 10
|
||||
my_df_ml.loc[(my_df_ml['ligand_distance'] > 10), cols_to_mask] = 0
|
||||
(my_df_ml['ligand_affinity_change'] == 0).sum()
|
||||
|
||||
|
|
|
@ -13,7 +13,7 @@ drug = 'pyrazinamide'
|
|||
#total_mtblineage_uc = 8
|
||||
|
||||
homedir = os.path.expanduser("~")
|
||||
os.chdir( homedir + '/git/ML_AI_training/')
|
||||
os.chdir( homedir + '/git/LSHTM_analysis/scripts/ml/')
|
||||
|
||||
#---------------------------
|
||||
# Version 1: no AAindex
|
||||
|
@ -30,7 +30,7 @@ from ml_data import *
|
|||
#from UQ_yc_RunAllClfs import run_all_ML
|
||||
|
||||
# TT run all ML clfs: baseline mode
|
||||
from UQ_MultModelsCl import MultModelsCl
|
||||
from MultModelsCl import MultModelsCl
|
||||
|
||||
#%%###########################################################################
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue