Changed the working directory from which functions are read in pnca_config.py
This commit is contained in:
parent
96d4e61dca
commit
e6d3692445
2 changed files with 10 additions and 14 deletions
|
@ -423,9 +423,9 @@ def setvars(gene,drug):
|
||||||
#==========================
|
#==========================
|
||||||
my_df_ml = my_df.copy()
|
my_df_ml = my_df.copy()
|
||||||
|
|
||||||
#==========================
|
#===============================
|
||||||
# BLIND test set
|
# Training and BLIND test set
|
||||||
#==========================
|
#===============================
|
||||||
# Separate blind test set
|
# Separate blind test set
|
||||||
my_df_ml[drug].isna().sum()
|
my_df_ml[drug].isna().sum()
|
||||||
|
|
||||||
|
@ -435,7 +435,7 @@ def setvars(gene,drug):
|
||||||
training_df = my_df_ml[my_df_ml[drug].notna()]
|
training_df = my_df_ml[my_df_ml[drug].notna()]
|
||||||
training_df.shape
|
training_df.shape
|
||||||
|
|
||||||
# Target1: dst
|
# Target1: dst_mode
|
||||||
training_df[drug].value_counts()
|
training_df[drug].value_counts()
|
||||||
training_df['dst_mode'].value_counts()
|
training_df['dst_mode'].value_counts()
|
||||||
|
|
||||||
|
@ -514,15 +514,11 @@ def setvars(gene,drug):
|
||||||
|
|
||||||
print('\nTotal no. of features for aaindex:', len(X_aaindexFN))
|
print('\nTotal no. of features for aaindex:', len(X_aaindexFN))
|
||||||
|
|
||||||
#%% Construct numerical and categorical column names
|
|
||||||
# numerical feature names
|
# numerical feature names
|
||||||
# numerical_FN = common_cols_stabiltyN + foldX_cols + X_strFN + X_evolFN + X_genomicFN
|
|
||||||
|
|
||||||
#numerical_FN = X_ssFN + X_evolFN + X_genomicFN
|
|
||||||
numerical_FN = X_ssFN + X_evolFN + X_genomicFN + X_aaindexFN
|
numerical_FN = X_ssFN + X_evolFN + X_genomicFN + X_aaindexFN
|
||||||
|
|
||||||
|
|
||||||
#categorical feature names
|
# categorical feature names
|
||||||
categorical_FN = ['ss_class'
|
categorical_FN = ['ss_class'
|
||||||
# , 'wt_prop_water'
|
# , 'wt_prop_water'
|
||||||
# , 'mut_prop_water'
|
# , 'mut_prop_water'
|
||||||
|
@ -534,8 +530,8 @@ def setvars(gene,drug):
|
||||||
, 'electrostatics_change'
|
, 'electrostatics_change'
|
||||||
, 'polarity_change'
|
, 'polarity_change'
|
||||||
, 'water_change'
|
, 'water_change'
|
||||||
#, 'drtype_mode_labels' # beware then you can't use it to predict [USED it for uq_v1]
|
, 'drtype_mode_labels' # beware then you can't use it to predict [USED it for uq_v1, not v2]
|
||||||
, 'active_site'
|
, 'active_site' #[didn't use it for uq_v1]
|
||||||
#, 'gene_name' # will be required for the combined stuff
|
#, 'gene_name' # will be required for the combined stuff
|
||||||
]
|
]
|
||||||
#----------------------------------------------
|
#----------------------------------------------
|
||||||
|
@ -561,7 +557,7 @@ def setvars(gene,drug):
|
||||||
my_df_ml.groupby('mutationinformation')['ligand_distance'].apply(lambda x: (x>10)).value_counts()
|
my_df_ml.groupby('mutationinformation')['ligand_distance'].apply(lambda x: (x>10)).value_counts()
|
||||||
my_df_ml.loc[(my_df_ml['ligand_distance'] > 10), cols_to_mask].value_counts()
|
my_df_ml.loc[(my_df_ml['ligand_distance'] > 10), cols_to_mask].value_counts()
|
||||||
|
|
||||||
# mask the column ligand distance > 10
|
# mask the mcsm affinity related columns where ligand distance > 10
|
||||||
my_df_ml.loc[(my_df_ml['ligand_distance'] > 10), cols_to_mask] = 0
|
my_df_ml.loc[(my_df_ml['ligand_distance'] > 10), cols_to_mask] = 0
|
||||||
(my_df_ml['ligand_affinity_change'] == 0).sum()
|
(my_df_ml['ligand_affinity_change'] == 0).sum()
|
||||||
|
|
||||||
|
|
|
@ -13,7 +13,7 @@ drug = 'pyrazinamide'
|
||||||
#total_mtblineage_uc = 8
|
#total_mtblineage_uc = 8
|
||||||
|
|
||||||
homedir = os.path.expanduser("~")
|
homedir = os.path.expanduser("~")
|
||||||
os.chdir( homedir + '/git/ML_AI_training/')
|
os.chdir( homedir + '/git/LSHTM_analysis/scripts/ml/')
|
||||||
|
|
||||||
#---------------------------
|
#---------------------------
|
||||||
# Version 1: no AAindex
|
# Version 1: no AAindex
|
||||||
|
@ -30,7 +30,7 @@ from ml_data import *
|
||||||
#from UQ_yc_RunAllClfs import run_all_ML
|
#from UQ_yc_RunAllClfs import run_all_ML
|
||||||
|
|
||||||
# TT run all ML clfs: baseline mode
|
# TT run all ML clfs: baseline mode
|
||||||
from UQ_MultModelsCl import MultModelsCl
|
from MultModelsCl import MultModelsCl
|
||||||
|
|
||||||
#%%###########################################################################
|
#%%###########################################################################
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue