various changes
This commit is contained in:
parent
f761dd4479
commit
5202be4adc
52 changed files with 1440 additions and 88 deletions
92
temp.py
Executable file
92
temp.py
Executable file
|
@ -0,0 +1,92 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Created on Sun May 29 09:22:51 2022
|
||||
|
||||
@author: tanu
|
||||
"""
|
||||
|
||||
# Gene name groups. Further down, the group a gene belongs to selects which
# extra affinity columns are added to the stability feature list:
#   basic   -> no extra columns
#   na      -> 'mcsm_na_affinity'
#   ppi2    -> 'mcsm_ppi2_affinity', 'interface_dist'
#   na_ppi2 -> both of the above
geneL_basic = [
    'pncA',
]
geneL_na = [
    'gid',
]
geneL_na_ppi2 = [
    'rpoB',
]
geneL_ppi2 = [
    'alr',
    'embB',
    'katG',
]
|
||||
#%% get cols
mycols = my_df.columns

# # change from numeric to object dtype (disabled)
# num_type = ['int64', 'float64']
# cat_type = ['object', 'bool']

# if my_df['active_aa_pos'].dtype in num_type:
# my_df['active_aa_pos'] = my_df['active_aa_pos'].astype(object)
# my_df['active_aa_pos'].dtype

# FIXME: if this is not structural, remove from source..
# Drop NA where numerical cols have them.
# NOTE: gene.lower() is all lower-case while the gene lists hold mixed-case
# names (e.g. 'rpoB'), so the list is lower-cased too; otherwise the
# membership test can never succeed.
if gene.lower() in [g.lower() for g in geneL_na_ppi2]:
    # D1148 get rid of
    # Series.isna() also copes with None / non-float values, on which
    # np.isnan would raise a TypeError.
    na_index = my_df.index[my_df['mcsm_na_affinity'].isna()]
    my_df = my_df.drop(index=na_index)

# FIXME: either impute or remove!
# for embb (L114M, F115L, V123L, V125I, V131M) delete for now
if gene.lower() in ['embb']:
    # Rows with a missing ligand distance are only located here; the drop
    # itself stays disabled (see the commented-out line below).
    na_index = my_df.index[my_df['ligand_distance'].isna()]
    # my_df = my_df.drop(index=na_index)  # RERUN embb with the 5 values now present
|
||||
#%%===========================================================================
|
||||
|
||||
#%%
|
||||
# GET X
# Stability-related feature columns shared by every gene.
common_cols_stabiltyN = [
    'ligand_distance',
    'ligand_affinity_change',
    'duet_stability_change',
    'ddg_foldx',
    'deepddg',
    'ddg_dynamut2',
    'contacts',
]

# Build stability columns ~ gene
# The gene lists hold mixed-case names ('pncA', 'embB', ...), so both sides
# are lower-cased; comparing gene.lower() against the raw lists would never
# match those entries.
_gene_lc = gene.lower()

# The gene groups are disjoint, so at most one branch applies.
if _gene_lc in [g.lower() for g in geneL_basic]:
    # Copy so that later edits to x_stabilityN cannot alter the shared list.
    x_stabilityN = list(common_cols_stabiltyN)
    cols_to_mask = ['ligand_affinity_change']

elif _gene_lc in [g.lower() for g in geneL_ppi2]:
    geneL_ppi2_st_cols = ['mcsm_ppi2_affinity', 'interface_dist']
    x_stabilityN = common_cols_stabiltyN + geneL_ppi2_st_cols
    cols_to_mask = ['ligand_affinity_change', 'mcsm_ppi2_affinity']

elif _gene_lc in [g.lower() for g in geneL_na]:
    geneL_na_st_cols = ['mcsm_na_affinity']
    x_stabilityN = common_cols_stabiltyN + geneL_na_st_cols
    cols_to_mask = ['ligand_affinity_change', 'mcsm_na_affinity']

elif _gene_lc in [g.lower() for g in geneL_na_ppi2]:
    geneL_na_ppi2_st_cols = ['mcsm_na_affinity'] + ['mcsm_ppi2_affinity', 'interface_dist']
    x_stabilityN = common_cols_stabiltyN + geneL_na_ppi2_st_cols
    cols_to_mask = ['ligand_affinity_change', 'mcsm_na_affinity', 'mcsm_ppi2_affinity']

else:
    # Fail loudly instead of leaving x_stabilityN / cols_to_mask undefined
    # (or stale from an earlier run) for a gene outside every group.
    raise ValueError('Unknown gene {!r}: not present in any gene list'.format(gene))
|
||||
|
||||
|
||||
#%% Masking columns (mCSM-lig, mCSM-NA, mCSM-ppi2) values for lig_dist >10
# Boolean row selector for ligand distance > 10. 'ligand_distance' is never
# one of the masked columns, so it is safe to compute this once and reuse it.
_far_from_ligand = my_df_ml['ligand_distance'] > 10

# Inspection only: these results are not stored.
my_df_ml.loc[_far_from_ligand, 'mutationinformation'].value_counts()
my_df_ml.groupby('mutationinformation')['ligand_distance'].apply(lambda x: (x>10)).value_counts()
my_df_ml.loc[_far_from_ligand, cols_to_mask].value_counts()

# mask the column ligand distance > 10
my_df_ml.loc[_far_from_ligand, cols_to_mask] = 0
(my_df_ml['ligand_affinity_change'] == 0).sum()

mask_check = my_df_ml[['mutationinformation', 'ligand_distance'] + cols_to_mask]

# Check every masked column, however many there are. The previous hard-coded
# cols_to_mask[0] / cols_to_mask[1] checks raised IndexError when only one
# column is masked and merely repeated what this loop reports.
# NOTE(review): the check compares counts, so it prints False if a column
# already held 0 for rows with ligand_distance <= 10 - confirm that is intended.
_n_far = _far_from_ligand.sum()
for i, col in enumerate(cols_to_mask):
    ind = i + 1
    print('\nindex:', i, '\nind:', ind)
    print('\nMask count check:'
          , (my_df_ml[col] == 0).sum() == _n_far
          )
|
Loading…
Add table
Add a link
Reference in a new issue