handled rpob 5uhc position offset in mcsm_ppi2
This commit is contained in:
parent
46e2c93885
commit
00b84ccb1c
30 changed files with 395 additions and 63 deletions
|
@ -24,7 +24,7 @@ from reference_dict import up_3letter_aa_dict
|
|||
from reference_dict import oneletter_aa_dict
|
||||
#%%============================================================================
|
||||
|
||||
def format_mcsm_ppi2_output(mcsm_ppi2_output_csv):
|
||||
def format_mcsm_ppi2_output(mcsm_ppi2_output_csv, gene_name):
|
||||
"""
|
||||
@param mcsm_ppi2_output_csv: file containing mcsm_ppi2_results for all mcsm snps
|
||||
which is the result of combining all mcsm_ppi2 batch results, and using
|
||||
|
@ -78,30 +78,57 @@ def format_mcsm_ppi2_output(mcsm_ppi2_output_csv):
|
|||
|
||||
# # check
|
||||
# mcsm_ppi2_data['wild-type'].equals(mcsm_ppi2_data['WILD'])
|
||||
# mcsm_ppi2_data['mutant'].equals(mcsm_ppi2_data['MUT'])
|
||||
#%%============================================================================
|
||||
#############
|
||||
# rename cols
|
||||
#############
|
||||
# format colnames: all lowercase and consistent colnames
|
||||
mcsm_ppi2_data.columns
|
||||
print('Assigning meaningful colnames'
|
||||
, '\n=======================================================')
|
||||
|
||||
my_colnames_dict = {'chain': 'chain'
|
||||
, 'wild-type': 'wt_upper'
|
||||
, 'res-number': 'position'
|
||||
, 'mutant': 'mut_upper'
|
||||
, 'distance-to-interface': 'interface_dist'
|
||||
, 'mcsm-ppi2-prediction': 'mcsm_ppi2_affinity'
|
||||
, 'affinity': 'mcsm_ppi2_outcome'
|
||||
, 'w_type': 'wild_type' # one letter amino acid code
|
||||
, 'm_type': 'mutant_type' # one letter amino acid code
|
||||
}
|
||||
# mcsm_ppi2_data['mutant'].equals(mcsm_ppi2_data['MUT'])
|
||||
#%%=====================================================================
|
||||
# add offset-corrected position number for rpob, since 5uhc with chain 'C' was
|
||||
# used to run the analysis
|
||||
|
||||
geneL_sp = ['rpob']
|
||||
if gene_name.lower() in geneL_sp:
|
||||
offset = 6
|
||||
chain_orig = 'A'
|
||||
|
||||
# Add offset-corrected position number, matching the rpob nsSNPs used for mCSM-lig
|
||||
# and also add the corresponding chain id, matching the rpob nsSNPs used for mCSM-lig
|
||||
mcsm_ppi2_data['position'] = mcsm_ppi2_data['res-number'] - offset
|
||||
mcsm_ppi2_data['chain'] = chain_orig
|
||||
mcsm_ppi2_data['5uhc_offset'] = offset
|
||||
|
||||
#############
|
||||
# rename cols
|
||||
#############
|
||||
# format colnames: all lowercase and consistent colnames
|
||||
mcsm_ppi2_data.columns
|
||||
print('Assigning meaningful colnames'
|
||||
, '\n=======================================================')
|
||||
|
||||
my_colnames_dict = {'chain' : 'chain'
|
||||
, 'position' : 'position'
|
||||
, '5uhc_offset' : '5uhc_offset'
|
||||
, 'wild-type' : 'wt_upper'
|
||||
, 'res-number' : '5uhc_position'
|
||||
, 'mutant' : 'mut_upper'
|
||||
, 'distance-to-interface': 'interface_dist'
|
||||
, 'mcsm-ppi2-prediction' : 'mcsm_ppi2_affinity'
|
||||
, 'affinity' : 'mcsm_ppi2_outcome'
|
||||
, 'w_type' : 'wild_type' # one letter amino acid code
|
||||
, 'm_type' : 'mutant_type' # one letter amino acid code
|
||||
}
|
||||
else:
|
||||
my_colnames_dict = {'chain' : 'chain'
|
||||
, 'wild-type' : 'wt_upper'
|
||||
, 'res-number' : 'position'
|
||||
, 'mutant' : 'mut_upper'
|
||||
, 'distance-to-interface': 'interface_dist'
|
||||
, 'mcsm-ppi2-prediction' : 'mcsm_ppi2_affinity'
|
||||
, 'affinity' : 'mcsm_ppi2_outcome'
|
||||
, 'w_type' : 'wild_type' # one letter amino acid code
|
||||
, 'm_type' : 'mutant_type' # one letter amino acid code
|
||||
}
|
||||
#%%==============================================================================
|
||||
mcsm_ppi2_data.rename(columns = my_colnames_dict, inplace = True)
|
||||
mcsm_ppi2_data.columns
|
||||
|
||||
|
||||
#############
|
||||
# create mutationinformation column
|
||||
#############
|
||||
|
@ -137,22 +164,47 @@ def format_mcsm_ppi2_output(mcsm_ppi2_output_csv):
|
|||
, '\nExpected number:', mcsm_ppi2_pos
|
||||
, '\nGot:', mcsm_ppi2_pos2
|
||||
, '\n======================================================')
|
||||
|
||||
#%%=====================================================================
|
||||
#############
|
||||
###################
|
||||
# reorder columns
|
||||
#############
|
||||
###################
|
||||
mcsm_ppi2_data.columns
|
||||
mcsm_ppi2_dataf = mcsm_ppi2_data[['mutationinformation'
|
||||
, 'mcsm_ppi2_affinity'
|
||||
, 'mcsm_ppi2_scaled'
|
||||
, 'mcsm_ppi2_outcome'
|
||||
, 'interface_dist'
|
||||
, 'wild_type'
|
||||
, 'position'
|
||||
, 'mutant_type'
|
||||
, 'wt_upper'
|
||||
, 'mut_upper'
|
||||
, 'chain']]
|
||||
|
||||
#---------------------
|
||||
# Determine col order
|
||||
#---------------------
|
||||
|
||||
core_cols = ['mutationinformation'
|
||||
, 'mcsm_ppi2_affinity'
|
||||
, 'mcsm_ppi2_scaled'
|
||||
, 'mcsm_ppi2_outcome'
|
||||
, 'interface_dist'
|
||||
, 'wild_type'
|
||||
, 'position'
|
||||
, 'mutant_type'
|
||||
, 'wt_upper'
|
||||
, 'mut_upper'
|
||||
, 'chain']
|
||||
|
||||
if gene_name.lower() in geneL_sp:
|
||||
|
||||
column_order = core_cols + ['5uhc_offset', '5uhc_position']
|
||||
|
||||
else:
|
||||
|
||||
column_order = core_cols.copy()
|
||||
|
||||
#--------------
|
||||
# reorder now
|
||||
#--------------
|
||||
mcsm_ppi2_dataf = mcsm_ppi2_data[column_order]
|
||||
|
||||
#%%============================================================================
|
||||
###################
|
||||
# Sort df based on
|
||||
# position columns
|
||||
###################
|
||||
mcsm_ppi2_dataf.sort_values(by = ['position', 'mutant_type'], inplace = True, ascending = True)
|
||||
|
||||
return(mcsm_ppi2_dataf)
|
||||
#%%#####################################################################
|
||||
#%%#####################################################################
|
Loading…
Add table
Add a link
Reference in a new issue