Added another aa dict type to reference_dict.py and calculated electrostatic changes for muts based on adding these properties to mcsm mut style snps. This will allow the calculation on a given file type, since the ref dict can now easily be adapted.

This commit is contained in:
Tanushree Tunstall 2021-06-11 17:12:21 +01:00
parent f88e2665e9
commit 8fa9faa17d
2 changed files with 68 additions and 24 deletions

View file

@ -29,6 +29,8 @@ homedir = os.path.expanduser('~')
os.getcwd()
os.chdir(homedir + '/git/LSHTM_analysis/scripts')
os.getcwd()
from reference_dict import oneletter_aa_dict
#=======================================================================
#%% command line args
arg_parser = argparse.ArgumentParser()
@ -43,8 +45,6 @@ arg_parser.add_argument('--debug', action ='store_true', help = 'Debug Mode') #
args = arg_parser.parse_args()
#%% variable assignment: input and output
#drug = 'pyrazinamide'
#gene = 'pncA'
drug = args.drug
gene = args.gene
@ -101,6 +101,17 @@ list(comb_df.columns)
# clear variables
del(in_filename, infile_merged_df3)
#%%
#----------------------------------------------------------------
# add aa properties considering df has columns:
# 'wild_type', 'mutant_type' separately as single letter aa code
#----------------------------------------------------------------
# one-letter aa code -> its 'aa_calcprop' label, taken from the reference dict
lookup_dict = {
    aa_code: aa_props['aa_calcprop']
    for aa_code, aa_props in oneletter_aa_dict.items()
}
#print(lookup_dict)

# annotate the wild-type and mutant residue columns with that label
for src_col, dest_col in (('wild_type', 'wt_calcprop'),
                          ('mutant_type', 'mut_calcprop')):
    comb_df[dest_col] = comb_df[src_col].map(lookup_dict)
#%% subset unique mutations
df = comb_df.drop_duplicates(['mutationinformation'], keep = 'first')

View file

@ -6,13 +6,12 @@ Created on Tue Jun 18 11:32:28 2019
@author: tanushree
"""
#=======================================================================
# TASK: creating an aa dict to map 3 letter and other combinations of
# aa codes to one-letter aa code and also with aa properties.
# TASK: creating an aa dict to map aa 3-letter and 1-letter code to aa
# properties. Other mappings can also be created trivially by adapting
# the code
# Input: .csv file containing aa_code
# Output: is called by other .py script to perform this mapping.
# Output: sourced by other .py script to perform aa property mapping.
#=======================================================================
#%% load packages
import pandas as pd
@ -20,17 +19,8 @@ import os
#=======================================================================
#%% specify homedir and curr dir
homedir = os.path.expanduser('~')
# set working dir
#os.getcwd()
#os.chdir(homedir + '/git/LSHTM_analysis/scripts')
#os.getcwd()
#=======================================================================
#%% variable assignment: input and output
#drug = 'pyrazinamide'
#gene = 'pncA'
#gene_match = gene + '_p.'
#%% variable assignment
#==========
# data dir
#==========
@ -66,6 +56,8 @@ my_aa.index
# with your choice of column name that you have assigned to index as the "primary key".
# using 'index' creates a dict of dicts
# using 'records' creates a list of dicts
#FIXME: remove this and replace with below as shown
my_aa_dict = my_aa.to_dict('index') #20, with 5 subkeys
#print('Printing my_aa_dict:', my_aa_dict.keys())
@ -160,7 +152,12 @@ aa_calcprop = { ('D', 'E'): 'neg'
}
#==============================================================================
# adding amino acid properties to my dict of dicts
#%% Adding amino acid properties to my dict of dicts
#---------------------------------------------
# building dict: my_aa_dict
# FIXME: my_aa_dict.items:
# should be changed to 'low_3letter_dict'
#---------------------------------------------
for k, v in my_aa_dict.items():
#print (k,v)
v['aa_prop1'] = str() #initialise keys
@ -193,10 +190,46 @@ for k, v in my_aa_dict.items():
for group in aa_calcprop:
if v['one_letter_code'] in group:
v['aa_calcprop']+= aa_calcprop[group] # += for str concat
# COMMENT:VOILA!!! my_aa_dict is now a dict of dicts containing all
# associated properties for each aa
#==============================================================================
#%% end of script
#%%
#-----------------------------------
# building dict: oneletter_aa_dict
#-----------------------------------
for aa_code, aa_props in oneletter_aa_dict.items():
    #print (aa_code, aa_props)

    # Each property is derived from a lookup keyed by groups of one-letter
    # codes: every group containing this aa contributes its label.
    # String-valued properties concatenate the labels; list-valued ones
    # collect them (allows for overlapping properties).
    aa_props['aa_prop1'] = ''.join(
        label for members, label in qualities1.items()
        if aa_code in members)
    aa_props['aa_prop2'] = [
        label for members, label in qualities2.items()
        if aa_code in members]
    aa_props['aa_taylor'] = [
        label for members, label in qualities_taylor.items()
        if aa_code in members]
    aa_props['aa_prop_water'] = ''.join(
        label for members, label in qualities_water.items()
        if aa_code in members)
    aa_props['aa_prop_polarity'] = ''.join(
        label for members, label in qualities_polarity.items()
        if aa_code in members)
    aa_props['aa_calcprop'] = ''.join(
        label for members, label in aa_calcprop.items()
        if aa_code in members)
#%%
# COMMENT:VOILA! two different keytypes set for dicts containing all
# associated aa properties
#==============================================================================
#%% end of script