Added another aa dict type to reference_dict.py and calculated electrostatic changes for muts based on adding these properties to mcsm mut-style SNPs. This will allow the calculation on a given file type since the ref dict can now easily be adapted.

2021-06-11 17:12:21 +01:00 · 2021-06-11 17:12:21 +01:00 · 8fa9faa17d
commit 8fa9faa17d
parent f88e2665e9
2 changed files with 68 additions and 24 deletions
--- a/scripts/mut_electrostatic_changes.py
+++ b/scripts/mut_electrostatic_changes.py
@ -29,6 +29,8 @@ homedir = os.path.expanduser('~')
 os.getcwd()
 os.chdir(homedir + '/git/LSHTM_analysis/scripts')
 os.getcwd()
+
+from reference_dict import oneletter_aa_dict 
 #=======================================================================
 #%% command line args
 arg_parser = argparse.ArgumentParser()
@ -43,8 +45,6 @@ arg_parser.add_argument('--debug', action ='store_true', help = 'Debug Mode') #

 args = arg_parser.parse_args()
 #%% variable assignment: input and output 
-#drug = 'pyrazinamide'
-#gene = 'pncA'

 drug    = args.drug
 gene    = args.gene
@ -101,6 +101,17 @@ list(comb_df.columns)

 # clear variables
 del(in_filename, infile_merged_df3)
+#%%
+#----------------------------------------------------------------
+# add aa properties considering df has columns:
+# 'wild_type', 'mutant_type' separately as single letter aa code
+#----------------------------------------------------------------
+# build the one-letter aa -> 'aa_calcprop' lookup in a single pass
+lookup_dict = {k: v['aa_calcprop'] for k, v in oneletter_aa_dict.items()}
+#print(lookup_dict)
+# map only after the lookup is complete (previously re-mapped on every loop iteration)
+comb_df['wt_calcprop'] = comb_df['wild_type'].map(lookup_dict)
+comb_df['mut_calcprop'] = comb_df['mutant_type'].map(lookup_dict)

 #%% subset unique mutations
 df = comb_df.drop_duplicates(['mutationinformation'], keep = 'first')
--- a/scripts/reference_dict.py
+++ b/scripts/reference_dict.py
@ -6,13 +6,12 @@ Created on Tue Jun 18 11:32:28 2019
@author: tanushree
 """
 #=======================================================================
-# TASK: creating an aa dict to map 3 letter and other combinations of
-# aa codes to one-letter aa code and also with aa properties.
+# TASK: creating an aa dict to map aa 3-letter and 1-letter code to aa 
+# properties. Other mappings can also be created trivially by adapting 
+# the code

 # Input: .csv file containing aa_code
-
-# Output: is called by other .py script to perform this mapping.
-
+# Output: sourced by other .py script to perform aa property mapping.
 #=======================================================================
 #%% load packages
 import pandas as pd
@ -20,17 +19,8 @@ import os
 #=======================================================================
 #%% specify homedir and curr dir
 homedir = os.path.expanduser('~')
-
-# set working dir
-#os.getcwd()
-#os.chdir(homedir + '/git/LSHTM_analysis/scripts')
-#os.getcwd()
 #=======================================================================
-#%% variable assignment: input and output
-#drug = 'pyrazinamide'
-#gene = 'pncA'
-#gene_match = gene + '_p.'
-
+#%% variable assignment
 #==========
 # data dir
 #==========
@ -66,6 +56,8 @@ my_aa.index
 # with your choice of column name that you have assigned to index as the "primary key". 
 # using 'index' creates a dict of dicts
 # using 'records' creates a list of dicts
+
+#FIXME: remove this and replace with below as shown
 my_aa_dict = my_aa.to_dict('index') #20, with 5 subkeys
 #print('Printing my_aa_dict:', my_aa_dict.keys())

@ -160,7 +152,12 @@ aa_calcprop = { ('D', 'E'): 'neg'
 }

 #==============================================================================                
-# adding amino acid properties to my dict of dicts                      
+#%% Adding amino acid properties to my dict of dicts
+#---------------------------------------------
+# building dict: my_aa_dict
+# FIXME: my_aa_dict.items:
+#   should be changed to 'low_3letter_dict'
+#---------------------------------------------
 for k, v in my_aa_dict.items():
    #print (k,v)
    v['aa_prop1'] = str() #initialise keys 
@ -193,10 +190,46 @@ for k, v in my_aa_dict.items():
    for group in aa_calcprop:
        if v['one_letter_code'] in group:
            v['aa_calcprop']+= aa_calcprop[group] # += for str concat 
-             
-# COMMENT:VOILA!!! my_aa_dict is now a dict of dicts containing all 
-# associated properties for each aa
-#==============================================================================
-#%% end of script
            
-  
+#%%
+#-----------------------------------
+# building dict: oneletter_aa_dict
+#-----------------------------------  
+for k, v in oneletter_aa_dict.items():
+    # k is tested for membership in each group key below; v is updated in place
+    v['aa_prop1'] = str() # initialise key (string, built by concatenation)
+    v['aa_prop2'] = list() # initialise key (list allows for overlapping properties)
+    v['aa_taylor'] = list() # initialise key (list allows for overlapping properties)
+    v['aa_prop_water'] = str() # initialise key (string)
+    v['aa_prop_polarity'] = str() # initialise key (string)
+    v['aa_calcprop'] = str() # initialise key (string)
+    
+    for group in qualities1:
+        if k in group:
+            v['aa_prop1']+= qualities1[group] # += for str concat   
+
+    for group in qualities2:
+        if k in group:
+            v['aa_prop2'].append(qualities2[group]) # append to list
+ 
+    for group in qualities_taylor:
+        if k in group:
+            v['aa_taylor'].append(qualities_taylor[group]) # append to list           
+            
+    for group in qualities_water:
+        if k in group:
+            v['aa_prop_water']+= qualities_water[group] # += for str concat          
+
+    for group in qualities_polarity:
+        if k in group:
+            v['aa_prop_polarity']+= qualities_polarity[group] # += for str concat 
+            
+    for group in aa_calcprop:
+        if k in group:
+            v['aa_calcprop']+= aa_calcprop[group] # += for str concat 
+                         
+#%%                   
+# COMMENT:VOILA! two different key types set for dicts containing all
+# associated aa properties
+#==============================================================================
+#%% end of script