Added another aa dict type to reference_dict.py and calculated electrostatic changes for muts by adding these properties to mcsm mut style snps. This allows the calculation to be run on any given file type, since the ref dict can now easily be adapted.

2021-06-11 17:12:21 +01:00 · 2021-06-11 17:12:21 +01:00 · 6dd8cc6f44
commit 6dd8cc6f44
parent 6e8116bc16
2 changed files with 68 additions and 24 deletions
--- a/scripts/mut_electrostatic_changes.py
+++ b/scripts/mut_electrostatic_changes.py
@@ -29,6 +29,8 @@ homedir = os.path.expanduser('~')
 os.getcwd()
 os.chdir(homedir + '/git/LSHTM_analysis/scripts')
 os.getcwd()
+
+from reference_dict import oneletter_aa_dict 
 #=======================================================================
 #%% command line args
 arg_parser = argparse.ArgumentParser()
@@ -43,8 +45,6 @@ arg_parser.add_argument('--debug', action ='store_true', help = 'Debug Mode') #

 args = arg_parser.parse_args()
 #%% variable assignment: input and output 
-#drug = 'pyrazinamide'
-#gene = 'pncA'

 drug    = args.drug
 gene    = args.gene
@@ -101,6 +101,17 @@ list(comb_df.columns)

 # clear variables
 del(in_filename, infile_merged_df3)
+#%%
+#----------------------------------------------------------------
+# add aa properties considering df has columns:
+# 'wild_type', 'mutant_type' separately as single letter aa code
+#----------------------------------------------------------------
+# Build the one-letter aa code -> 'aa_calcprop' lookup once; the column
+# mapping is loop-invariant, so do it a single time on the complete dict.
+lookup_dict = {k: v['aa_calcprop'] for k, v in oneletter_aa_dict.items()}
+#print(lookup_dict)
+comb_df['wt_calcprop'] = comb_df['wild_type'].map(lookup_dict)
+comb_df['mut_calcprop'] = comb_df['mutant_type'].map(lookup_dict)

 #%% subset unique mutations
 df = comb_df.drop_duplicates(['mutationinformation'], keep = 'first')