diff --git a/scripts/mut_electrostatic_changes.py b/scripts/mut_electrostatic_changes.py index 6620313..243ff93 100755 --- a/scripts/mut_electrostatic_changes.py +++ b/scripts/mut_electrostatic_changes.py @@ -29,6 +29,8 @@ homedir = os.path.expanduser('~') os.getcwd() os.chdir(homedir + '/git/LSHTM_analysis/scripts') os.getcwd() + +from reference_dict import oneletter_aa_dict #======================================================================= #%% command line args arg_parser = argparse.ArgumentParser() @@ -43,8 +45,6 @@ arg_parser.add_argument('--debug', action ='store_true', help = 'Debug Mode') # args = arg_parser.parse_args() #%% variable assignment: input and output -#drug = 'pyrazinamide' -#gene = 'pncA' drug = args.drug gene = args.gene @@ -101,6 +101,17 @@ list(comb_df.columns) # clear variables del(in_filename, infile_merged_df3) +#%% +#---------------------------------------------------------------- +# add aa properties considering df has columns: +# 'wild_type', 'mutant_type' separately as single letter aa code +#---------------------------------------------------------------- +lookup_dict = dict() +for k, v in oneletter_aa_dict.items(): + lookup_dict[k] = v['aa_calcprop'] + #print(lookup_dict) + comb_df['wt_calcprop'] = comb_df['wild_type'].map(lookup_dict) + comb_df['mut_calcprop'] = comb_df['mutant_type'].map(lookup_dict) #%% subset unique mutations df = comb_df.drop_duplicates(['mutationinformation'], keep = 'first') diff --git a/scripts/reference_dict.py b/scripts/reference_dict.py index eff2211..ee431c3 100755 --- a/scripts/reference_dict.py +++ b/scripts/reference_dict.py @@ -6,13 +6,12 @@ Created on Tue Jun 18 11:32:28 2019 @author: tanushree """ #======================================================================= -# TASK: creating an aa dict to map 3 letter and other combinations of -# aa codes to one-letter aa code and also with aa properties. +# TASK: creating an aa dict to map aa 3-letter and 1-letter code to aa +# properties. 
Other mappings can also be created trivially by adapting +# the code # Input: .csv file containing aa_code - -# Output: is called by other .py script to perform this mapping. - +# Output: sourced by other .py script to perform aa property mapping. #======================================================================= #%% load packages import pandas as pd @@ -20,17 +19,8 @@ import os #======================================================================= #%% specify homedir and curr dir homedir = os.path.expanduser('~') - -# set working dir -#os.getcwd() -#os.chdir(homedir + '/git/LSHTM_analysis/scripts') -#os.getcwd() #======================================================================= -#%% variable assignment: input and output -#drug = 'pyrazinamide' -#gene = 'pncA' -#gene_match = gene + '_p.' - +#%% variable assignment #========== # data dir #========== @@ -66,6 +56,8 @@ my_aa.index # with your choice of column name that you have assigned to index as the "primary key". # using 'index' creates a dict of dicts # using 'records' creates a list of dicts + +#FIXME: remove this and replace with below as shown my_aa_dict = my_aa.to_dict('index') #20, with 5 subkeys #print('Printing my_aa_dict:', my_aa_dict.keys()) @@ -160,7 +152,12 @@ aa_calcprop = { ('D', 'E'): 'neg' } #============================================================================== -# adding amino acid properties to my dict of dicts +#%% Adding amino acid properties to my dict of dicts +#--------------------------------------------- +# building dict: my_aa_dict +# FIXME: my_aa_dict.items: +# should be changed to 'low_3letter_dict' +#--------------------------------------------- for k, v in my_aa_dict.items(): #print (k,v) v['aa_prop1'] = str() #initialise keys @@ -193,10 +190,46 @@ for k, v in my_aa_dict.items(): for group in aa_calcprop: if v['one_letter_code'] in group: v['aa_calcprop']+= aa_calcprop[group] # += for str concat - -# COMMENT:VOILA!!! 
my_aa_dict is now a dict of dicts containing all -# associated properties for each aa -#============================================================================== -#%% end of script - +#%% +#----------------------------------- +# building dict: oneletter_aa_dict +#----------------------------------- +for k, v in oneletter_aa_dict.items(): + #print (k,v) + v['aa_prop1'] = str() #initialise keys + v['aa_prop2'] = list() #initialise keys (allows for overlapping properties) + v['aa_taylor'] = list() #initialise keys (allows for overlapping properties) + v['aa_prop_water'] = str() #initialise keys + v['aa_prop_polarity'] = str() #initialise keys + v['aa_calcprop'] = str() #initialise keys + + for group in qualities1: + if k in group: + v['aa_prop1']+= qualities1[group] # += for str concat + + for group in qualities2: + if k in group: + v['aa_prop2'].append(qualities2[group]) # append to list + + for group in qualities_taylor: + if k in group: + v['aa_taylor'].append(qualities_taylor[group]) # append to list + + for group in qualities_water: + if k in group: + v['aa_prop_water']+= qualities_water[group] # += for str concat + + for group in qualities_polarity: + if k in group: + v['aa_prop_polarity']+= qualities_polarity[group] # += for str concat + + for group in aa_calcprop: + if k in group: + v['aa_calcprop']+= aa_calcprop[group] # += for str concat + +#%% +# COMMENT:VOILA! two different keytypes set for dicts containing all +# associated aa properties +#============================================================================== +#%% end of script \ No newline at end of file