Added another aa dict type to reference_dict.py and calculated electrostatic changes for mutations by adding these properties to mCSM mutation-style SNPs. This allows the calculation to be run on any given file type, since the reference dict can now easily be adapted.

This commit is contained in:
Tanushree Tunstall 2021-06-11 17:12:21 +01:00
parent 6e8116bc16
commit 6dd8cc6f44
2 changed files with 68 additions and 24 deletions

View file

@ -29,6 +29,8 @@ homedir = os.path.expanduser('~')
# Query the current working directory; the return value is discarded, so this
# only has a visible effect when the cell is run interactively.
os.getcwd()
# Switch into the scripts directory under the user's home directory.
# NOTE(review): presumably needed so the local reference_dict module imported
# below can be found -- confirm against how the script is launched.
os.chdir(homedir + '/git/LSHTM_analysis/scripts')
os.getcwd()
# Reference dictionary of amino-acid properties; used further down to look up
# each entry's 'aa_calcprop' value.
from reference_dict import oneletter_aa_dict
#=======================================================================
#%% command line args
arg_parser = argparse.ArgumentParser()
@ -43,8 +45,6 @@ arg_parser.add_argument('--debug', action ='store_true', help = 'Debug Mode') #
# Parse the command line using the parser configured above.
args = arg_parser.parse_args()
#%% variable assignment: input and output
# Hard-coded example values (disabled); the live values come from the CLI.
#drug = 'pyrazinamide'
#gene = 'pncA'
# Drug and gene names are taken from the parsed arguments.
drug = args.drug
gene = args.gene
@ -101,6 +101,17 @@ list(comb_df.columns)
# drop names that are not needed beyond this point
del in_filename, infile_merged_df3
#%%
#----------------------------------------------------------------
# Add amino-acid property columns.
# Requires comb_df to have 'wild_type' and 'mutant_type' columns,
# each holding a single-letter aa code.
#----------------------------------------------------------------
# Flatten the reference dict: key -> that entry's 'aa_calcprop' value.
lookup_dict = {
    aa: props['aa_calcprop']
    for aa, props in oneletter_aa_dict.items()
}
#print(lookup_dict)
# Series.map with a dict yields NaN for any code that is not a key in
# lookup_dict, so unexpected residue codes surface as missing values.
comb_df['wt_calcprop'] = comb_df['wild_type'].map(lookup_dict)
comb_df['mut_calcprop'] = comb_df['mutant_type'].map(lookup_dict)
#%% subset unique mutations
# keep only the first row encountered for each distinct
# 'mutationinformation' value
df = comb_df.drop_duplicates(subset = ['mutationinformation'], keep = 'first')