Added another aa dict type to reference_dict.py and calculated electrostatic changes for muts based on adding these properties to mcsm mut style snps. This will allow the calculation on a given file type since the ref dict can now easily be adapted.
This commit is contained in:
parent
f88e2665e9
commit
8fa9faa17d
2 changed files with 68 additions and 24 deletions
|
@ -29,6 +29,8 @@ homedir = os.path.expanduser('~')
|
||||||
os.getcwd()
|
os.getcwd()
|
||||||
os.chdir(homedir + '/git/LSHTM_analysis/scripts')
|
os.chdir(homedir + '/git/LSHTM_analysis/scripts')
|
||||||
os.getcwd()
|
os.getcwd()
|
||||||
|
|
||||||
|
from reference_dict import oneletter_aa_dict
|
||||||
#=======================================================================
|
#=======================================================================
|
||||||
#%% command line args
|
#%% command line args
|
||||||
arg_parser = argparse.ArgumentParser()
|
arg_parser = argparse.ArgumentParser()
|
||||||
|
@ -43,8 +45,6 @@ arg_parser.add_argument('--debug', action ='store_true', help = 'Debug Mode') #
|
||||||
|
|
||||||
args = arg_parser.parse_args()
|
args = arg_parser.parse_args()
|
||||||
#%% variable assignment: input and output
|
#%% variable assignment: input and output
|
||||||
#drug = 'pyrazinamide'
|
|
||||||
#gene = 'pncA'
|
|
||||||
|
|
||||||
drug = args.drug
|
drug = args.drug
|
||||||
gene = args.gene
|
gene = args.gene
|
||||||
|
@ -101,6 +101,17 @@ list(comb_df.columns)
|
||||||
|
|
||||||
# clear variables
|
# clear variables
|
||||||
del(in_filename, infile_merged_df3)
|
del(in_filename, infile_merged_df3)
|
||||||
|
#%%
|
||||||
|
#----------------------------------------------------------------
|
||||||
|
# add aa properties considering df has columns:
|
||||||
|
# 'wild_type', 'mutant_type' separately as single letter aa code
|
||||||
|
#----------------------------------------------------------------
|
||||||
|
lookup_dict = dict()
|
||||||
|
for k, v in oneletter_aa_dict.items():
|
||||||
|
lookup_dict[k] = v['aa_calcprop']
|
||||||
|
#print(lookup_dict)
|
||||||
|
comb_df['wt_calcprop'] = comb_df['wild_type'].map(lookup_dict)
|
||||||
|
comb_df['mut_calcprop'] = comb_df['mutant_type'].map(lookup_dict)
|
||||||
|
|
||||||
#%% subset unique mutations
|
#%% subset unique mutations
|
||||||
df = comb_df.drop_duplicates(['mutationinformation'], keep = 'first')
|
df = comb_df.drop_duplicates(['mutationinformation'], keep = 'first')
|
||||||
|
|
|
@ -6,13 +6,12 @@ Created on Tue Jun 18 11:32:28 2019
|
||||||
@author: tanushree
|
@author: tanushree
|
||||||
"""
|
"""
|
||||||
#=======================================================================
|
#=======================================================================
|
||||||
# TASK: creating an aa dict to map 3 letter and other combinations of
|
# TASK: creating an aa dict to map aa 3-letter and 1-letter code to aa
|
||||||
# aa codes to one-letter aa code and also with aa properties.
|
# properties. Other mappings can also be created trivially by adapting
|
||||||
|
# the code
|
||||||
|
|
||||||
# Input: .csv file containing aa_code
|
# Input: .csv file containing aa_code
|
||||||
|
# Output: sourced by other .py script to perform aa property mapping.
|
||||||
# Output: is called by other .py script to perform this mapping.
|
|
||||||
|
|
||||||
#=======================================================================
|
#=======================================================================
|
||||||
#%% load packages
|
#%% load packages
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
@ -20,17 +19,8 @@ import os
|
||||||
#=======================================================================
|
#=======================================================================
|
||||||
#%% specify homedir and curr dir
|
#%% specify homedir and curr dir
|
||||||
homedir = os.path.expanduser('~')
|
homedir = os.path.expanduser('~')
|
||||||
|
|
||||||
# set working dir
|
|
||||||
#os.getcwd()
|
|
||||||
#os.chdir(homedir + '/git/LSHTM_analysis/scripts')
|
|
||||||
#os.getcwd()
|
|
||||||
#=======================================================================
|
#=======================================================================
|
||||||
#%% variable assignment: input and output
|
#%% variable assignment
|
||||||
#drug = 'pyrazinamide'
|
|
||||||
#gene = 'pncA'
|
|
||||||
#gene_match = gene + '_p.'
|
|
||||||
|
|
||||||
#==========
|
#==========
|
||||||
# data dir
|
# data dir
|
||||||
#==========
|
#==========
|
||||||
|
@ -66,6 +56,8 @@ my_aa.index
|
||||||
# with your choice of column name that you have assigned to index as the "primary key".
|
# with your choice of column name that you have assigned to index as the "primary key".
|
||||||
# using 'index' creates a dict of dicts
|
# using 'index' creates a dict of dicts
|
||||||
# using 'records' creates a list of dicts
|
# using 'records' creates a list of dicts
|
||||||
|
|
||||||
|
#FIXME: remove this and replace with below as shown
|
||||||
my_aa_dict = my_aa.to_dict('index') #20, with 5 subkeys
|
my_aa_dict = my_aa.to_dict('index') #20, with 5 subkeys
|
||||||
#print('Printing my_aa_dict:', my_aa_dict.keys())
|
#print('Printing my_aa_dict:', my_aa_dict.keys())
|
||||||
|
|
||||||
|
@ -160,7 +152,12 @@ aa_calcprop = { ('D', 'E'): 'neg'
|
||||||
}
|
}
|
||||||
|
|
||||||
#==============================================================================
|
#==============================================================================
|
||||||
# adding amino acid properties to my dict of dicts
|
#%% Adding amino acid properties to my dict of dicts
|
||||||
|
#---------------------------------------------
|
||||||
|
# building dict: my_aa_dict
|
||||||
|
# FIXME: my_aa_dict.items:
|
||||||
|
# should be changed to 'low_3letter_dict'
|
||||||
|
#---------------------------------------------
|
||||||
for k, v in my_aa_dict.items():
|
for k, v in my_aa_dict.items():
|
||||||
#print (k,v)
|
#print (k,v)
|
||||||
v['aa_prop1'] = str() #initialise keys
|
v['aa_prop1'] = str() #initialise keys
|
||||||
|
@ -193,10 +190,46 @@ for k, v in my_aa_dict.items():
|
||||||
for group in aa_calcprop:
|
for group in aa_calcprop:
|
||||||
if v['one_letter_code'] in group:
|
if v['one_letter_code'] in group:
|
||||||
v['aa_calcprop']+= aa_calcprop[group] # += for str concat
|
v['aa_calcprop']+= aa_calcprop[group] # += for str concat
|
||||||
|
|
||||||
# COMMENT:VOILA!!! my_aa_dict is now a dict of dicts containing all
|
|
||||||
# associated properties for each aa
|
|
||||||
#==============================================================================
|
|
||||||
#%% end of script
|
|
||||||
|
|
||||||
|
#%%
|
||||||
|
#-----------------------------------
|
||||||
|
# building dict: oneletter_aa_dict
|
||||||
|
#-----------------------------------
|
||||||
|
for k, v in oneletter_aa_dict.items():
|
||||||
|
#print (k,v)
|
||||||
|
v['aa_prop1'] = str() #initialise keys
|
||||||
|
v['aa_prop2'] = list() #initialise keys (allows for overlapping properties)
|
||||||
|
v['aa_taylor'] = list() #initialise keys (allows for overlapping properties)
|
||||||
|
v['aa_prop_water'] = str() #initialise keys
|
||||||
|
v['aa_prop_polarity'] = str() #initialise keys
|
||||||
|
v['aa_calcprop'] = str() #initialise keys
|
||||||
|
|
||||||
|
for group in qualities1:
|
||||||
|
if k in group:
|
||||||
|
v['aa_prop1']+= qualities1[group] # += for str concat
|
||||||
|
|
||||||
|
for group in qualities2:
|
||||||
|
if k in group:
|
||||||
|
v['aa_prop2'].append(qualities2[group]) # append to list
|
||||||
|
|
||||||
|
for group in qualities_taylor:
|
||||||
|
if k in group:
|
||||||
|
v['aa_taylor'].append(qualities_taylor[group]) # append to list
|
||||||
|
|
||||||
|
for group in qualities_water:
|
||||||
|
if k in group:
|
||||||
|
v['aa_prop_water']+= qualities_water[group] # += for str concat
|
||||||
|
|
||||||
|
for group in qualities_polarity:
|
||||||
|
if k in group:
|
||||||
|
v['aa_prop_polarity']+= qualities_polarity[group] # += for str concat
|
||||||
|
|
||||||
|
for group in aa_calcprop:
|
||||||
|
if k in group:
|
||||||
|
v['aa_calcprop']+= aa_calcprop[group] # += for str concat
|
||||||
|
|
||||||
|
#%%
|
||||||
|
# COMMENT:VOILA! two different keytypes set for dicts containing all
|
||||||
|
# associated aa properties
|
||||||
|
#==============================================================================
|
||||||
|
#%% end of script
|
Loading…
Add table
Add a link
Reference in a new issue