237 lines
8.8 KiB
Python
Executable file
237 lines
8.8 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
"""
|
|
Created on Tue Jun 18 11:32:28 2019
|
|
|
|
@author: tanushree
|
|
"""
|
|
#=======================================================================
|
|
# TASK: creating an aa dict to map aa 3-letter and 1-letter code to aa
|
|
# properties. Other mappings can also be created trivially by adapting
|
|
# the code
|
|
|
|
# Input: .csv file containing aa_code
|
|
# Output: sourced by other .py script to perform aa property mapping.
|
|
#=======================================================================
|
|
#%% load packages
|
|
import pandas as pd
|
|
import os
|
|
#=======================================================================
|
|
#%% specify homedir and curr dir
|
|
# Home directory of the current user; every other path is derived from it.
homedir = os.path.expanduser('~')
#=======================================================================
#%% variable assignment
#==========
# data dir
#==========
# os.path.join picks the platform separator and avoids doubled separators
# when homedir already ends with one.
datadir = os.path.join(homedir, 'git', 'Data')

#=======
# input
#=======
in_filename = 'aa_codes.csv'
# Full path of the amino acid code table that is read further down.
infile = os.path.join(datadir, in_filename)
|
|
#print('Input filename:', in_filename
|
|
# , '\nInput path:', datadir
|
|
# , '\n============================================================')
|
|
|
|
#%% Read input file
|
|
# Load the amino acid code table (original note: 20 rows, 6 columns).
aa_table = pd.read_csv(infile)

#------------------------
#1) 3-letter (lower) code as key
#-------------------------
# Use the lower-case 3-letter code as the row index so that each row can be
# turned into one entry of a dict of dicts keyed by that code.
# Alternative when the key is the first column of the file:
#my_aa = pd.read_csv('aa_codes.csv', index_col = 0)
my_aa = aa_table.set_index('three_letter_code_lower')  # original note: 20, 5
|
|
|
|
#==================
|
|
# convert file
|
|
# to dict of dicts
|
|
#====================
|
|
# convert each row into a dict of dicts so that there are 20 aa and 5 keys within
|
|
# with your choice of column name that you have assigned to index as the "primary key".
|
|
# using 'index' creates a dict of dicts
|
|
# using 'records' creates a list of dicts
|
|
|
|
#FIXME: remove this and replace with below as shown
|
|
# Legacy name, kept so that scripts which still import my_aa_dict keep working.
# orient='index' yields {index value: {column name: cell value}}.
my_aa_dict = my_aa.to_dict('index')  # original note: 20, with 5 subkeys
#print('Printing my_aa_dict:', my_aa_dict.keys())

# Preferred name going forward. It is bound to the very same object as
# my_aa_dict (not a second copy), so every key added to the records of one
# is visible through the other and the two names can never drift apart.
low_3letter_dict = my_aa_dict
|
|
#print('Printing lower-case 3 letter aa dict:',low_3letter_dict.keys())
|
|
|
|
#------------------------
|
|
#2) 1-letter code as key
|
|
#-------------------------
|
|
# Re-index the table by the 1-letter code (original note: 20, 5).
aa_1let = aa_table.set_index('one_letter_code')

# Dict of dicts: {1-letter code: {remaining column name: cell value}}.
oneletter_aa_dict = aa_1let.to_dict('index')  # original note: 20, with 5 subkeys
|
|
#print('Printing one letter aa dict:', oneletter_aa_dict.keys())
|
|
|
|
#------------------------
|
|
#3) amino acid name as key
|
|
#-------------------------
|
|
# Re-index the table by the amino acid name (original note: 20, 5).
aa_name = aa_table.set_index('amino_acid_name')

# Dict of dicts: {amino acid name: {remaining column name: cell value}}.
aa_name_dict = aa_name.to_dict('index')  # original note: 20, with 5 subkeys
|
|
#print('Printing amino acid names aa dict:', aa_name_dict.keys())
|
|
|
|
#------------------------
|
|
#4) 3-letter uppercase code as key
|
|
#-------------------------
|
|
# Re-index the table by the upper-case 3-letter code (original note: 20, 5).
aa_up3let = aa_table.set_index('three_letter_code_upper')

# Dict of dicts: {upper-case 3-letter code: {remaining column name: cell value}}.
up_3letter_aa_dict = aa_up3let.to_dict('index')  # original note: 20, with 5 subkeys
|
|
#print('Printing upper case 3 letter aa dict:', up_3letter_aa_dict.keys())
|
|
|
|
#================================================
|
|
# dict of aa with their corresponding properties
|
|
#================================================
|
|
# 7 categories: no overlap
|
|
# Maps a tuple of 1-letter amino acid codes to the single property label
# shared by all codes in that tuple. Every code appears in exactly one tuple.
qualities1 = {
    ('R', 'H', 'K'): 'Basic',
    ('D', 'E'): 'Acidic',
    ('N', 'Q'): 'Amidic',
    ('G', 'A', 'V', 'L', 'I', 'P'): 'Hydrophobic',
    ('S', 'T'): 'Hydroxylic',
    ('F', 'W', 'Y'): 'Aromatic',
    ('C', 'M'): 'Sulphur',
}
|
|
|
|
# 9 categories: allowing for overlap
|
|
# Maps a tuple of 1-letter amino acid codes to a property label. A code may
# appear in several tuples, so one amino acid can carry several labels.
qualities2 = {
    ('R', 'H', 'K'): 'Basic',
    ('D', 'E'): 'Acidic',  # was misspelled 'Acidc'; now matches qualities1
    ('S', 'T', 'N', 'Q'): 'Polar',
    ('V', 'I', 'L', 'M', 'F', 'Y', 'W'): 'Hydrophobic',
    ('S', 'T', 'H', 'N', 'Q', 'E', 'D', 'K', 'R'): 'Hydrophilic',
    ('S', 'G', 'A', 'P'): 'Small',
    ('F', 'W', 'Y', 'H'): 'Aromatic',
    ('V', 'I', 'L', 'M'): 'Aliphatic',
    ('C', 'G', 'P'): 'Special',
}
|
|
|
|
# taylor classification: allowing for overlap
|
|
# Maps a tuple of 1-letter amino acid codes to a property label. A code may
# appear in several tuples, so one amino acid can carry several labels.
qualities_taylor = {
    ('R', 'H', 'K'): 'Basic',
    ('D', 'E'): 'Acidic',  # was misspelled 'Acidc'; now matches qualities1
    ('S', 'T', 'N', 'Q', 'C', 'Y', 'W', 'H', 'K', 'R', 'D', 'E'): 'Polar',
    ('V', 'I', 'L', 'M', 'F', 'Y', 'W', 'C', 'A', 'G', 'T', 'H'): 'Hydrophobic',
    # Disabled entry, kept for reference:
    #('S', 'T', 'H', 'N', 'Q', 'E', 'D', 'K', 'R'): 'Hydrophilic', #C, W, y MISSING FROM POLAR!
    ('S', 'G', 'A', 'P', 'C', 'T', 'N', 'D', 'V'): 'Small',
    ('F', 'W', 'Y', 'H'): 'Aromatic',
    ('V', 'I', 'L', 'M'): 'Aliphatic',  # although M is not strictly in the circle!
    ('C', 'G', 'P'): 'Special',
}
|
|
|
|
# ternary classification: hydrophobic --> neutral --> hydrophilic (KD scale)
|
|
#http://www.imgt.org/IMGTeducation/Aide-memoire/_UK/aminoacids/IMGTclasses.html
|
|
# Maps a tuple of 1-letter amino acid codes to one of three water-affinity
# labels. Every code appears in exactly one tuple.
qualities_water = {
    ('I', 'V', 'L', 'F', 'C', 'M', 'A', 'W'): 'hydrophobic',
    ('G', 'T', 'S', 'Y', 'P', 'H'): 'neutral',
    ('N', 'D', 'Q', 'E', 'K', 'R'): 'hydrophilic',
}
|
|
|
|
# polarity: no overlap
|
|
# Maps a tuple of 1-letter amino acid codes to a polarity label.
# Every code appears in exactly one tuple.
qualities_polarity = {
    ('D', 'E'): 'acidic',
    ('H', 'K', 'R'): 'basic',
    ('C', 'G', 'N', 'Q', 'S', 'T', 'Y'): 'neutral',
    ('A', 'F', 'I', 'L', 'M', 'P', 'V', 'W'): 'non-polar',
}
|
|
|
|
# almost same as the one above but as pos, neg, polar and non-polar
|
|
# Maps a tuple of 1-letter amino acid codes to a charge/polarity label
# (neg, pos, polar, non-polar). Every code appears in exactly one tuple.
aa_calcprop = {
    ('D', 'E'): 'neg',
    ('H', 'K', 'R'): 'pos',
    ('N', 'Q', 'S', 'T', 'Y'): 'polar',
    ('C', 'G', 'A', 'F', 'I', 'L', 'M', 'P', 'V', 'W'): 'non-polar',
}
|
|
|
|
#==============================================================================
|
|
#%% Adding amino acid properties to my dict of dicts
|
|
#---------------------------------------------
|
|
# building dict: my_aa_dict
|
|
# FIXME: my_aa_dict.items:
|
|
# should be changed to 'low_3letter_dict'
|
|
#---------------------------------------------
|
|
def _add_aa_properties(record, one_letter_code):
    """Write the property labels of one amino acid into its record, in place.

    Parameters
    ----------
    record : dict
        Per-amino-acid dict. Six keys are set (or overwritten) on it:
        'aa_prop1', 'aa_prop2', 'aa_taylor', 'aa_prop_water',
        'aa_prop_polarity' and 'aa_calcprop'.
    one_letter_code : str
        1-letter code that is looked up in the group tuples of each
        classification dict.

    Returns
    -------
    None
        The record is modified in place.
    """
    # (key written to the record, classification dict, keep all matches as list?)
    schemes = (
        ('aa_prop1', qualities1, False),
        ('aa_prop2', qualities2, True),         # allows for overlapping properties
        ('aa_taylor', qualities_taylor, True),  # allows for overlapping properties
        ('aa_prop_water', qualities_water, False),
        ('aa_prop_polarity', qualities_polarity, False),
        ('aa_calcprop', aa_calcprop, False),
    )
    for prop_key, scheme, keep_all in schemes:
        # Labels of every group that contains this code, in dict order.
        labels = [label for group, label in scheme.items()
                  if one_letter_code in group]
        # List-valued keys keep every match; string-valued keys hold the
        # concatenation of the matches ('' when no group contains the code).
        record[prop_key] = labels if keep_all else ''.join(labels)


# my_aa_dict is keyed by the lower-case 3-letter code, so the 1-letter code
# has to be read from each record rather than from the key.
for aa_record in my_aa_dict.values():
    _add_aa_properties(aa_record, aa_record['one_letter_code'])

#%%
#-----------------------------------
# building dict: oneletter_aa_dict
#-----------------------------------
# Here the dict key itself is the 1-letter code.
for one_letter, aa_record in oneletter_aa_dict.items():
    _add_aa_properties(aa_record, one_letter)
|
|
|
|
#%%
|
|
# COMMENT:VOILA! two different keytypes set for dicts containing all
|
|
# associated aa properties
|
|
#==============================================================================
|
|
#%% end of script
|