tidy code and renamed kd.py to kd_df.py
This commit is contained in:
parent
4c2fa2b600
commit
0b7a938fbd
6 changed files with 156 additions and 194 deletions
111
meta_data_analysis/reference_dict.py
Executable file → Normal file
111
meta_data_analysis/reference_dict.py
Executable file → Normal file
|
@ -5,58 +5,82 @@ Created on Tue Jun 18 11:32:28 2019
|
|||
|
||||
@author: tanushree
|
||||
"""
|
||||
############################################
|
||||
# load libraries
|
||||
#=======================================================================
|
||||
# TASK: creating an aa dict to map 3 letter and other combinations of
|
||||
# aa codes to one-letter aa code and also with aa properties.
|
||||
|
||||
# Input: .csv file containing aa_code
|
||||
|
||||
# Output: is called by other .py script to perform this mapping.
|
||||
|
||||
#=======================================================================
|
||||
#%% load packages
|
||||
import pandas as pd
|
||||
import os
|
||||
#############################################
|
||||
#=======================================================================
|
||||
#%% specify homedir and curr dir
|
||||
homedir = os.path.expanduser('~')
|
||||
|
||||
#!#########################!
|
||||
# REQUIREMENTS:
|
||||
# Data/ must exist
|
||||
# containing GWAS data
|
||||
#!#########################!
|
||||
|
||||
print(os.getcwd())
|
||||
homedir = os.path.expanduser('~') # spyder/python doesn't recognise tilde
|
||||
os.chdir(homedir + '/git/Data/pyrazinamide/input/original')
|
||||
print(os.getcwd())
|
||||
|
||||
#%%
|
||||
############# specify variables for input and output paths and filenames
|
||||
# set working dir
|
||||
#os.getcwd()
|
||||
#os.chdir(homedir + '/git/LSHTM_analysis/meta_data_analysis')
|
||||
#os.getcwd()
|
||||
#=======================================================================
|
||||
#%% variable assignment: input and output
|
||||
drug = 'pyrazinamide'
|
||||
#gene = 'pnca'
|
||||
datadir = homedir + '/git/Data'
|
||||
basedir = datadir + '/' + drug + '/input'
|
||||
gene = 'pncA'
|
||||
gene_match = gene + '_p.'
|
||||
|
||||
#==========
|
||||
# data dir
|
||||
#==========
|
||||
#indir = 'git/Data/pyrazinamide/input/original'
|
||||
datadir = homedir + '/' + 'git/Data'
|
||||
|
||||
#=======
|
||||
# input
|
||||
inpath = "/original"
|
||||
in_filename = "/aa_codes.csv"
|
||||
infile = basedir + inpath + in_filename
|
||||
print(infile)
|
||||
#=======
|
||||
# Input directory: <datadir>/<drug>/input. The separator before 'input' is
# required; without it the path collapses to '<datadir>/<drug>input'.
# NOTE(review): other lines in this file reference an 'input/original'
# subdirectory -- confirm which directory actually holds aa_codes.csv.
indir = datadir + '/' + drug + '/' + 'input'
|
||||
in_filename = 'aa_codes.csv'
|
||||
infile = indir + '/' + in_filename
|
||||
print('Input filename:', in_filename
|
||||
, '\nInput path:', indir)
|
||||
|
||||
#==========
|
||||
#read file
|
||||
#==========
|
||||
#=======
|
||||
# output: No output
|
||||
#=======
|
||||
|
||||
#outdir = datadir + '/' + drug + '/' + 'output'
|
||||
#out_filename = ''
|
||||
#outfile = outdir + '/' + out_filename
|
||||
#print('Output filename:', out_filename
|
||||
# , '\nOutput path:', outdir)
|
||||
|
||||
#%% end of variable assignment for input and output files
|
||||
#=======================================================================
|
||||
#%% Read input file
|
||||
my_aa = pd.read_csv(infile) #20, 6
|
||||
#assign the one_letter code as the row names so that it is easier to create a dict of dicts using index
|
||||
|
||||
# assign three_letter_code_lower as the row names so that it is easier to create
|
||||
# a dict of dicts using index
|
||||
#my_aa = pd.read_csv('aa_codes.csv', index_col = 0) #20, 6 #a way to do it since it is the first column
|
||||
my_aa = my_aa.set_index('three_letter_code_lower') #20, 5
|
||||
|
||||
#=========================================================
|
||||
#convert file to dict of dicts
|
||||
#=========================================================
|
||||
#convert each row into a dict of dicts so that there are 20 aa and 5 keys within
|
||||
#with your choice of column name that you have assigned to index as the "primary key".
|
||||
#using 'index' creates a dict of dicts
|
||||
#using 'records' creates a list of dicts
|
||||
#==================
|
||||
# convert file
|
||||
# to dict of dicts
|
||||
#====================
|
||||
# convert each row into a dict of dicts so that there are 20 aa and 5 keys within
|
||||
# with your choice of column name that you have assigned to index as the "primary key".
|
||||
# using 'index' creates a dict of dicts
|
||||
# using 'records' creates a list of dicts
|
||||
my_aa_dict = my_aa.to_dict('index') #20, with 5 subkeys
|
||||
|
||||
#================================================
|
||||
#dict of aa with their corresponding properties
|
||||
#This is defined twice
|
||||
# dict of aa with their corresponding properties
|
||||
# This is defined twice
|
||||
#================================================
|
||||
#7 categories: no overlap
|
||||
# 7 categories: no overlap
|
||||
qualities1 = { ('R', 'H', 'K'): 'Basic'
|
||||
, ('D', 'E'): 'Acidic'
|
||||
, ('N', 'Q'): 'Amidic'
|
||||
|
@ -66,7 +90,7 @@ qualities1 = { ('R', 'H', 'K'): 'Basic'
|
|||
, ('C', 'M'): 'Sulphur'
|
||||
}
|
||||
|
||||
#9 categories: allowing for overlap
|
||||
# 9 categories: allowing for overlap
|
||||
qualities2 = { ('R', 'H', 'K'): 'Basic'
|
||||
, ('D', 'E'): 'Acidc'
|
||||
, ('S', 'T', 'N', 'Q'): 'Polar'
|
||||
|
@ -78,6 +102,7 @@ qualities2 = { ('R', 'H', 'K'): 'Basic'
|
|||
, ('C', 'G', 'P'): 'Special'
|
||||
}
|
||||
|
||||
# taylor classification: allowing for overlap
|
||||
qualities_taylor = { ('R', 'H', 'K'): 'Basic'
|
||||
, ('D', 'E'): 'Acidc'
|
||||
, ('S', 'T', 'N', 'Q', 'C', 'Y', 'W', 'H', 'K', 'R', 'D', 'E'): 'Polar'
|
||||
|
@ -89,17 +114,19 @@ qualities_taylor = { ('R', 'H', 'K'): 'Basic'
|
|||
, ('C', 'G', 'P'): 'Special'
|
||||
}
|
||||
|
||||
# binary classification: hydrophilic or hydrophobic
|
||||
qualities_water = { ('D', 'E', 'N', 'P', 'Q', 'R', 'S'): 'hydrophilic'
|
||||
, ('A', 'C', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'T', 'V', 'W', 'X', 'Y'): 'hydrophobic'
|
||||
}
|
||||
|
||||
# polarity: no overlap
|
||||
qualities_polarity = { ('D', 'E'): 'acidic'
|
||||
, ('H', 'K', 'R'): 'basic'
|
||||
, ('C', 'G', 'N', 'Q', 'S', 'T', 'Y'): 'neutral'
|
||||
, ('A', 'F', 'I', 'L', 'M', 'P', 'V', 'W'): 'non-polar'
|
||||
}
|
||||
|
||||
# almost same as the one above
|
||||
# almost same as the one above but as pos, neg, polar and non-polar
|
||||
aa_calcprop = { ('D', 'E'): 'neg'
|
||||
, ('H', 'K', 'R'): 'pos'
|
||||
, ('N', 'Q', 'S', 'T', 'Y'): 'polar'
|
||||
|
@ -107,7 +134,7 @@ aa_calcprop = { ('D', 'E'): 'neg'
|
|||
}
|
||||
|
||||
#==============================================================================
|
||||
#adding amino acid properties to my dict of dicts
|
||||
# adding amino acid properties to my dict of dicts
|
||||
for k, v in my_aa_dict.items():
|
||||
#print (k,v)
|
||||
v['aa_prop1'] = str() #initialise keys
|
||||
|
@ -141,7 +168,9 @@ for k, v in my_aa_dict.items():
|
|||
if v['one_letter_code'] in group:
|
||||
v['aa_calcprop']+= aa_calcprop[group] # += for str concat
|
||||
|
||||
#COMMENT:VOILA!!! my_aa_dict is now a dict of dicts containing all associated properties for each aa
|
||||
# COMMENT:VOILA!!! my_aa_dict is now a dict of dicts containing all
|
||||
# associated properties for each aa
|
||||
#==============================================================================
|
||||
#%% end of script
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue