tidied and updated kd and dssp scripts & generated their respective outputs
This commit is contained in:
parent
37e1d43b76
commit
73e0029b65
6 changed files with 209 additions and 181 deletions
|
@ -5,28 +5,78 @@ Created on Thu Feb 6 12:18:24 2020
|
||||||
|
|
||||||
@author: tanu
|
@author: tanu
|
||||||
"""
|
"""
|
||||||
#http://foldxsuite.crg.eu/faq-page#
|
#=======================================================================
|
||||||
# after fold x downlaoded, extract and run it from
|
# Task: Residue Depth (rd) values for amino acid sequence using the
|
||||||
#https://biopython.org/DIST/docs/api/Bio.PDB.ResidueDepth%27-module.html
|
# Depth server.
|
||||||
#proDepth: https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0007072
|
# Depth server: http://cospi.iiserpune.ac.in/depth/htdocs/index.html
|
||||||
#Depth server: http://cospi.iiserpune.ac.in/depth/htdocs/index.html
|
# FIXME: for now input is a valid pdb code NOT a valid pdb file that you can upload
|
||||||
# needs biopython and msms
|
# Input: PDB file (valid pdb code)
|
||||||
|
|
||||||
# load libraries
|
# Output:
|
||||||
|
|
||||||
|
# useful links
|
||||||
|
# http://foldxsuite.crg.eu/faq-page#
|
||||||
|
# after fold x downlaoded, extract and run it from
|
||||||
|
# https://biopython.org/DIST/docs/api/Bio.PDB.ResidueDepth%27-module.html
|
||||||
|
# proDepth: https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0007072
|
||||||
|
# needs biopython and msms
|
||||||
|
#=======================================================================
|
||||||
|
#%% load packages
|
||||||
import sys, os
|
import sys, os
|
||||||
|
import re
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from Bio.PDB.ResidueDepth import ResidueDepth
|
from Bio.PDB.ResidueDepth import ResidueDepth
|
||||||
from Bio.PDB.PDBParser import PDBParser
|
from Bio.PDB.PDBParser import PDBParser
|
||||||
from Bio.PDB.ResidueDepth import get_surface
|
from Bio.PDB.ResidueDepth import get_surface
|
||||||
|
|
||||||
#%%
|
#%% specify input and output variables
|
||||||
homedir = os.path.expanduser('~') # spyder/python doesn't recognise tilde
|
homedir = os.path.expanduser('~') # spyder/python doesn't recognise tilde
|
||||||
|
|
||||||
|
# set working dir
|
||||||
os.getcwd()
|
os.getcwd()
|
||||||
os.chdir(homedir + '/git/LSHTM_analysis/meta_data_analysis/struct_params')
|
os.chdir(homedir + '/git/LSHTM_analysis/meta_data_analysis')
|
||||||
os.getcwd()
|
os.getcwd()
|
||||||
#%%
|
#=======================================================================
|
||||||
|
#%% variable assignment: input and output paths & filenames
|
||||||
|
drug = 'pyrazinamide'
|
||||||
|
gene = 'pncA'
|
||||||
|
gene_match = gene + '_p.'
|
||||||
|
|
||||||
|
#==========
|
||||||
|
# data dir
|
||||||
|
#==========
|
||||||
|
#indir = 'git/Data/pyrazinamide/input/original'
|
||||||
|
datadir = homedir + '/' + 'git/Data'
|
||||||
|
|
||||||
|
#=======
|
||||||
|
# input
|
||||||
|
#=======
|
||||||
|
indir = datadir + '/' + drug + '/' + 'input'
|
||||||
|
in_filename = '3pl1.pdb'
|
||||||
|
infile = indir + '/' + in_filename
|
||||||
|
print('Input filename:', in_filename
|
||||||
|
, '\nInput path:', indir)
|
||||||
|
|
||||||
|
#=======
|
||||||
|
# output
|
||||||
|
#=======
|
||||||
|
outdir = datadir + '/' + drug + '/' + 'output'
|
||||||
|
# specify output file
|
||||||
|
out_filename = 'XXX'
|
||||||
|
outfile = outdir + '/' + out_filename
|
||||||
|
print('Output filename: ', out_filename
|
||||||
|
, '\nOutput path: ', outdir)
|
||||||
|
|
||||||
|
#%% end of variable assignment for input and output files
|
||||||
|
#================================================================
|
||||||
|
# Read input pdb file
|
||||||
parser = PDBParser()
|
parser = PDBParser()
|
||||||
structure = parser.get_structure("3pl1", "/home/tanu/git/3pl1.pdb")
|
|
||||||
|
# extract the 3 letter pdb code
|
||||||
|
pdb_code = re.search(r'(^[0-9]{1}\w{3})', in_filename).group(1)
|
||||||
|
|
||||||
|
#structure = parser.get_structure("3pl1", "/home/tanu/git/3pl1.pdb")
|
||||||
|
structure = parser.get_structure(pdb_code, infile)
|
||||||
model = structure[0]
|
model = structure[0]
|
||||||
surface = get_surface(model)
|
surface = get_surface(model)
|
||||||
|
|
||||||
|
@ -38,7 +88,7 @@ rd.property_keys
|
||||||
baz = rd.property_list
|
baz = rd.property_list
|
||||||
|
|
||||||
|
|
||||||
#To calculate the residue depth (average atom depth of the atoms in a residue):
|
# To calculate the residue depth (average atom depth of the atoms in a residue):
|
||||||
from Bio.PDB.ResidueDepth import residue_depth
|
from Bio.PDB.ResidueDepth import residue_depth
|
||||||
chain = model['A']
|
chain = model['A']
|
||||||
res152 = chain[152]
|
res152 = chain[152]
|
||||||
|
|
|
@ -1,68 +1,85 @@
|
||||||
#!/home/tanu/anaconda3/envs/ContactMap/bin/python3
|
#!/home/tanu/anaconda3/envs/ContactMap/bin/python3
|
||||||
# Read a DSSP file into a data frame and pretty-print it
|
# Read a DSSP file into a data frame and pretty-print it
|
||||||
|
|
||||||
|
#https://jbloomlab.github.io/dms_tools2/dms_tools2.dssp.html
|
||||||
#https://jbloomlab.github.io/dms_tools2/dms_tools2.dssp.html
|
#https://jbloomlab.github.io/dms_tools2/dms_tools2.dssp.html
|
||||||
import sys, os
|
import sys, os
|
||||||
|
import re
|
||||||
|
import pandas as pd
|
||||||
|
from Bio.PDB import PDBParser
|
||||||
|
from Bio.PDB.DSSP import DSSP
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import pprint as pp
|
import pprint as pp
|
||||||
|
#from Bio.PDB.PDBParser import PDBParser
|
||||||
import dms_tools2
|
import dms_tools2
|
||||||
import dms_tools2.dssp
|
import dms_tools2.dssp
|
||||||
|
|
||||||
#%%
|
#%% specify input and output variables
|
||||||
# my working dir
|
homedir = os.path.expanduser('~')
|
||||||
homedir = os.path.expanduser('~') # spyder/python doesn't recognise tilde
|
|
||||||
os.getcwd()
|
|
||||||
os.chdir(homedir + '/git/LSHTM_analysis/meta_data_analysis/struct_params')
|
|
||||||
os.getcwd()
|
|
||||||
#%%
|
|
||||||
# sample example
|
|
||||||
dssp_file = "./3pl1.dssp"
|
|
||||||
dssp_df = dms_tools2.dssp.processDSSP(dssp_file, chain='A')
|
|
||||||
|
|
||||||
# outputs to console
|
#%% set working dir
|
||||||
#returns df with ASA and RSA (base on Tien at al 2013 (theor.) values)
|
os.getcwd()
|
||||||
#Link: https://en.wikipedia.org/wiki/Relative_accessible_surface_area
|
os.chdir(homedir + '/git/LSHTM_analysis/meta_data_analysis')
|
||||||
|
os.getcwd()
|
||||||
|
#=======================================================================
|
||||||
|
#%% variable assignment: input and output paths & filenames
|
||||||
|
drug = 'pyrazinamide'
|
||||||
|
gene = 'pncA'
|
||||||
|
#gene_match = gene + '_p.'
|
||||||
|
|
||||||
|
#==========
|
||||||
|
# data dir
|
||||||
|
#==========
|
||||||
|
#indir = 'git/Data/pyrazinamide/input/original'
|
||||||
|
datadir = homedir + '/' + 'git/Data'
|
||||||
|
|
||||||
|
#=======
|
||||||
|
# input
|
||||||
|
#=======
|
||||||
|
indir = datadir + '/' + drug + '/' + 'output'
|
||||||
|
#in_filename = 'pnca.dssp'
|
||||||
|
in_filename = gene.lower() +'.dssp'
|
||||||
|
infile = indir + '/' + in_filename
|
||||||
|
print('Input filename:', in_filename
|
||||||
|
, '\nInput path:', indir)
|
||||||
|
|
||||||
|
# specify PDB chain
|
||||||
|
my_chain = 'A'
|
||||||
|
|
||||||
|
#=======
|
||||||
|
# output
|
||||||
|
#=======
|
||||||
|
outdir = datadir + '/' + drug + '/' + 'output'
|
||||||
|
out_filename = gene.lower() + '_dssp_df'
|
||||||
|
outfile = outdir + '/' + out_filename
|
||||||
|
print('Output filename:', out_filename
|
||||||
|
, '\nOutput path:', outdir
|
||||||
|
,'\nOutfile: ', outfile)
|
||||||
|
|
||||||
|
#%% end of variable assignment for input and output files
|
||||||
|
#================================================================
|
||||||
|
# Process dssp output and extract into df
|
||||||
|
dssp_file = infile
|
||||||
|
dssp_df = dms_tools2.dssp.processDSSP(dssp_file, chain = my_chain)
|
||||||
|
# returns df with ASA and RSA (base on Tien at al 2013 (theor.) values)
|
||||||
|
# Link: https://en.wikipedia.org/wiki/Relative_accessible_surface_area
|
||||||
pp.pprint(dssp_df)
|
pp.pprint(dssp_df)
|
||||||
|
|
||||||
# write to csv
|
# Rename column (amino acid) as 'wild_type' and (site} as 'position'
|
||||||
dssp_df.to_csv('3pl1_dssp_df', header=True, index = False)
|
# to be the same names as used in the file required for merging later.
|
||||||
|
dssp_df.columns
|
||||||
|
dssp_df.rename(columns = {'site':'position', 'amino_acid':'wild_type'}, inplace = True)
|
||||||
|
dssp_df.columns
|
||||||
|
|
||||||
#%% specify variables for input and output paths and filenames
|
#%% Write ouput csv file
|
||||||
drug = "pyrazinamide"
|
print('Writing file:', outfile
|
||||||
#gene = "pnca"
|
, '\nFilename:', out_filename
|
||||||
|
, '\nPath:', outdir)
|
||||||
|
|
||||||
datadir = homedir + "/git/Data"
|
|
||||||
basedir = datadir + "/" + drug + "/input"
|
|
||||||
|
|
||||||
# input
|
|
||||||
inpath = "/processed"
|
|
||||||
in_filename = "/3pl1.dssp"
|
|
||||||
infile = basedir + inpath + in_filename
|
|
||||||
#print(infile)
|
|
||||||
|
|
||||||
# output file
|
|
||||||
outpath = "/output"
|
|
||||||
outdir = datadir + "/" + drug + outpath
|
|
||||||
out_filename = "/3pl1_dssp_df"
|
|
||||||
outfile = outdir + out_filename
|
|
||||||
print(outdir); print(outfile)
|
|
||||||
|
|
||||||
if not os.path.exists(datadir):
|
|
||||||
print('Error!', datadir, 'does not exist. Please ensure it exists. Dir struc specified in README.md')
|
|
||||||
os.makedirs(datadir)
|
|
||||||
exit()
|
|
||||||
|
|
||||||
if not os.path.exists(outdir):
|
|
||||||
print('Error!', outdir, 'does not exist.Please ensure it exists. Dir struc specified in README.md')
|
|
||||||
exit()
|
|
||||||
|
|
||||||
else:
|
|
||||||
print('Dir exists: Carrying on')
|
|
||||||
# end of variable assignment for input and output files
|
|
||||||
#%% <----- fixme
|
|
||||||
dssp_file = infile
|
|
||||||
dssp_df = dms_tools2.dssp.processDSSP(dssp_file, chain='A')
|
|
||||||
|
|
||||||
#%%
|
|
||||||
# write to csv
|
# write to csv
|
||||||
dssp_df.to_csv(outfile, header=True, index = False)
|
dssp_df.to_csv(outfile, header=True, index = False)
|
||||||
|
|
||||||
|
print('Finished writing:', out_filename
|
||||||
|
, '\nNo. of rows:', len(dssp_df)
|
||||||
|
, '\nNo. of cols:', len(dssp_df.columns))
|
||||||
|
print('======================================================================')
|
|
@ -5,3 +5,32 @@
|
||||||
# - Create eg: '~/git/Data/{original,processed}
|
# - Create eg: '~/git/Data/{original,processed}
|
||||||
# - Create eg: '~/git/Data/processed/' + drug (for each drug)
|
# - Create eg: '~/git/Data/processed/' + drug (for each drug)
|
||||||
# - Create eg: '~/git/Data/output/' + drug + '{plots, structure}'
|
# - Create eg: '~/git/Data/output/' + drug + '{plots, structure}'
|
||||||
|
|
||||||
|
|
||||||
|
#%% specify homedir as python doesn't recognise tilde
|
||||||
|
homedir = os.path.expanduser('~')
|
||||||
|
|
||||||
|
#%% variable assignment: input and output paths & filenames
|
||||||
|
drug = 'pyrazinamide'
|
||||||
|
gene = 'pncA'
|
||||||
|
gene_match = gene + '_p.'
|
||||||
|
|
||||||
|
#==========
|
||||||
|
# data dir
|
||||||
|
#==========
|
||||||
|
#indir = 'git/Data/pyrazinamide/input/original'
|
||||||
|
datadir = homedir + '/' + 'git/Data'
|
||||||
|
|
||||||
|
#==========
|
||||||
|
# input dir
|
||||||
|
#==========
|
||||||
|
indir = datadir + '/' + drug + '/' + 'input'
|
||||||
|
|
||||||
|
#============
|
||||||
|
# output dir
|
||||||
|
#============
|
||||||
|
# several output files
|
||||||
|
outdir = datadir + '/' + drug + '/' + 'output'
|
||||||
|
|
||||||
|
#%%end of variable assignment for input and output files
|
||||||
|
#==============================================================================
|
||||||
|
|
|
@ -6,7 +6,8 @@ Created on Tue Aug 6 12:56:03 2019
|
||||||
@author: tanu
|
@author: tanu
|
||||||
'''
|
'''
|
||||||
#=======================================================================
|
#=======================================================================
|
||||||
# Task: Hydrophobicity (Kd) values for amino acid sequence using the Kyt&-Doolittle
|
# Task: Hydrophobicity (Kd) values for amino acid sequence using the
|
||||||
|
# Kyt&-Doolittle.
|
||||||
# Same output as using the expasy server https://web.expasy.org/protscale/
|
# Same output as using the expasy server https://web.expasy.org/protscale/
|
||||||
|
|
||||||
# useful links
|
# useful links
|
||||||
|
@ -102,7 +103,7 @@ print('======================================================================')
|
||||||
# which will allow easy merging of the two dfs.
|
# which will allow easy merging of the two dfs.
|
||||||
|
|
||||||
# df1: df of aa seq with index reset to start from 1 (reflective of the actual aa position in a sequence)
|
# df1: df of aa seq with index reset to start from 1 (reflective of the actual aa position in a sequence)
|
||||||
# col name for wt is the same as reflected in the the AF_OR file to allow easy merging
|
# Name column of wt as 'wild_type' to be the same name used in the file required for merging later.
|
||||||
dfSeq = pd.DataFrame({'wild_type':list(sequence)})
|
dfSeq = pd.DataFrame({'wild_type':list(sequence)})
|
||||||
dfSeq.index = np.arange(1, len(dfSeq) + 1) # python is not inclusive
|
dfSeq.index = np.arange(1, len(dfSeq) + 1) # python is not inclusive
|
||||||
|
|
||||||
|
|
|
@ -1,72 +0,0 @@
|
||||||
#!/usr/bin/python
|
|
||||||
# Read a PDB and output DSSP to console
|
|
||||||
import sys, os
|
|
||||||
from Bio.PDB import PDBParser
|
|
||||||
from Bio.PDB.DSSP import DSSP
|
|
||||||
import pandas as pd
|
|
||||||
import pprint as pp
|
|
||||||
|
|
||||||
#%%
|
|
||||||
# TASK: read a pdb file and generate a dssp output file
|
|
||||||
# FIXME: Pending output dssp hasn't been generated
|
|
||||||
# needs dssp exe on linux
|
|
||||||
# may be easier to run the dssp exe locally
|
|
||||||
#%%
|
|
||||||
# my working dir
|
|
||||||
os.getcwd()
|
|
||||||
homedir = os.path.expanduser('~') # spyder/python doesn't recognise tilde
|
|
||||||
os.chdir(homedir + '/git/LSHTM_analysis/meta_data_analysis')
|
|
||||||
os.getcwd()
|
|
||||||
#%%
|
|
||||||
# specify variables for input and output paths and filenames
|
|
||||||
drug = "pyrazinamide"
|
|
||||||
#gene = "pnca"
|
|
||||||
|
|
||||||
datadir = homedir + "/git/Data"
|
|
||||||
basedir = datadir + "/" + drug + "/input"
|
|
||||||
|
|
||||||
# input
|
|
||||||
inpath = "/original"
|
|
||||||
|
|
||||||
# uncomment as necessary
|
|
||||||
in_filename = "/3pl1.pdb"
|
|
||||||
|
|
||||||
infile = basedir + inpath + in_filename
|
|
||||||
#print(infile)
|
|
||||||
|
|
||||||
# output file
|
|
||||||
outpath = "/processed"
|
|
||||||
outdir = datadir + "/" + drug + outpath
|
|
||||||
out_filename = "/3pl1.dssp"
|
|
||||||
outfile = outdir + out_filename
|
|
||||||
#print(outdir)
|
|
||||||
|
|
||||||
if not os.path.exists(datadir):
|
|
||||||
print('Error!', datadir, 'does not exist. Please ensure it exists. Dir struc specified in README.md')
|
|
||||||
os.makedirs(datadir)
|
|
||||||
exit()
|
|
||||||
|
|
||||||
if not os.path.exists(outdir):
|
|
||||||
print('Error!', outdir, 'does not exist.Please ensure it exists. Dir struc specified in README.md')
|
|
||||||
exit()
|
|
||||||
|
|
||||||
else:
|
|
||||||
print('Dir exists: Carrying on')
|
|
||||||
# end of variable assignment for input and output files
|
|
||||||
#%%
|
|
||||||
p = PDBParser()
|
|
||||||
structure = p.get_structure("3pl1", infile)
|
|
||||||
|
|
||||||
model = structure[0]
|
|
||||||
dssp = DSSP(model, infile)
|
|
||||||
#dssp = DSSP(model, infile, dssp='mkdssp') #incase you used DSSP2 exe
|
|
||||||
pp.pprint(dssp)
|
|
||||||
|
|
||||||
#DSSP data is accessed by a tuple - (chain id, residue id): RSA
|
|
||||||
a_key = list(dssp.keys())[3]
|
|
||||||
dssp[a_key]
|
|
||||||
|
|
||||||
pp.pprint(dssp.keys())
|
|
||||||
pp.pprint(dssp.property_dict)
|
|
||||||
pp.pprint(dssp.property_keys)
|
|
||||||
pp.pprint(dssp.property_list)
|
|
|
@ -1,51 +1,54 @@
|
||||||
#!/usr/bin/env python3
|
#!/bin/bash
|
||||||
# -*- coding: utf-8 -*-
|
#=======================================================================
|
||||||
"""
|
# Task: read a pdb file and generate a dssp output file
|
||||||
Created on Sun Feb 16 11:21:44 2020
|
# Input:
|
||||||
|
# pdb file
|
||||||
|
|
||||||
@author: tanu
|
# Output:
|
||||||
"""
|
# pdb_code.dssp
|
||||||
#!/usr/bin/bash
|
# needs dssp exe on linux
|
||||||
# Run dssp exe
|
# more efficient to run dssp exe locally
|
||||||
|
|
||||||
#%%
|
# note: double quotes for variable interpolation
|
||||||
# specify variables for input and output paths and filenames
|
#=======================================================================
|
||||||
drug="pyrazinamide"
|
# #%%specify variables for input and output paths and filenames
|
||||||
#gene = "pnca"
|
drug='pyrazinamide'
|
||||||
|
gene='pncA'
|
||||||
|
#convert to lowercase for consistency in filenames
|
||||||
|
gene_l=$(printf $gene | awk '{print tolower($0)}')
|
||||||
|
gene_match="${gene}_p."
|
||||||
|
#printf "${gene_match}\n"
|
||||||
|
|
||||||
datadir="~git/Data"
|
#==========
|
||||||
basedir=${datadir}"/"${drug}
|
# data dir
|
||||||
echo${basedir}
|
#==========
|
||||||
|
#indir = 'git/Data/pyrazinamide/input/original'
|
||||||
|
#datadir="~git/Data"
|
||||||
|
datadir="${HOME}/git/Data"
|
||||||
|
|
||||||
|
#=======
|
||||||
# input
|
# input
|
||||||
inpath="/original"
|
#=======
|
||||||
|
indir=${datadir}'/'${drug}'/input'
|
||||||
|
printf "Input dir: ${indir}\n"
|
||||||
|
in_filename='3pl1.pdb'
|
||||||
|
#infile=${basedir}${inpath}${in_filename}
|
||||||
|
infile=${indir}'/'${in_filename}
|
||||||
|
printf "Input file: ${infile}\n"
|
||||||
|
|
||||||
# uncomment as necessary
|
#=======
|
||||||
in_filename="/3pl1.pdb"
|
# output
|
||||||
|
#=======
|
||||||
|
outdir=${datadir}'/'${drug}$'/output'
|
||||||
|
printf "Output dir: ${outdir}"
|
||||||
|
#out_filename='3pl1.dssp'
|
||||||
|
out_filename="${gene_l}.dssp"
|
||||||
|
outfile=${outdir}'/'${out_filename}
|
||||||
|
printf "Output file: ${outfile}\n"
|
||||||
|
|
||||||
infile=${basedir}${inpath}${in_filename}
|
#%%end of variable assignment for input and output files
|
||||||
echo${infile}
|
#================================================================
|
||||||
|
# command line arg to run dssp and create output file
|
||||||
# output file
|
dssp -i ${infile} -o ${outfile}
|
||||||
outpath="/processed"
|
printf "Finished writing: ${outfile}\n"
|
||||||
outdir=${datadir}"/"${drug}${outpath}
|
#printf "Filename: ${out_filename}\nlocated in: ${outdir}\n"
|
||||||
out_filename="/3pl1.dssp"
|
|
||||||
outfile=${outdir}${out_filename}
|
|
||||||
echo${outdir}
|
|
||||||
|
|
||||||
if not os.path.exists(datadir):
|
|
||||||
print('Error!', datadir, 'does not exist. Please ensure it exists. Dir struc specified in README.md')
|
|
||||||
os.makedirs(datadir)
|
|
||||||
exit()
|
|
||||||
|
|
||||||
if not os.path.exists(outdir):
|
|
||||||
print('Error!', outdir, 'does not exist.Please ensure it exists. Dir struc specified in README.md')
|
|
||||||
exit()
|
|
||||||
|
|
||||||
else:
|
|
||||||
print('Dir exists: Carrying on')
|
|
||||||
# end of variable assignment for input and output files
|
|
||||||
#%%
|
|
||||||
# ommand line args
|
|
||||||
dssp -i 3pl1.pdb -o 3pl1.dssp
|
|
||||||
dssp -i
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue