adding scripts for struct params

This commit is contained in:
Tanushree Tunstall 2020-02-16 14:36:51 +00:00
parent 4ef68bdc1b
commit b56c0b8b68
5 changed files with 270 additions and 0 deletions

59
meta_data_analysis/RD.py Executable file
View file

@ -0,0 +1,59 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Feb 6 12:18:24 2020
@author: tanu
"""
#http://foldxsuite.crg.eu/faq-page#
# after FoldX is downloaded, extract and run it from
#https://biopython.org/DIST/docs/api/Bio.PDB.ResidueDepth%27-module.html
#proDepth: https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0007072
#Depth server: http://cospi.iiserpune.ac.in/depth/htdocs/index.html
# needs biopython and msms
# load libraries
import sys, os
import pandas as pd
from Bio.PDB.ResidueDepth import ResidueDepth
from Bio.PDB.PDBParser import PDBParser
from Bio.PDB.ResidueDepth import get_surface
#%%
# Exploratory script: compute residue depth for the 3pl1 structure with
# Biopython's ResidueDepth (needs the external msms executable) and load the
# results into pandas objects.
homedir = os.path.expanduser('~') # spyder/python doesn't recognise tilde
# Work from the struct_params directory so relative paths resolve there.
os.chdir(homedir + '/git/LSHTM_analysis/meta_data_analysis/struct_params')
#%%
parser = PDBParser()
# Build the PDB path from homedir instead of a hard-coded /home/<user> prefix.
structure = parser.get_structure("3pl1", homedir + "/git/3pl1.pdb")
model = structure[0]
# Molecular surface of the model; reused below for per-residue depth calls.
surface = get_surface(model)
# Per Biopython docs, ResidueDepth is keyed by (chain id, residue id) and
# holds a (residue depth, CA depth) pair for each residue.
rd = ResidueDepth(model)
print(rd['A',(' ', 152, ' ')])
foo = rd.property_dict   # {(chain id, residue id): (depth, CA depth)}
baz = rd.property_list   # [(residue, (depth, CA depth)), ...]
#To calculate the residue depth (average atom depth of the atoms in a residue):
from Bio.PDB.ResidueDepth import residue_depth
chain = model['A']
res152 = chain[152]
rd2 = residue_depth(res152, surface)
# df from dict: from_dict needs the mapping (property_dict); the list form
# (property_list) has no keys to use as the index.
# NOTE(review): the second value appears to be the CA depth rather than a
# surface measure, so the 'surface' column label may be misleading - confirm.
foo1 = pd.DataFrame.from_dict(foo, orient='index', columns = ['res_depth', 'surface'])
test = pd.Series(foo, name = "test")
# df from list
foo2 = pd.DataFrame(baz, columns = ['residue', 'residue depth'])
### iterate
# Walk the residues actually present in chain A rather than assuming a fixed
# numbering range; print the residue number followed by its depth.
for residue in chain:
    print(residue.get_id()[1])
    rd3 = residue_depth(residue, surface)
    print(rd3)

68
meta_data_analysis/dssp_df.py Executable file
View file

@ -0,0 +1,68 @@
#!/home/tanu/anaconda3/envs/ContactMap/bin/python3
# Read a DSSP file into a data frame and pretty-print it
#https://jbloomlab.github.io/dms_tools2/dms_tools2.dssp.html
import sys, os
import pandas as pd
import pprint as pp
import dms_tools2
import dms_tools2.dssp
#%%
# my working dir
# Read a DSSP file for chain A into a data frame with dms_tools2 and write
# it out as csv: first a quick sample run on a file in the working
# directory, then the run using the project's data directory layout.
homedir = os.path.expanduser('~') # spyder/python doesn't recognise tilde
os.chdir(homedir + '/git/LSHTM_analysis/meta_data_analysis/struct_params')
#%%
# sample example: DSSP file expected in the working directory
dssp_file = "./3pl1.dssp"
dssp_df = dms_tools2.dssp.processDSSP(dssp_file, chain='A')
# outputs to console
# returns df with ASA and RSA (based on Tien et al. 2013 (theor.) values)
# Link: https://en.wikipedia.org/wiki/Relative_accessible_surface_area
pp.pprint(dssp_df)
# write to csv
dssp_df.to_csv('3pl1_dssp_df', header=True, index = False)
#%% specify variables for input and output paths and filenames
drug = "pyrazinamide"
#gene = "pnca"
datadir = homedir + "/git/Data"
basedir = datadir + "/" + drug + "/input"
# input
inpath = "/processed"
in_filename = "/3pl1.dssp"
infile = basedir + inpath + in_filename
#print(infile)
# output file
outpath = "/output"
outdir = datadir + "/" + drug + outpath
out_filename = "/3pl1_dssp_df"
outfile = outdir + out_filename
print(outdir); print(outfile)
# Stop with a non-zero status when the expected directory layout is missing,
# so calling shells/pipelines can detect the failure.
if not os.path.exists(datadir):
    print('Error!', datadir, 'does not exist. Please ensure it exists. Dir struc specified in README.md')
    # The top-level data directory is created before stopping, so only its
    # contents need to be put in place before the next run.
    os.makedirs(datadir)
    sys.exit(1)
if not os.path.exists(outdir):
    print('Error!', outdir, 'does not exist.Please ensure it exists. Dir struc specified in README.md')
    sys.exit(1)
else:
    print('Dir exists: Carrying on')
# Fail early with a clear message instead of an error from inside the parser.
if not os.path.isfile(infile):
    print('Error!', infile, 'does not exist. Please ensure it exists. Dir struc specified in README.md')
    sys.exit(1)
# end of variable assignment for input and output files
#%% <----- fixme
dssp_file = infile
dssp_df = dms_tools2.dssp.processDSSP(dssp_file, chain='A')
#%%
# write to csv
dssp_df.to_csv(outfile, header=True, index = False)

20
meta_data_analysis/kd.py Normal file
View file

@ -0,0 +1,20 @@
#!/usr/bin/python
# hydrophobicity and SAA
#https://biopython.org/DIST/docs/api/Bio.SeqUtils.ProtParamData-pysrc.html
#https://jbloomlab.github.io/dms_tools2/dms_tools2.dssp.html
import sys, os
import pandas as pd
import pprint as pp
#import dms_tools2
#import dms_tools2.dssp
from Bio.SeqUtils import ProtParamData
#%%
# Load the Kyte-Doolittle hydropathy scale shipped with Biopython.
homedir = os.path.expanduser('~') # spyder/python doesn't recognise tilde
os.chdir(homedir + '/git/LSHTM_analysis/meta_data_analysis/struct_params')
#%%
# ProtParamData.kd is a lookup table (dict keyed by one-letter amino-acid
# code), not a function, so it cannot be called with a PDB file name.
# TODO: map these values onto the residues of 3pl1.pdb once its sequence has
# been extracted.
foo = ProtParamData.kd
pp.pprint(foo)

View file

@ -0,0 +1,72 @@
#!/usr/bin/python
# Read a PDB and output DSSP to console
import sys, os
from Bio.PDB import PDBParser
from Bio.PDB.DSSP import DSSP
import pandas as pd
import pprint as pp
#%%
# TASK: read a pdb file and generate a dssp output file
# FIXME: pending - the output dssp file has not been generated yet
# needs dssp exe on linux
# may be easier to run the dssp exe locally
#%%
# my working dir
# Parse a PDB file, run DSSP on its first model through Biopython and
# pretty-print the resulting property map to the console.
# my working dir
homedir = os.path.expanduser('~') # spyder/python doesn't recognise tilde
os.chdir(homedir + '/git/LSHTM_analysis/meta_data_analysis')
#%%
# specify variables for input and output paths and filenames
drug = "pyrazinamide"
#gene = "pnca"
datadir = homedir + "/git/Data"
basedir = datadir + "/" + drug + "/input"
# input
inpath = "/original"
# uncomment as necessary
in_filename = "/3pl1.pdb"
infile = basedir + inpath + in_filename
#print(infile)
# output file
outpath = "/processed"
outdir = datadir + "/" + drug + outpath
out_filename = "/3pl1.dssp"
# Intended destination of the dssp output; nothing below writes to it yet.
outfile = outdir + out_filename
#print(outdir)
# Stop with a non-zero status when the expected directory layout is missing,
# so calling shells/pipelines can detect the failure.
if not os.path.exists(datadir):
    print('Error!', datadir, 'does not exist. Please ensure it exists. Dir struc specified in README.md')
    # The top-level data directory is created before stopping, so only its
    # contents need to be put in place before the next run.
    os.makedirs(datadir)
    sys.exit(1)
if not os.path.exists(outdir):
    print('Error!', outdir, 'does not exist.Please ensure it exists. Dir struc specified in README.md')
    sys.exit(1)
else:
    print('Dir exists: Carrying on')
# Fail early with a clear message instead of an error from inside the parser.
if not os.path.isfile(infile):
    print('Error!', infile, 'does not exist. Please ensure it exists. Dir struc specified in README.md')
    sys.exit(1)
# end of variable assignment for input and output files
#%%
p = PDBParser()
structure = p.get_structure("3pl1", infile)
model = structure[0]
# Runs the external dssp executable on the PDB file for this model.
dssp = DSSP(model, infile)
#dssp = DSSP(model, infile, dssp='mkdssp') #in case you used DSSP2 exe
pp.pprint(dssp)
#DSSP data is accessed by a tuple - (chain id, residue id): RSA
a_key = list(dssp.keys())[3]
# Show one example entry; a bare expression would display nothing when the
# file is run as a script.
pp.pprint(dssp[a_key])
pp.pprint(dssp.keys())
pp.pprint(dssp.property_dict)
pp.pprint(dssp.property_keys)
pp.pprint(dssp.property_list)

View file

@ -0,0 +1,51 @@
#!/usr/bin/bash
# Created on Sun Feb 16 11:21:44 2020
# @author: tanu
#
# Run the dssp executable on the 3pl1 PDB file and write the .dssp output.
# Requires the dssp executable to be available on PATH.

# specify variables for input and output paths and filenames
drug="pyrazinamide"
#gene="pnca"
# A quoted "~" is not expanded by the shell, so use ${HOME} explicitly.
datadir="${HOME}/git/Data"
# NOTE(review): the companion Python scripts build basedir as
# <datadir>/<drug>/input; this script omits "/input" - confirm which layout
# is intended.
basedir="${datadir}/${drug}"
echo "${basedir}"

# input
inpath="/original"
# uncomment as necessary
in_filename="/3pl1.pdb"
infile="${basedir}${inpath}${in_filename}"
echo "${infile}"

# output file
outpath="/processed"
outdir="${datadir}/${drug}${outpath}"
out_filename="/3pl1.dssp"
outfile="${outdir}${out_filename}"
echo "${outdir}"

# Stop with a non-zero status when the expected directory layout is missing.
if [ ! -e "${datadir}" ]; then
    echo "Error! ${datadir} does not exist. Please ensure it exists. Dir struc specified in README.md"
    # The top-level data directory is created before stopping, so only its
    # contents need to be put in place before the next run.
    mkdir -p "${datadir}"
    exit 1
fi

if [ ! -e "${outdir}" ]; then
    echo "Error! ${outdir} does not exist.Please ensure it exists. Dir struc specified in README.md"
    exit 1
else
    echo "Dir exists: Carrying on"
fi
# end of variable assignment for input and output files

# command line args: read the PDB given by infile, write DSSP output to outfile
dssp -i "${infile}" -o "${outfile}"