From ec25e9fd2df32b547be82816b567052951e91e15 Mon Sep 17 00:00:00 2001
From: Tanushree Tunstall
Date: Sun, 16 Feb 2020 14:36:51 +0000
Subject: [PATCH] adding scripts for struct params

---
Review notes (ignored by git am; not part of the commit message):
 - Line structure of the patch restored (one header / hunk line per line).
 - RD.py: the final loop referenced an undefined name `res`; it now iterates
   over the residues of `chain`. DataFrame.from_dict() is given the dict
   (`foo`) rather than the list (`baz`). The PDB path is built from `homedir`.
 - kd.py: `ProtParamData.kd(3pl1.pdb)` was a syntax error and `kd` is a dict,
   not a callable; the dict is now bound directly.
 - dssp_df.py, run_pdb_dssp.py: error paths use sys.exit(1) instead of exit().
 - run_pdb_dssp.sh: python shebang/docstring and python `if` blocks replaced
   by bash; `echo` spacing and the quoted `~git` path fixed. The last line is
   kept exactly as received (it appears to be cut off).
 - Per-file line counts are unchanged; blob ids on the `index` lines are those
   of the original submission.

 meta_data_analysis/RD.py           | 59 ++++++++++++++++++++++++
 meta_data_analysis/dssp_df.py      | 68 ++++++++++++++++++++++++++++
 meta_data_analysis/kd.py           | 20 +++++++++
 meta_data_analysis/run_pdb_dssp.py | 72 ++++++++++++++++++++++++++++++
 meta_data_analysis/run_pdb_dssp.sh | 51 +++++++++++++++++++++
 5 files changed, 270 insertions(+)
 create mode 100755 meta_data_analysis/RD.py
 create mode 100755 meta_data_analysis/dssp_df.py
 create mode 100644 meta_data_analysis/kd.py
 create mode 100755 meta_data_analysis/run_pdb_dssp.py
 create mode 100755 meta_data_analysis/run_pdb_dssp.sh

diff --git a/meta_data_analysis/RD.py b/meta_data_analysis/RD.py
new file mode 100755
index 0000000..5bb5e6b
--- /dev/null
+++ b/meta_data_analysis/RD.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Feb 6 12:18:24 2020
+
+@author: tanu
+"""
+#http://foldxsuite.crg.eu/faq-page#
+# after fold x downloaded, extract and run it from
+#https://biopython.org/DIST/docs/api/Bio.PDB.ResidueDepth%27-module.html
+#proDepth: https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0007072
+#Depth server: http://cospi.iiserpune.ac.in/depth/htdocs/index.html
+# needs biopython and msms
+
+# load libraries
+import sys, os
+import pandas as pd
+from Bio.PDB.ResidueDepth import ResidueDepth
+from Bio.PDB.PDBParser import PDBParser
+from Bio.PDB.ResidueDepth import get_surface
+
+#%%
+homedir = os.path.expanduser('~') # spyder/python doesn't recognise tilde
+os.getcwd()
+os.chdir(homedir + '/git/LSHTM_analysis/meta_data_analysis/struct_params')
+os.getcwd()
+#%%
+parser = PDBParser()
+structure = parser.get_structure("3pl1", homedir + "/git/3pl1.pdb")
+model = structure[0]
+surface = get_surface(model)
+
+rd = ResidueDepth(model)
+print(rd['A',(' ', 152, ' ')])
+rd.keys()
+foo = rd.property_dict
+rd.property_keys
+baz = rd.property_list
+
+
+#To calculate the residue depth (average atom depth of the atoms in a residue):
+from Bio.PDB.ResidueDepth import residue_depth
+chain = model['A']
+res152 = chain[152]
+rd2 = residue_depth(res152, surface)
+
+# df from dict
+foo1 = pd.DataFrame.from_dict(foo, orient='index', columns = ['res_depth', 'surface'])
+test = pd.Series(foo, name = "test")
+
+# df from list
+foo2 = pd.DataFrame(baz, columns = ['residue', 'residue depth'])
+
+
+### iterate over every residue of the chain (previously used an undefined name)
+for residue in chain:
+    print(residue.get_id()[1])
+    rd3 = residue_depth(residue, surface)
+    print(rd3)
\ No newline at end of file
diff --git a/meta_data_analysis/dssp_df.py b/meta_data_analysis/dssp_df.py
new file mode 100755
index 0000000..2791a27
--- /dev/null
+++ b/meta_data_analysis/dssp_df.py
@@ -0,0 +1,68 @@
+#!/home/tanu/anaconda3/envs/ContactMap/bin/python3
+# Read a DSSP file into a data frame and pretty-print it
+
+#https://jbloomlab.github.io/dms_tools2/dms_tools2.dssp.html
+import sys, os
+import pandas as pd
+import pprint as pp
+import dms_tools2
+import dms_tools2.dssp
+
+#%%
+# my working dir
+homedir = os.path.expanduser('~') # spyder/python doesn't recognise tilde
+os.getcwd()
+os.chdir(homedir + '/git/LSHTM_analysis/meta_data_analysis/struct_params')
+os.getcwd()
+#%%
+# sample example
+dssp_file = "./3pl1.dssp"
+dssp_df = dms_tools2.dssp.processDSSP(dssp_file, chain='A')
+
+# outputs to console
+#returns df with ASA and RSA (based on Tien et al. 2013 (theor.) values)
+#Link: https://en.wikipedia.org/wiki/Relative_accessible_surface_area
+pp.pprint(dssp_df)
+
+# write to csv
+dssp_df.to_csv('3pl1_dssp_df', header=True, index = False)
+
+#%% specify variables for input and output paths and filenames
+drug = "pyrazinamide"
+#gene = "pnca"
+
+datadir = homedir + "/git/Data"
+basedir = datadir + "/" + drug + "/input"
+
+# input
+inpath = "/processed"
+in_filename = "/3pl1.dssp"
+infile = basedir + inpath + in_filename
+#print(infile)
+
+# output file
+outpath = "/output"
+outdir = datadir + "/" + drug + outpath
+out_filename = "/3pl1_dssp_df"
+outfile = outdir + out_filename
+print(outdir); print(outfile)
+
+if not os.path.exists(datadir):
+    print('Error!', datadir, 'does not exist. Please ensure it exists. Dir struc specified in README.md')
+    os.makedirs(datadir)
+    sys.exit(1)
+
+if not os.path.exists(outdir):
+    print('Error!', outdir, 'does not exist.Please ensure it exists. Dir struc specified in README.md')
+    sys.exit(1)
+
+else:
+    print('Dir exists: Carrying on')
+# end of variable assignment for input and output files
+#%% <----- fixme
+dssp_file = infile
+dssp_df = dms_tools2.dssp.processDSSP(dssp_file, chain='A')
+
+#%%
+# write to csv
+dssp_df.to_csv(outfile, header=True, index = False)
diff --git a/meta_data_analysis/kd.py b/meta_data_analysis/kd.py
new file mode 100644
index 0000000..dabeb64
--- /dev/null
+++ b/meta_data_analysis/kd.py
@@ -0,0 +1,20 @@
+#!/usr/bin/python
+
+# hydrophobicity and SAA
+#https://biopython.org/DIST/docs/api/Bio.SeqUtils.ProtParamData-pysrc.html
+#https://jbloomlab.github.io/dms_tools2/dms_tools2.dssp.html
+import sys, os
+import pandas as pd
+import pprint as pp
+#import dms_tools2
+#import dms_tools2.dssp
+
+from Bio.SeqUtils import ProtParamData
+#%%
+homedir = os.path.expanduser('~') # spyder/python doesn't recognise tilde
+os.getcwd()
+os.chdir(homedir + '/git/LSHTM_analysis/meta_data_analysis/struct_params')
+os.getcwd()
+#%%
+# kd is a dict (one-letter amino acid code -> Kyte & Doolittle hydrophobicity index), not a function
+foo = ProtParamData.kd
diff --git a/meta_data_analysis/run_pdb_dssp.py b/meta_data_analysis/run_pdb_dssp.py
new file mode 100755
index 0000000..3a57f02
--- /dev/null
+++ b/meta_data_analysis/run_pdb_dssp.py
@@ -0,0 +1,72 @@
+#!/usr/bin/python
+# Read a PDB and output DSSP to console
+import sys, os
+from Bio.PDB import PDBParser
+from Bio.PDB.DSSP import DSSP
+import pandas as pd
+import pprint as pp
+
+#%%
+# TASK: read a pdb file and generate a dssp output file
+# FIXME: Pending output dssp hasn't been generated
+# needs dssp exe on linux
+# may be easier to run the dssp exe locally
+#%%
+# my working dir
+os.getcwd()
+homedir = os.path.expanduser('~') # spyder/python doesn't recognise tilde
+os.chdir(homedir + '/git/LSHTM_analysis/meta_data_analysis')
+os.getcwd()
+#%%
+# specify variables for input and output paths and filenames
+drug = "pyrazinamide"
+#gene = "pnca"
+
+datadir = homedir + "/git/Data"
+basedir = datadir + "/" + drug + "/input"
+
+# input
+inpath = "/original"
+
+# uncomment as necessary
+in_filename = "/3pl1.pdb"
+
+infile = basedir + inpath + in_filename
+#print(infile)
+
+# output file
+outpath = "/processed"
+outdir = datadir + "/" + drug + outpath
+out_filename = "/3pl1.dssp"
+outfile = outdir + out_filename
+#print(outdir)
+
+if not os.path.exists(datadir):
+    print('Error!', datadir, 'does not exist. Please ensure it exists. Dir struc specified in README.md')
+    os.makedirs(datadir)
+    sys.exit(1)
+
+if not os.path.exists(outdir):
+    print('Error!', outdir, 'does not exist.Please ensure it exists. Dir struc specified in README.md')
+    sys.exit(1)
+
+else:
+    print('Dir exists: Carrying on')
+# end of variable assignment for input and output files
+#%%
+p = PDBParser()
+structure = p.get_structure("3pl1", infile)
+
+model = structure[0]
+dssp = DSSP(model, infile)
+#dssp = DSSP(model, infile, dssp='mkdssp') #incase you used DSSP2 exe
+pp.pprint(dssp)
+
+#DSSP data is accessed by a tuple - (chain id, residue id): RSA
+a_key = list(dssp.keys())[3]
+dssp[a_key]
+
+pp.pprint(dssp.keys())
+pp.pprint(dssp.property_dict)
+pp.pprint(dssp.property_keys)
+pp.pprint(dssp.property_list)
diff --git a/meta_data_analysis/run_pdb_dssp.sh b/meta_data_analysis/run_pdb_dssp.sh
new file mode 100755
index 0000000..d6050f4
--- /dev/null
+++ b/meta_data_analysis/run_pdb_dssp.sh
@@ -0,0 +1,51 @@
+#!/usr/bin/env bash
+# -*- coding: utf-8 -*-
+#
+# Created on Sun Feb 16 11:21:44 2020
+#
+# @author: tanu
+#
+# NOTE: this is a bash script; run it with bash, not python
+# Run dssp exe
+
+#%%
+# specify variables for input and output paths and filenames
+drug="pyrazinamide"
+#gene = "pnca"
+
+datadir="${HOME}/git/Data"
+basedir="${datadir}/${drug}"
+echo "${basedir}"
+
+# input
+inpath="/original"
+
+# uncomment as necessary
+in_filename="/3pl1.pdb"
+
+infile="${basedir}${inpath}${in_filename}"
+echo "${infile}"
+
+# output file
+outpath="/processed"
+outdir="${datadir}/${drug}${outpath}"
+out_filename="/3pl1.dssp"
+outfile="${outdir}${out_filename}"
+echo "${outdir}"
+
+if [ ! -d "${datadir}" ]; then
+    echo "Error! ${datadir} does not exist. Please ensure it exists. Dir struc specified in README.md"
+    mkdir -p "${datadir}"
+    exit 1
+fi
+if [ ! -d "${outdir}" ]; then
+    echo "Error! ${outdir} does not exist.Please ensure it exists. Dir struc specified in README.md"
+    exit 1
+else
+    echo "Dir exists: Carrying on"
+fi
+# end of variable assignment for input and output files
+#%%
+# command line args
+dssp -i 3pl1.pdb -o 3pl1.dssp
+dssp -i