LSHTM_analysis/meta_data_analysis/RD.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Feb  6 12:18:24 2020

@author: tanu
"""
#=======================================================================
# Task: Residue Depth (rd) values for amino acid sequence using the
# Depth server.
# Depth server: http://cospi.iiserpune.ac.in/depth/htdocs/index.html
# FIXME: for now input is a valid pdb code NOT a valid pdb file that you can upload
# Input: PDB file  (valid pdb code)

# Output:

# useful links
# http://foldxsuite.crg.eu/faq-page#
# after FoldX is downloaded, extract and run it from
# https://biopython.org/DIST/docs/api/Bio.PDB.ResidueDepth%27-module.html
# proDepth: https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0007072
# needs biopython and msms
#=======================================================================
#%% load packages
import sys, os
import re
import pandas as pd
from Bio.PDB.ResidueDepth import ResidueDepth
from Bio.PDB.PDBParser import PDBParser
from Bio.PDB.ResidueDepth import get_surface

#%% specify input and output variables
# expanduser() resolves the home directory explicitly, because
# spyder/python doesn't recognise a literal tilde in paths
homedir = os.path.expanduser('~')

# set working dir (the getcwd() return values are discarded)
os.getcwd()
os.chdir(f'{homedir}/git/LSHTM_analysis/meta_data_analysis')
os.getcwd()
#=======================================================================
#%% variable assignment: input and output paths & filenames
drug = 'pyrazinamide'
gene = 'pncA'
gene_match = f'{gene}_p.'

#==========
# data dir
#==========
datadir = '/'.join((homedir, 'git/Data'))

#=======
# input
#=======
# <datadir>/<drug>/input/<in_filename>
indir = '/'.join((datadir, drug, 'input'))
in_filename = '3pl1.pdb'
infile = '/'.join((indir, in_filename))
print('Input filename:', in_filename, '\nInput path:', indir)

#=======
# output
#=======
# <datadir>/<drug>/output/<out_filename>
outdir = '/'.join((datadir, drug, 'output'))
# specify output file ('XXX' is a placeholder name)
out_filename = 'XXX'
outfile = '/'.join((outdir, out_filename))
print('Output filename: ', out_filename, '\nOutput path: ', outdir)

#%% end of variable assignment for input and output files
#================================================================
# Read input pdb file
parser = PDBParser()

# extract the 4-character pdb code (one digit followed by three word
# characters) from the start of the input filename
pdb_match = re.search(r'(^[0-9]{1}\w{3})', in_filename)
if pdb_match is None:
    # fail with a clear message rather than the AttributeError raised by
    # calling .group() on None
    raise ValueError('Input filename does not start with a pdb code: '
                     + in_filename)
pdb_code = pdb_match.group(1)

structure = parser.get_structure(pdb_code, infile)
model = structure[0]  # first model of the parsed structure
# surface of the model, reused below for single-residue depth calculations
# (the file header notes that this needs msms)
surface = get_surface(model)

# residue depth for the whole model
rd = ResidueDepth(model)
# results are looked up by chain id and residue id
print(rd['A',(' ', 152, ' ')])
# dict and list views of the results (rd.keys() / rd.property_keys give
# the lookup keys if they are needed)
# NOTE(review): exact element layout of property_dict/property_list should
# be confirmed against the Biopython ResidueDepth documentation
foo = rd.property_dict
baz = rd.property_list


# To calculate the residue depth (average atom depth of the atoms in a residue):
from Bio.PDB.ResidueDepth import residue_depth
chain = model['A']
res152 = chain[152]
rd2 = residue_depth(res152, surface)

# df from dict
# DataFrame.from_dict needs a mapping, so it must be given the dict (foo,
# i.e. rd.property_dict) and not the list (baz), which has no keys/values.
# NOTE(review): Biopython documents the second value per residue as the
# C-alpha depth -- confirm that 'surface' is the intended column name.
foo1 = pd.DataFrame.from_dict(foo, orient='index', columns = ['res_depth', 'surface'])
test = pd.Series(foo, name = "test")

# df from list
# baz (rd.property_list) is passed as rows with two columns
foo2 = pd.DataFrame(baz, columns = ['residue', 'residue depth'])


### iterate
# Print the residue number and residue depth for each residue of the chain.
# Iterating over the chain itself avoids a hard-coded residue count and the
# undefined name `res` (the earlier `res+i` raised a NameError).
for residue in chain:
    hetero_flag, resseq, _icode = residue.get_id()
    # residue ids have the form (' ', 152, ' '); a non-blank first field
    # marks hetero residues/waters, which are skipped here
    if hetero_flag != ' ':
        continue
    print(resseq)
    rd3 = residue_depth(residue, surface)
    print(rd3)