added dssp.py that runs, processes and outputs csv
This commit is contained in:
parent
08da425e7e
commit
3a1431d8ed
1 changed files with 189 additions and 0 deletions
189
scripts/dssp.py
Executable file
189
scripts/dssp.py
Executable file
|
@ -0,0 +1,189 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Created on Tue Apr 7 09:30:16 2020
|
||||
|
||||
@author: tanu
|
||||
"""
|
||||
import sys, os
|
||||
import re
|
||||
import pandas as pd
|
||||
from Bio.PDB import PDBParser
|
||||
from Bio.PDB.DSSP import DSSP
|
||||
import dms_tools2
|
||||
import dms_tools2.dssp
|
||||
import pprint as pp
|
||||
#%%
|
||||
#%% specify homedir and curr dir
|
||||
homedir = os.path.expanduser('~')
|
||||
|
||||
# set working dir
|
||||
os.getcwd()
|
||||
os.chdir(homedir + '/git/LSHTM_analysis/scripts')
|
||||
os.getcwd()
|
||||
#%% variable assignment: input and output
|
||||
#drug = 'pyrazinamide'
|
||||
#gene = 'pncA'
|
||||
#gene_match = gene + '_p.'
|
||||
|
||||
#drug = 'isoniazid'
|
||||
#gene = 'katG'
|
||||
|
||||
drug = 'cycloserine'
|
||||
gene = 'alr'
|
||||
#==========
|
||||
# data dir
|
||||
#==========
|
||||
#indir = 'git/Data/pyrazinamide/input/original'
|
||||
datadir = homedir + '/' + 'git/Data'
|
||||
|
||||
#=======
|
||||
# input
|
||||
#=======
|
||||
#indir = datadir + '/' + drug + '/' + 'output'
|
||||
indir = datadir + '/' + drug + '/' + 'input'
|
||||
in_filename = gene.lower() + '_complex' + '.pdb'
|
||||
#in_filename = 'katg_complex.pdb' # fixme for pnca(consistent filenames i.e pnca_complex.pdb)
|
||||
infile = indir + '/' + in_filename
|
||||
|
||||
#=======
|
||||
# output
|
||||
#=======
|
||||
outdir = datadir + '/' + drug + '/' + 'output'
|
||||
print('Output path:', outdir)
|
||||
|
||||
#out_filename = os.path.splitext(in_filename)[0]+'.dssp' # strip file ext
|
||||
dssp_filename = gene.lower() + '.dssp'
|
||||
dssp_file = outdir + '/' + dssp_filename
|
||||
print('Output dssp:', dssp_file)
|
||||
|
||||
dsspcsv_filename = gene.lower() + '_dssp.csv'
|
||||
dsspcsv_file = outdir + '/' + dsspcsv_filename
|
||||
print('Outfile dssp to csv: ', dsspcsv_file
|
||||
, '\n=============================================================')
|
||||
|
||||
#%% create .dssp from pdb
|
||||
def dssp_file_from_pdb(inputpdbfile, outfile, DSSP = "dssp"):
|
||||
"""
|
||||
Create a DSSP file from a PDB file
|
||||
|
||||
@param infile: pdb file
|
||||
@type infile: string
|
||||
|
||||
@param outfile: dssp file
|
||||
@type outfile: string
|
||||
|
||||
@param DSSP: DSSP executable (argument to os.system)
|
||||
@type DSSP: string
|
||||
|
||||
@return: none, creates dssp file
|
||||
"""
|
||||
# out_file = infile +'.dssp'
|
||||
# outfile = os.path.splitext(inputpdbfile)[0]+'.dssp' # strip file ext
|
||||
os.system("%s -i %s -o %s" % (DSSP, inputpdbfile, outfile))
|
||||
|
||||
#%% extract chain id from dssp
|
||||
|
||||
|
||||
#print(dssp.keys())
|
||||
#print(dssp.keys()[0][0])
|
||||
#print(len(dssp))
|
||||
#print(dssp.keys()[0][0])
|
||||
#print(dssp.keys()[len(dssp)-1][0])
|
||||
def extract_chain_dssp(inputpdbfile):
|
||||
"""
|
||||
Parameters
|
||||
----------
|
||||
inputpdbfile : TYPE
|
||||
DESCRIPTION.
|
||||
|
||||
Returns
|
||||
-------
|
||||
@return: chain_ids from dssp output of pdb file
|
||||
@type list
|
||||
|
||||
"""
|
||||
p = PDBParser()
|
||||
structure = p.get_structure(in_filename, infile)
|
||||
model = structure[0]
|
||||
dssp = DSSP(model, infile)
|
||||
|
||||
dssp_chains = []
|
||||
for num_aa in range(0, len(dssp)):
|
||||
# print(num_aa)
|
||||
# extract the chain id only and append to a list
|
||||
dssp_chains.append(dssp.keys()[num_aa][0])
|
||||
chainsL = list(set(dssp_chains))
|
||||
print(chainsL)
|
||||
# sort the list (since sets are not ordered) for convenience
|
||||
# this will be required for dssp_df
|
||||
my_chains = sorted(chainsL)
|
||||
print('dssp output for'
|
||||
, in_filename, 'contains:', len(my_chains)
|
||||
, 'chains:\n', my_chains)
|
||||
return my_chains
|
||||
|
||||
#%%
|
||||
def dssp_to_csv(inputdsspfile, outfile, pdbchainlist):
|
||||
"""
|
||||
Create a df from a dssp file containing ASA, RSA, SS for all chains
|
||||
|
||||
@param infile: dssp file
|
||||
@type infile: string
|
||||
|
||||
@param outfile: csv file
|
||||
@type outfile: string
|
||||
|
||||
@param DSSP: DSSP to df processing using dmstools
|
||||
@type DSSP: string
|
||||
|
||||
@return: none, creates csv file
|
||||
"""
|
||||
dssp_df = pd.DataFrame()
|
||||
|
||||
print('Total no. of chains: ', len(my_chains))
|
||||
for chain_id in my_chains:
|
||||
print('Chain id:', chain_id)
|
||||
dssp_cur = pd.DataFrame()
|
||||
dssp_cur = dms_tools2.dssp.processDSSP(inputdsspfile, chain = chain_id)
|
||||
#!!!Important!!!
|
||||
dssp_cur['chain_id'] = chain_id
|
||||
dssp_df = dssp_df.append(dssp_cur)
|
||||
pp.pprint(dssp_df)
|
||||
|
||||
# Rename column (amino acid) as 'wild_type' and (site} as 'position'
|
||||
# to be the same names as used in the file required for merging later.
|
||||
dssp_df.columns
|
||||
dssp_df.rename(columns = {'site':'position', 'amino_acid':'wild_type_dssp'}, inplace = True)
|
||||
dssp_df.columns
|
||||
|
||||
# sanity check
|
||||
# if len(dssp_df) == len(dssp):
|
||||
# print('PASS: length of dssp_df has correct length')
|
||||
# else:
|
||||
# print('FAIL: length mismatch for dssp_df'
|
||||
# , '\nexpected length:', len(dssp)
|
||||
# , '\nGot length:', len(dssp_df)
|
||||
# , 'Debug please!')
|
||||
|
||||
# write to csv
|
||||
dssp_df.to_csv(outfile, header=True, index = False)
|
||||
|
||||
print('Finished writing:', outfile
|
||||
, '\nNo. of rows:', len(dssp_df)
|
||||
, '\nNo. of cols:', len(dssp_df.columns)
|
||||
, '\n==============================================================')
|
||||
|
||||
#%%
|
||||
# call
|
||||
#dssp_file_from_pdb(infile, dssp_file, DSSP = "dssp")
|
||||
#my_chains = extract_chain_dssp(infile)
|
||||
#dssp_to_csv(dssp_file, dsspcsv_file, my_chains)
|
||||
#%%
|
||||
|
||||
def main():
|
||||
print('Running dssp')
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
Loading…
Add table
Add a link
Reference in a new issue