From 65db4a090e80bdd58612ed24f619b3929a7cdfc6 Mon Sep 17 00:00:00 2001
From: Tanushree Tunstall
Date: Wed, 13 May 2020 16:54:20 +0100
Subject: [PATCH] added pdb_chain splitter code and wrapper

---
 scripts/chain_splitter.py     | 28 ++++++++++++++
 scripts/data_extraction.py    |  1 -
 scripts/pdb_chain_splitter.py | 72 +++++++++++++++++++++++++++++++++++
 scripts/pdbtools              |  2 +-
 scripts/pdbtools_commands     | 13 +++++++
 5 files changed, 114 insertions(+), 2 deletions(-)
 create mode 100755 scripts/chain_splitter.py
 create mode 100755 scripts/pdb_chain_splitter.py

diff --git a/scripts/chain_splitter.py b/scripts/chain_splitter.py
new file mode 100755
index 0000000..b798dde
--- /dev/null
+++ b/scripts/chain_splitter.py
@@ -0,0 +1,28 @@
+#!/usr/bin/python3
+
+#=======================================================================
+# TASK: extract chain from pdb and save each chain as a separate file
+
+# link for saving each chain as a separate file
+#=======================================================================
+__description__ = \
+"""
+pdb_chain_splitter.py
+
+extracts chains and saves them as separate pdb files.
+"""
+__author__ = "Tanushree Tunstall"
+__date__ = ""
+
+from Bio.PDB import Select, PDBIO
+from Bio.PDB.PDBParser import PDBParser
+
+class ChainSelect(Select):
+    """Selector that lets through only the chain whose id equals `chain`."""
+    def __init__(self, chain):
+        self.chain = chain  # id of the one chain to keep
+
+    def accept_chain(self, chain):
+        """Return 1 if `chain` has the wanted id, 0 for every other chain."""
+        wanted_id = self.chain
+        return 1 if chain.get_id() == wanted_id else 0
diff --git a/scripts/data_extraction.py b/scripts/data_extraction.py
index 259afa1..5c6f4c6 100755
--- a/scripts/data_extraction.py
+++ b/scripts/data_extraction.py
@@ -61,7 +61,6 @@ drug = args.drug
 gene = args.gene
 gene_match = gene + '_p.'
 
-
 # building cols to extract
 dr_muts_col = 'dr_mutations_' + drug
 other_muts_col = 'other_mutations_' + drug
diff --git a/scripts/pdb_chain_splitter.py b/scripts/pdb_chain_splitter.py
new file mode 100755
index 0000000..a0d7e70
--- /dev/null
+++ b/scripts/pdb_chain_splitter.py
@@ -0,0 +1,72 @@
+#!/usr/bin/env python3
+
+# Copyright 2020, Tanushree Tunstall
+# This program is distributed under General Public License v. 3. See the file
+# COPYING for a copy of the license.
+
+__description__ = \
+"""
+chain_splitter.py
+
+extract chain/s from pdb and saves each chain as a separate file
+"""
+__author__ = "Tanushree Tunstall"
+__date__ = ""
+#=======================================================================
+import os, shutil, sys
+#from pdbtools.helper import cmdline
+from chain_splitter import ChainSelect
+from Bio.PDB import Select, PDBIO
+from Bio.PDB.PDBParser import PDBParser
+#io = PDBIO()
+import argparse
+#=======================================================================
+
+def main():
+    """
+    Split a pdb file into one output file per requested chain.
+
+    Example use:
+    pdb_chain_splitter.py -i <pdb_file> -c aB
+    Chain ids are upper-cased; each one that exists in the structure is
+    written to <out_path>/<out_file>_<CHAIN>.pdb, others are skipped.
+    Extracts chain 'A' by default.
+    FIXME: extract all chains from the given pdb and write them out individually
+    """
+    arg_parser = argparse.ArgumentParser()
+    # required: the old default was the string 'None', which was then used as a file name
+    arg_parser.add_argument('-i', '--pdb_file', help='provide pdb file', required = True)
+    arg_parser.add_argument('-c', '--chain', help='chain/s to extract without spaces. Case insensitive.', nargs = '+', default = ['A'])
+    arg_parser.add_argument('-p', '--out_path', help='specify output path', default = '.', type = str)
+    arg_parser.add_argument('-o', '--out_file', help='specify output filename. Will be used as a prefix to append chain id and pdb file extension', default = 'pdb_file_chain', type = str)
+    args = arg_parser.parse_args()
+
+    pdb_file = args.pdb_file
+    print('input pdb file:', pdb_file)
+
+    # '-c aB' and '-c a B' both mean chains A and B: flatten every token
+    # into single ids, upper-case them and drop repeats, keeping order.
+    chains = list(dict.fromkeys(c.upper() for token in args.chain for c in token))
+    print('user supplied chain:', chains)
+
+    # get structure
+    parser = PDBParser(PERMISSIVE=1)
+    structure = parser.get_structure(pdb_file, pdb_file)
+    print('input pdb filename:', structure.get_id())
+    available = {chain.get_id() for chain in structure.get_chains()}
+
+    # one writer serves every chain; only the selector differs per save
+    io = PDBIO()
+    io.set_structure(structure)
+    for chain in chains:
+        if chain not in available:
+            # ChainSelect would reject everything: don't write a chain-less file
+            print('Skipping chain:', chain, '(not found in', pdb_file + ')')
+            continue
+        print('Extracting chain:', chain)
+        pdb_chain_file = os.path.join(args.out_path, '{}_{}.pdb'.format(args.out_file, chain))
+        io.save(pdb_chain_file, ChainSelect(chain))
+
+if __name__ == "__main__":
+    main()
+
diff --git a/scripts/pdbtools b/scripts/pdbtools
index 9d347c6..8c46611 160000
--- a/scripts/pdbtools
+++ b/scripts/pdbtools
@@ -1 +1 @@
-Subproject commit 9d347c663a60529eda06c03e816423d546dc4104
+Subproject commit 8c46611c8ceb37b680bc7bbaa161f284f0742f24
diff --git a/scripts/pdbtools_commands b/scripts/pdbtools_commands
index ff1b274..36b1cc7 100644
--- a/scripts/pdbtools_commands
+++ b/scripts/pdbtools_commands
@@ -10,3 +10,16 @@ home/tanu/git/LSHTM_analysis/scripts/pdbtools/scripts/pdb_residue_renumber /home
 # extract seq from structure
 #======================================================
 /home/tanu/git/LSHTM_analysis/scripts/pdbtools/scripts/pdb_seq -a /home/tanu/git/Data/ethambutol/input/3byw.pdb > 3byw_seq.txt
+#/home/tanu/git/LSHTM_analysis/scripts/pdbtools/scripts/pdb_seq -c A -a 
/home/tanu/git/Data/ethambutol/input/3byw.pdb > 3byw_seq.txt + +#====================================================== +# mutate residue: FIXME, needs charm +#====================================================== +/home/tanu/git/LSHTM_analysis/scripts/pdbtools/scripts/pdb_mutator -r 39 -m XXX /home/tanu/git/Data/ethambutol/input/3byw.pdb + +#====================================================== +# check ligands in pdb +#====================================================== +/home/tanu/git/LSHTM_analysis/scripts/pdbtools/scripts/pdb_ligand /home/tanu/git/Data/ethambutol/input/3byw.pdb + +/home/tanu/git/LSHTM_analysis/scripts/pdbtools/scripts/pdb_ligand /home/tanu/git/Data/cycloserine/input/alr_complex_model.pdb