From f7e371a58527d35afe92a42ce035dcd8e67d60ea Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Fri, 15 May 2020 13:44:57 +0100 Subject: [PATCH] script for saving pdb chains in single file --- scripts/chain_extract.py | 26 +++++------- scripts/pdb_chain_extract.py | 74 +++++++++++++++++++++++++++++++++++ scripts/pdb_chain_splitter.py | 2 +- scripts/pdbtools_commands | 18 +++++++++ 4 files changed, 103 insertions(+), 17 deletions(-) mode change 100644 => 100755 scripts/chain_extract.py create mode 100755 scripts/pdb_chain_extract.py diff --git a/scripts/chain_extract.py b/scripts/chain_extract.py old mode 100644 new mode 100755 index 657f460..084a313 --- a/scripts/chain_extract.py +++ b/scripts/chain_extract.py @@ -10,21 +10,15 @@ from Bio.PDB import PDBParser, PDBIO, Select -io = PDBIO() -pdb = PDBParser().get_structure("3byw", "3byw.pdb") - # Select() Method to return True for every chain in 'chains' -class ChainSelect(Select): - def accept_chain(self, chain): - #print dir(chain) - if chain.id in chains: - return 1 - else: - return 0 - -if __name__ == '__main__': - chains = ['G', 'H'] # specify selected chains - io.set_structure(pdb) - io.save(pdb.get_id() + "_crop.pdb", ChainSelect()) - +class ChainExtract(Select): + def __init__(self, chain): + self.chain = chain + + def accept_chain(self, chain): + #print(dir(chain)) + if chain.id in self.chain: + return 1 + else: + return 0 diff --git a/scripts/pdb_chain_extract.py b/scripts/pdb_chain_extract.py new file mode 100755 index 0000000..dddbcd7 --- /dev/null +++ b/scripts/pdb_chain_extract.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python3 + +# Copyright 2020, Tanushree Tunstall +# This program is distributed under General Public License v. 3. See the file +# COPYING for a copy of the license. 
+ +__description__ = \ +""" +chain_extract.py + +extract chain/s from pdb and saves each chain as a separate file +""" +__author__ = "Tanushree Tunstall" +__date__ = "" +#======================================================================= +import os, shutil, sys +#from pdbtools.helper import cmdline +from chain_extract import ChainExtract +from Bio.PDB import PDBParser, PDBIO, Select +#from Bio.PDB.PDBParser import PDBParser + +#io = PDBIO() +import argparse +#======================================================================= + +def main(): + """ + Function to call if run from command line. + + Example use: + pdb_chain_extract.py -f -c + Extracts chain 'A' by default. + FIXME: extract all chains from the given pdb and write them out individually + """ + arg_parser = argparse.ArgumentParser() + arg_parser.add_argument('-i', '--pdb_file', help='provide pdb file', default = 'None') + arg_parser.add_argument('-c', '--chain', help='chain/s to extract without spaces.', nargs = '+', default = 'A', type = list) + arg_parser.add_argument('-p', '--out_path', help='specify output path', default = '.', type = str) + arg_parser.add_argument('-o', '--out_file', help='specify output filename. Will be used as a prefix to append chain id and pdb file extension', default = 'pdbfile', type = str) + args = arg_parser.parse_args() + + # Extract chains and write each chain as a separate file + pdb_file = args.pdb_file + print('input pdb file:', pdb_file) + + # type = list, makes it a list of lists. Hence extracting the list of chains. 
+ chains = args.chain[0] + #chains = ['A','B','C'] + print ('user supplied chain:', chains) + + # output filename and path + outpath = args.out_path + outfile = args.out_file + + # get structure + p = PDBParser(PERMISSIVE=1) + structure = p.get_structure(pdb_file, pdb_file) + print('input pdb filename:', structure.get_id()) + + my_chains = chains + #my_chains = ['G', 'H'] + c_names = ''.join(my_chains) + print('Extracting chains:', my_chains) + pdb_chains_file = outpath + '/' + outfile + '_' + c_names + '.pdb' + io = PDBIO() + io.set_structure(structure) + #io.save(structure.get_id() + "_crop.structure", ChainExtract()) + #io.save("_crop.pdb", ChainExtract(my_chains)) + #io.save('{}'.format(pdb_chains_file), ChainExtract(my_chains)) + io.save(pdb_chains_file, ChainExtract(my_chains)) + +if __name__ == "__main__": + main() + diff --git a/scripts/pdb_chain_splitter.py b/scripts/pdb_chain_splitter.py index 9a163b2..3c5d5b5 100755 --- a/scripts/pdb_chain_splitter.py +++ b/scripts/pdb_chain_splitter.py @@ -40,7 +40,7 @@ def main(): # Extract chains and write each chain as a separate file pdb_file = args.pdb_file - print('inpput pdb file:', pdb_file) + print('input pdb file:', pdb_file) # type = list, makes it a list of lists. Hence extracting the list of chains. 
chains = args.chain[0] diff --git a/scripts/pdbtools_commands b/scripts/pdbtools_commands index 36b1cc7..67f6695 100644 --- a/scripts/pdbtools_commands +++ b/scripts/pdbtools_commands @@ -23,3 +23,21 @@ home/tanu/git/LSHTM_analysis/scripts/pdbtools/scripts/pdb_residue_renumber /home /home/tanu/git/LSHTM_analysis/scripts/pdbtools/scripts/pdb_ligand /home/tanu/git/Data/ethambutol/input/3byw.pdb /home/tanu/git/LSHTM_analysis/scripts/pdbtools/scripts/pdb_ligand /home/tanu/git/Data/cycloserine/input/alr_complex_model.pdb + + + + +#^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +# my pdb tools + +#====================================================== +# save specified chains as individual pdbs +#====================================================== +./pdb_chain_splitter.py -i /home/tanu/git/Data/ethambutol/input/3byw.pdb -c DF -p /home/tanu/git/Data/ethambutol/input -o 3byw + +#====================================================== +# save specified chains as one pdb +#====================================================== +./pdb_chain_extract.py -i /home/tanu/git/Data/ethambutol/input/3byw.pdb -c DF -p /home/tanu/git/Data/ethambutol/input -o 3byw +