#!/usr/bin/python3

#=======================================================================
# TASK: select specified chains from the pdb & save a cropped PDB with
# the selected chains. Useful for dimer, etc modelling.

# link for saving each chain as a separate file
# https://stackoverflow.com/questions/11685716/how-to-extract-chains-from-a-pdb-file
#=======================================================================

from Bio.PDB import PDBParser, PDBIO, Select


class ChainExtract(Select):
    """Selection filter that accepts only chains whose id was requested.

    An instance is handed to PDBIO.save() as the selector (see
    pdb_chain_extract.py), which then consults accept_chain() to decide
    which chains end up in the written file.
    """

    def __init__(self, chain):
        # 'chain' is the collection of wanted chain ids; it is only ever
        # used on the right-hand side of an `in` test.
        self.chain = chain

    def accept_chain(self, chain):
        """Return 1 when chain.id is one of the wanted ids, otherwise 0."""
        is_wanted = chain.id in self.chain
        return int(is_wanted)


# ----------------------------------------------------------------------
# Header of scripts/my_pdbtools/pdb_chain_extract.py (next file in the patch)
# ----------------------------------------------------------------------
#!/usr/bin/env python3

# Copyright 2020, Tanushree Tunstall
# This program is distributed under General Public License v. 3. See the file
# COPYING for a copy of the license.
__description__ = """
pdb_chain_extract.py

extract chain/s from pdb and saves selected chains in a pdb file
"""
__author__ = "Tanushree Tunstall"
__date__ = ""
#=======================================================================
import argparse
import os
import shutil
import sys
#=======================================================================


def main(argv=None):
    """
    Extract the requested chain/s from a PDB file into ONE new PDB file.

    Example use:
    pdb_chain_extract.py -i <pdb_file> -c <chains> -p <out_path> -o <out_file>
    Extracts chain 'A' by default.

    The chains may be given joined ('-c AB') or separated ('-c A B'); both
    select chains A and B. The output file is named
    <out_path>/<out_file>_<chains>.pdb

    Args:
        argv: list of command line arguments to parse. None (the default)
            lets argparse read them from sys.argv, as when run as a script.

    Raises:
        SystemExit: raised by argparse (exit status 2) when -i is missing,
            does not name an existing file, or no chain id was supplied.
    """
    arg_parser = argparse.ArgumentParser(
        description=__description__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    arg_parser.add_argument('-i', '--pdb_file', help='provide pdb file', required=True)
    arg_parser.add_argument('-c', '--chain', help='chain/s to extract without spaces.', nargs='+', default=['A'])
    arg_parser.add_argument('-p', '--out_path', help='specify output path', default='.', type=str)
    arg_parser.add_argument('-o', '--out_file', help='specify output filename. Will be used as a prefix to append chain id and pdb file extension', default='pdbfile', type=str)
    args = arg_parser.parse_args(argv)

    # Fail early with a usage message instead of an obscure parser error
    pdb_file = args.pdb_file
    if not os.path.isfile(pdb_file):
        arg_parser.error('pdb file not found: ' + pdb_file)
    print('input pdb file:', pdb_file)

    # Every character of every token is one chain id, so '-c AB' and
    # '-c A B' are equivalent. dict.fromkeys drops repeated ids while
    # keeping the order in which the user listed them.
    chains = list(dict.fromkeys(''.join(args.chain)))
    if not chains:
        arg_parser.error('no chain id supplied')
    print('user supplied chain:', chains)

    # output filename and path
    outpath = args.out_path
    outfile = args.out_file

    # Imported only once the arguments are known to be usable, so that
    # --help and usage errors do not depend on Biopython being importable.
    from Bio.PDB import PDBIO, PDBParser
    from chain_extract import ChainExtract

    # get structure
    pdb_parser = PDBParser(PERMISSIVE=1)
    structure = pdb_parser.get_structure(pdb_file, pdb_file)
    print('input pdb filename:', structure.get_id())

    # Tell the user about requested chains that the structure does not have;
    # they are simply absent from the output.
    available = {chain.id for chain in structure.get_chains()}
    missing = [chain_id for chain_id in chains if chain_id not in available]
    if missing:
        print('warning: chain/s not found in structure:', missing, file=sys.stderr)

    c_names = ''.join(chains)
    print('Extracting chains:', chains)

    # All selected chains are written together into a single file
    if outpath:
        os.makedirs(outpath, exist_ok=True)
    pdb_chains_file = os.path.join(outpath, outfile + '_' + c_names + '.pdb')
    io = PDBIO()
    io.set_structure(structure)
    io.save(pdb_chains_file, ChainExtract(chains))


if __name__ == "__main__":
    main()