LSHTM_analysis/scripts/pdb_chain_extract.py
2020-05-15 13:48:50 +01:00

70 lines
2.4 KiB
Python
Executable file

#!/usr/bin/env python3
# Copyright 2020, Tanushree Tunstall
# This program is distributed under General Public License v. 3. See the file
# COPYING for a copy of the license.
# Module metadata.
# NOTE(review): the description below names chain_extract.py and says each
# chain is saved as a separate file, whereas main() in this file makes a single
# io.save() call that writes all requested chains to one output file --
# confirm the intended behaviour and reword the description if appropriate.
__description__ = \
"""
chain_extract.py
extract chain/s from pdb and saves each chain as a separate file
"""
__author__ = "Tanushree Tunstall"
# No date has been recorded for this script.
__date__ = ""
#=======================================================================
import os, shutil, sys
#from pdbtools.helper import cmdline
from chain_extract import ChainExtract
from Bio.PDB import PDBParser, PDBIO, Select
#from Bio.PDB.PDBParser import PDBParser
#io = PDBIO()
import argparse
#=======================================================================
def main():
    """
    Command-line entry point: write selected chains of a PDB file to a new file.

    Example use:
    pdb_chain_extract.py -i <your_pdb_file> -c <chainid1><chainid2> -p <outpath> -o <outfile>

    Chain ids are treated as single characters. They may be given run
    together ('-c AB') or separated by spaces ('-c A B'); both select chains
    'A' and 'B'. Extracts chain 'A' by default.

    All requested chains are written to ONE output file named
    <outpath>/<outfile>_<chainids>.pdb

    Exits with an argparse usage error if no input pdb file is supplied.
    """
    arg_parser = argparse.ArgumentParser()
    # The input file is mandatory: without it there is nothing to parse, so let
    # argparse report the problem instead of trying to open a file named 'None'.
    arg_parser.add_argument('-i', '--pdb_file', help='provide pdb file', required=True)
    # No 'type=list' here: argparse applies 'type' to every token, which produced
    # a list of lists and caused every token after the first to be discarded.
    arg_parser.add_argument('-c', '--chain', help='chain/s to extract without spaces.', nargs='+', default=['A'])
    arg_parser.add_argument('-p', '--out_path', help='specify output path', default='.', type=str)
    arg_parser.add_argument('-o', '--out_file', help='specify output filename. Will be used as a prefix to append chain id and pdb file extension', default='pdbfile', type=str)
    args = arg_parser.parse_args()

    pdb_file = args.pdb_file
    print('input pdb file:', pdb_file)

    # Flatten every supplied token into single-character chain ids so that
    # '-c AB', '-c A B' and '-c AB C' are all honoured in full.
    chains = [chain_id for token in args.chain for chain_id in token]
    print('user supplied chain:', chains)

    # output filename and path
    outpath = args.out_path
    outfile = args.out_file

    # get structure
    p = PDBParser(PERMISSIVE=1)
    structure = p.get_structure(pdb_file, pdb_file)
    print('input pdb filename:', structure.get_id())

    c_names = ''.join(chains)
    print('Extracting chains:', chains)

    # os.path.join copes with a trailing separator or an empty out_path, where
    # plain '/' concatenation would yield '//' or a path at the filesystem root.
    pdb_chains_file = os.path.join(outpath, outfile + '_' + c_names + '.pdb')

    # Write the selected chains; ChainExtract is the selector passed to PDBIO.save().
    io = PDBIO()
    io.set_structure(structure)
    io.save(pdb_chains_file, ChainExtract(chains))
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()