added pdb_chain splitter code and wrapper

This commit is contained in:
Tanushree Tunstall 2020-05-13 16:54:20 +01:00
parent 3425d8fa2b
commit 65db4a090e
5 changed files with 114 additions and 2 deletions

28
scripts/chain_splitter.py Executable file
View file

@ -0,0 +1,28 @@
#!/usr/bin/python3
#=======================================================================
# TASK: extract chains from a pdb file and save each chain as a separate file
# TODO: add the reference link for saving each chain as a separate file
#=======================================================================
__description__ = \
"""
pdb_chain_splitter.py
extracts chains and saves them as separate pdb files.
"""
__author__ = "Tanushree Tunstall"
__date__ = ""
from Bio.PDB import Select, PDBIO
from Bio.PDB.PDBParser import PDBParser
class ChainSelect(Select):
    """
    Selection filter that lets exactly one chain through.

    An instance is handed to PDBIO.save() by pdb_chain_splitter.py so that
    only the chain whose id equals the one given here is written out.
    """

    def __init__(self, chain):
        # id of the single chain this filter accepts
        self.chain = chain

    def accept_chain(self, chain):
        """Return 1 when the id of *chain* equals the wanted id, otherwise 0."""
        is_wanted = chain.get_id() == self.chain
        return 1 if is_wanted else 0

View file

@ -61,7 +61,6 @@ drug = args.drug
gene = args.gene
gene_match = gene + '_p.'
# building cols to extract
dr_muts_col = 'dr_mutations_' + drug
other_muts_col = 'other_mutations_' + drug

72
scripts/pdb_chain_splitter.py Executable file
View file

@ -0,0 +1,72 @@
#!/usr/bin/env python3
# Copyright 2020, Tanushree Tunstall
# This program is distributed under General Public License v. 3. See the file
# COPYING for a copy of the license.
__description__ = \
"""
chain_splitter.py
extract chain/s from pdb and saves each chain as a separate file
"""
__author__ = "Tanushree Tunstall"
__date__ = ""
#=======================================================================
import os, shutil, sys
#from pdbtools.helper import cmdline
from chain_splitter import ChainSelect
from Bio.PDB import Select, PDBIO
from Bio.PDB.PDBParser import PDBParser
#io = PDBIO()
import argparse
#=======================================================================
def main():
    """
    Function to call if run from command line.

    Example use:
    pdb_chain_splitter.py -i <your_pdb_file> -c aB
    pdb_chain_splitter.py -i <your_pdb_file> -c A B -p <out_dir> -o <prefix>

    Chain ids are single characters and case insensitive. They may be given
    glued together ('aB'), as separate tokens ('A B'), or as a mix of both.
    Extracts chain 'A' by default.

    Each requested chain is written to <out_path>/<out_file>_<CHAIN>.pdb

    FIXME: extract all chains from the given pdb and write them out individually
    """
    arg_parser = argparse.ArgumentParser()
    # required: without an input file there is nothing to split, so let
    # argparse report it instead of failing later on a file called 'None'
    arg_parser.add_argument('-i', '--pdb_file', help='provide pdb file', required=True)
    # every token is kept as a plain string and split into characters below;
    # type=list together with nargs='+' produced a list of lists, of which
    # only the first element was ever used
    arg_parser.add_argument('-c', '--chain', help='chain/s to extract without spaces. Case insensitive.', nargs='+', default=['A'], type=str)
    arg_parser.add_argument('-p', '--out_path', help='specify output path', default='.', type=str)
    arg_parser.add_argument('-o', '--out_file', help='specify output filename. Will be used as a prefix to append chain id and pdb file extension', default='pdb_file_chain', type=str)
    args = arg_parser.parse_args()

    # Extract chains and write each chain as a separate file
    pdb_file = args.pdb_file
    print('inpput pdb file:', pdb_file)

    # flatten all tokens into one list of single-character chain ids, so that
    # '-c aB', '-c a B' and '-c aB C' are all honoured in full
    chains = [chain_id for token in args.chain for chain_id in token]
    print('user supplied chain:', chains)

    # output filename and path
    outpath = args.out_path
    outfile = args.out_file

    # get structure
    p = PDBParser(PERMISSIVE=1)
    structure = p.get_structure(pdb_file, pdb_file)
    print('input pdb filename:', structure.get_id())

    # one writer bound to the structure is enough; only the chain filter
    # passed to save() differs between output files
    io = PDBIO()
    io.set_structure(structure)

    for chain in chains:
        chain = chain.upper()
        print('Extracting chain:', chain)
        pdb_chain_file = os.path.join(outpath, outfile + '_{}.pdb'.format(chain))
        io.save(pdb_chain_file, ChainSelect(chain))


if __name__ == "__main__":
    main()

@ -1 +1 @@
Subproject commit 9d347c663a60529eda06c03e816423d546dc4104
Subproject commit 8c46611c8ceb37b680bc7bbaa161f284f0742f24

View file

@ -10,3 +10,16 @@ home/tanu/git/LSHTM_analysis/scripts/pdbtools/scripts/pdb_residue_renumber /home
# extract seq from structure
#======================================================
/home/tanu/git/LSHTM_analysis/scripts/pdbtools/scripts/pdb_seq -a /home/tanu/git/Data/ethambutol/input/3byw.pdb > 3byw_seq.txt
#/home/tanu/git/LSHTM_analysis/scripts/pdbtools/scripts/pdb_seq -c A -a /home/tanu/git/Data/ethambutol/input/3byw.pdb > 3byw_seq.txt
#======================================================
# mutate residue: FIXME, needs CHARMM
#======================================================
/home/tanu/git/LSHTM_analysis/scripts/pdbtools/scripts/pdb_mutator -r 39 -m XXX /home/tanu/git/Data/ethambutol/input/3byw.pdb
#======================================================
# check ligands in pdb
#======================================================
/home/tanu/git/LSHTM_analysis/scripts/pdbtools/scripts/pdb_ligand /home/tanu/git/Data/ethambutol/input/3byw.pdb
/home/tanu/git/LSHTM_analysis/scripts/pdbtools/scripts/pdb_ligand /home/tanu/git/Data/cycloserine/input/alr_complex_model.pdb