added pdb_chain splitter code and wrapper

2020-05-13 16:54:20 +01:00 · 2020-05-13 16:54:20 +01:00 · 65db4a090e
commit 65db4a090e
parent 3425d8fa2b
5 changed files with 114 additions and 2 deletions
--- a/scripts/chain_splitter.py
+++ b/scripts/chain_splitter.py
@ -0,0 +1,28 @@
+#!/usr/bin/python3
+
+#=======================================================================
+# TASK: extract chain from pdb and save each chain as a separate file
+
+# link for saving each chain as a separate file
+#=======================================================================   
+__description__ = \
+"""
+chain_splitter.py
+
+defines ChainSelect, used to pick a single chain when saving a pdb file.
+"""
+__author__ = "Tanushree Tunstall"
+__date__ = ""
+
+from Bio.PDB import Select, PDBIO
+from Bio.PDB.PDBParser import PDBParser
+
+class ChainSelect(Select):
+    """Bio.PDB Select subclass that answers 1 only for the chain with a given id."""
+    def __init__(self, chain):
+        # id of the one chain to keep, compared against chain.get_id()
+        self.chain = chain
+
+    def accept_chain(self, chain):
+        """Return 1 when the chain's id equals the stored id, otherwise 0."""
+        return 1 if chain.get_id() == self.chain else 0
--- a/scripts/data_extraction.py
+++ b/scripts/data_extraction.py
@ -61,7 +61,6 @@ drug = args.drug
 gene = args.gene

 gene_match = gene + '_p.'
-
 # building cols to extract
 dr_muts_col = 'dr_mutations_' + drug
 other_muts_col = 'other_mutations_' + drug
--- a/scripts/pdb_chain_splitter.py
+++ b/scripts/pdb_chain_splitter.py
@ -0,0 +1,72 @@
+#!/usr/bin/env python3
+
+# Copyright 2020, Tanushree Tunstall
+# This program is distributed under General Public License v. 3.  See the file
+# COPYING for a copy of the license.
+
+__description__ = \
+"""
+pdb_chain_splitter.py
+
+extracts chain/s from a pdb file and saves each chain as a separate file
+"""
+__author__ = "Tanushree Tunstall"
+__date__ = ""
+#=======================================================================
+import os, shutil, sys
+#from pdbtools.helper import cmdline
+from chain_splitter import ChainSelect
+from Bio.PDB import Select, PDBIO
+from Bio.PDB.PDBParser import PDBParser
+#io = PDBIO()
+import argparse
+#=======================================================================
+
+def main():
+    """
+    Extract the requested chain/s from a pdb file, one output file per chain.
+
+    Example use:
+    pdb_chain_splitter.py -i <your_pdb_file> -c aB
+    Extracts chain 'A' by default. Chain ids are upper-cased before matching.
+    Chains may be given joined ('-c AB') or space separated ('-c A B').
+    Output is written to <out_path>/<out_file>_<CHAIN>.pdb
+    FIXME: extract all chains from the given pdb and write them out individually
+    """
+    arg_parser = argparse.ArgumentParser()
+    # required: without -i the parser was handed the literal default 'None' as a file name
+    arg_parser.add_argument('-i', '--pdb_file', help='provide pdb file', required = True)
+    arg_parser.add_argument('-c', '--chain', help='chain/s to extract without spaces. Case insensitive.', nargs = '+',  default = ['A'], type = str)
+    arg_parser.add_argument('-p', '--out_path', help='specify output path',  default = '.', type = str)
+    arg_parser.add_argument('-o', '--out_file', help='specify output filename. Will be used as a prefix to append chain id and pdb file extension',  default = 'pdb_file_chain', type = str)
+    args = arg_parser.parse_args()
+
+    # Extract chains and write each chain as a separate file
+    pdb_file = args.pdb_file
+    print('inpput pdb file:', pdb_file)
+
+    # flatten: each character of each token is a chain id, so '-c AB' == '-c A B'
+    chains = [chain_id for token in args.chain for chain_id in token]
+    print ('user supplied chain:', chains)
+
+    # output filename and path
+    outpath = args.out_path
+    outfile = args.out_file
+
+    # get structure
+    p = PDBParser(PERMISSIVE=1)
+    structure = p.get_structure(pdb_file, pdb_file)
+    print('input pdb filename:', structure.get_id())
+
+    # one writer is enough: the structure is the same for every chain
+    io = PDBIO()
+    io.set_structure(structure)
+    for chain in chains:
+        chain = chain.upper()
+        print ('Extracting chain:', chain)
+        pdb_chain_file = os.path.join(outpath, outfile + '_{}.pdb'.format(chain))
+        io.save(pdb_chain_file, ChainSelect(chain))
+
+if __name__ == "__main__":  # run main() only when executed as a script, not on import
+    main()
+
--- a/scripts/pdbtools
+++ b/scripts/pdbtools
@ -1 +1 @@
-Subproject commit 9d347c663a60529eda06c03e816423d546dc4104
+Subproject commit 8c46611c8ceb37b680bc7bbaa161f284f0742f24
--- a/scripts/pdbtools_commands
+++ b/scripts/pdbtools_commands
@ -10,3 +10,16 @@ home/tanu/git/LSHTM_analysis/scripts/pdbtools/scripts/pdb_residue_renumber /home
 # extract seq from structure
 #======================================================
 /home/tanu/git/LSHTM_analysis/scripts/pdbtools/scripts/pdb_seq -a /home/tanu/git/Data/ethambutol/input/3byw.pdb > 3byw_seq.txt
+#/home/tanu/git/LSHTM_analysis/scripts/pdbtools/scripts/pdb_seq -c A -a /home/tanu/git/Data/ethambutol/input/3byw.pdb > 3byw_seq.txt
+
+#======================================================
+# mutate residue: FIXME, needs CHARMM
+#======================================================
+/home/tanu/git/LSHTM_analysis/scripts/pdbtools/scripts/pdb_mutator -r 39 -m XXX /home/tanu/git/Data/ethambutol/input/3byw.pdb
+
+#======================================================
+# check ligands in pdb
+#======================================================
+/home/tanu/git/LSHTM_analysis/scripts/pdbtools/scripts/pdb_ligand /home/tanu/git/Data/ethambutol/input/3byw.pdb
+
+/home/tanu/git/LSHTM_analysis/scripts/pdbtools/scripts/pdb_ligand /home/tanu/git/Data/cycloserine/input/alr_complex_model.pdb