modified dssp_df to handle multiple chains
This commit is contained in:
parent
3a1431d8ed
commit
91068f5bd1
2 changed files with 119 additions and 41 deletions
|
@ -6,6 +6,7 @@ Created on Tue Apr 7 09:30:16 2020
|
|||
@author: tanu
|
||||
"""
|
||||
import sys, os
|
||||
import argparse
|
||||
import re
|
||||
import pandas as pd
|
||||
from Bio.PDB import PDBParser
|
||||
|
@ -21,6 +22,13 @@ homedir = os.path.expanduser('~')
|
|||
os.getcwd()
|
||||
os.chdir(homedir + '/git/LSHTM_analysis/scripts')
|
||||
os.getcwd()
|
||||
|
||||
#%% command line args
|
||||
arg_parser = argparse.ArgumentParser()
|
||||
arg_parser.add_argument('-d', '--drug', help='drug name', default = 'pyrazin')
|
||||
arg_parser.add_argument('-g', '--gene', help='gene name', default = 'pn') # case sensitive
|
||||
args = arg_parser.parse_args()
|
||||
|
||||
#%% variable assignment: input and output
|
||||
#drug = 'pyrazinamide'
|
||||
#gene = 'pncA'
|
||||
|
@ -29,8 +37,11 @@ os.getcwd()
|
|||
#drug = 'isoniazid'
|
||||
#gene = 'katG'
|
||||
|
||||
drug = 'cycloserine'
|
||||
gene = 'alr'
|
||||
#drug = 'cycloserine'
|
||||
#gene = 'alr'
|
||||
|
||||
drug = args.drug
|
||||
gene = args.gene
|
||||
#==========
|
||||
# data dir
|
||||
#==========
|
||||
|
@ -67,8 +78,8 @@ def dssp_file_from_pdb(inputpdbfile, outfile, DSSP = "dssp"):
|
|||
"""
|
||||
Create a DSSP file from a PDB file
|
||||
|
||||
@param infile: pdb file
|
||||
@type infile: string
|
||||
@param inputpdbfile: pdb file
|
||||
@type inputpdbfile: string
|
||||
|
||||
@param outfile: dssp file
|
||||
@type outfile: string
|
||||
|
@ -92,14 +103,18 @@ def dssp_file_from_pdb(inputpdbfile, outfile, DSSP = "dssp"):
|
|||
#print(dssp.keys()[len(dssp)-1][0])
|
||||
def extract_chain_dssp(inputpdbfile):
|
||||
"""
|
||||
extracts chain_ids from dssp run on pdb file
|
||||
This is to allow processing of dssp output to df
|
||||
and for writing as csv file
|
||||
|
||||
Parameters
|
||||
----------
|
||||
inputpdbfile : TYPE
|
||||
DESCRIPTION.
|
||||
@param inputpdbfile: pdb file
|
||||
@type inputpdbfile: string
|
||||
|
||||
Returns
|
||||
-------
|
||||
@return: chain_ids from dssp output of pdb file
|
||||
@return: chain_ids from running dssp on pdb file
|
||||
@rtype: list
|
||||
|
||||
"""
|
||||
|
@ -117,11 +132,11 @@ def extract_chain_dssp(inputpdbfile):
|
|||
print(chainsL)
|
||||
# sort the list (since sets are not ordered) for convenience
|
||||
# this will be required for dssp_df
|
||||
my_chains = sorted(chainsL)
|
||||
pdbchainlist = sorted(chainsL)
|
||||
print('dssp output for'
|
||||
, in_filename, 'contains:', len(my_chains)
|
||||
, 'chains:\n', my_chains)
|
||||
return my_chains
|
||||
, in_filename, 'contains:', len(pdbchainlist)
|
||||
, 'chains:\n', pdbchainlist)
|
||||
return pdbchainlist
|
||||
|
||||
#%%
|
||||
def dssp_to_csv(inputdsspfile, outfile, pdbchainlist):
|
||||
|
@ -141,8 +156,8 @@ def dssp_to_csv(inputdsspfile, outfile, pdbchainlist):
|
|||
"""
|
||||
dssp_df = pd.DataFrame()
|
||||
|
||||
print('Total no. of chains: ', len(my_chains))
|
||||
for chain_id in my_chains:
|
||||
print('Total no. of chains: ', len(pdbchainlist))
|
||||
for chain_id in pdbchainlist:
|
||||
print('Chain id:', chain_id)
|
||||
dssp_cur = pd.DataFrame()
|
||||
dssp_cur = dms_tools2.dssp.processDSSP(inputdsspfile, chain = chain_id)
|
||||
|
@ -182,8 +197,11 @@ def dssp_to_csv(inputdsspfile, outfile, pdbchainlist):
|
|||
#%%
|
||||
|
||||
def main():
|
||||
print('Running dssp')
|
||||
print('Running dssp on', in_filename, 'extracting df and output csv:', dsspcsv_filename)
|
||||
dssp_file_from_pdb(infile, dssp_file, DSSP = "dssp")
|
||||
my_chains = extract_chain_dssp(infile)
|
||||
dssp_to_csv(dssp_file, dsspcsv_file, my_chains)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
#%% end of script
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue