modified dssp_df to handle multiple chains

This commit is contained in:
Tanushree Tunstall 2020-04-07 16:02:19 +01:00
parent d161fcd0f3
commit f690c75ca0
2 changed files with 119 additions and 41 deletions

View file

@ -6,6 +6,7 @@ Created on Tue Apr 7 09:30:16 2020
@author: tanu
"""
import sys, os
import argparse
import re
import pandas as pd
from Bio.PDB import PDBParser
@ -21,6 +22,13 @@ homedir = os.path.expanduser('~')
os.getcwd()
os.chdir(homedir + '/git/LSHTM_analysis/scripts')
os.getcwd()
#%% command line args
# Command-line interface: -d/--drug and -g/--gene are both optional flags
# that fall back to their defaults when not supplied.
arg_parser = argparse.ArgumentParser()
# NOTE(review): the defaults 'pyrazin' / 'pn' look like shortened forms of the
# 'pyrazinamide' / 'pncA' examples that appear in this script's comments --
# confirm they are intentional.
arg_parser.add_argument('-d', '--drug', help='drug name', default = 'pyrazin')
arg_parser.add_argument('-g', '--gene', help='gene name', default = 'pn') # case sensitive
# parse_args() reads the options from sys.argv; the values are then available
# as args.drug and args.gene.
args = arg_parser.parse_args()
#%% variable assignment: input and output
#drug = 'pyrazinamide'
#gene = 'pncA'
@ -29,8 +37,11 @@ os.getcwd()
#drug = 'isoniazid'
#gene = 'katG'
drug = 'cycloserine'
gene = 'alr'
#drug = 'cycloserine'
#gene = 'alr'
# drug and gene are taken from the parsed command-line arguments; these
# assignments come last, so they determine the values used from here on.
drug = args.drug
gene = args.gene
#==========
# data dir
#==========
@ -67,8 +78,8 @@ def dssp_file_from_pdb(inputpdbfile, outfile, DSSP = "dssp"):
"""
Create a DSSP file from a PDB file
@param infile: pdb file
@type infile: string
@param inputpdbfile: pdb file
@type inputpdbfile: string
@param outfile: dssp file
@type outfile: string
@ -92,14 +103,18 @@ def dssp_file_from_pdb(inputpdbfile, outfile, DSSP = "dssp"):
#print(dssp.keys()[len(dssp)-1][0])
def extract_chain_dssp(inputpdbfile):
"""
extracts chain_ids from dssp run on pdb file
This is to allow processing of dssp output to df
and for writing as csv file
Parameters
----------
inputpdbfile : TYPE
DESCRIPTION.
@param inputpdbfile: pdb file
@type inputpdbfile: string
Returns
-------
@return: chain_ids from dssp output of pdb file
@return: chain_ids from running dssp on pdb file
@rtype: list
"""
@ -117,11 +132,11 @@ def extract_chain_dssp(inputpdbfile):
print(chainsL)
# sort the list (since sets are not ordered) for convenience
# this will be required for dssp_df
my_chains = sorted(chainsL)
pdbchainlist = sorted(chainsL)
print('dssp output for'
, in_filename, 'contains:', len(my_chains)
, 'chains:\n', my_chains)
return my_chains
, in_filename, 'contains:', len(pdbchainlist)
, 'chains:\n', pdbchainlist)
return pdbchainlist
#%%
def dssp_to_csv(inputdsspfile, outfile, pdbchainlist):
@ -141,8 +156,8 @@ def dssp_to_csv(inputdsspfile, outfile, pdbchainlist):
"""
dssp_df = pd.DataFrame()
print('Total no. of chains: ', len(my_chains))
for chain_id in my_chains:
print('Total no. of chains: ', len(pdbchainlist))
for chain_id in pdbchainlist:
print('Chain id:', chain_id)
dssp_cur = pd.DataFrame()
dssp_cur = dms_tools2.dssp.processDSSP(inputdsspfile, chain = chain_id)
@ -182,8 +197,11 @@ def dssp_to_csv(inputdsspfile, outfile, pdbchainlist):
#%%
def main():
    """
    Run the dssp steps in order for the configured pdb file

    Prints progress messages, creates the dssp file from the pdb file,
    extracts the chain ids from the dssp run and hands them, together with
    the dssp file and the csv output path, to dssp_to_csv.

    Takes no arguments: infile, in_filename, dssp_file, dsspcsv_file and
    dsspcsv_filename are not defined here and are read from the enclosing
    (module) scope.
    """
    print('Running dssp')
    print('Running dssp on', in_filename, 'extracting df and output csv:', dsspcsv_filename)

    # step 1: pdb file -> dssp file
    dssp_file_from_pdb(inputpdbfile = infile, outfile = dssp_file, DSSP = "dssp")

    # step 2: chain ids present in the dssp output
    chain_ids = extract_chain_dssp(inputpdbfile = infile)

    # step 3: dssp file -> csv, using the chain ids from step 2
    dssp_to_csv(inputdsspfile = dssp_file, outfile = dsspcsv_file, pdbchainlist = chain_ids)
if __name__ == "__main__":
main()
#%% end of script