updating kd script to take command line args

This commit is contained in:
Tanushree Tunstall 2020-04-07 16:13:54 +01:00
parent 24c7ade7c4
commit 44577b4a0c
2 changed files with 26 additions and 17 deletions

View file

@ -123,10 +123,13 @@ min(dfVals['kd_values'])
#===================
# concatenating dfs
#===================
# Merge the two on index (as these are now reflective of the aa position numbers): df1 and df2
# This will introduce NaN where there is missing values. In our case this will be 2 (first and last ones)
# Conveniently, the last position in this case is not part of the struc, so not much loss of info
# Needless to state that this will be variable for other targets.
# Merge the two on index
# (as these are now reflective of the aa position numbers): df1 and df2
# This will introduce NaN where there are missing values. In our case this
# will be 2 (first and last ones based on window size and offset)
# For pncA: the last position is not part of the structure, so no info is lost
# Needless to say that this will be variable for other targets.
kd_df = pd.concat([dfSeq, dfVals], axis = 1)

View file

@ -5,6 +5,11 @@ Created on Tue Apr 7 09:30:16 2020
@author: tanu
"""
#=======================================================================
# TASK:
#=======================================================================
#%% load packages
import sys, os
import argparse
import re
@ -14,7 +19,7 @@ from Bio.PDB.DSSP import DSSP
import dms_tools2
import dms_tools2.dssp
import pprint as pp
#%%
#=======================================================================
#%% specify homedir and curr dir
homedir = os.path.expanduser('~')
@ -22,13 +27,13 @@ homedir = os.path.expanduser('~')
os.getcwd()
os.chdir(homedir + '/git/LSHTM_analysis/scripts')
os.getcwd()
#=======================================================================
#%% command line args
arg_parser = argparse.ArgumentParser()
arg_parser.add_argument('-d', '--drug', help='drug name', default = 'pyrazin')
arg_parser.add_argument('-g', '--gene', help='gene name', default = 'pn') # case sensitive
arg_parser.add_argument('-d', '--drug', help='drug name', default = 'pyrazinamide')
arg_parser.add_argument('-g', '--gene', help='gene name', default = 'pncA') # case sensitive
args = arg_parser.parse_args()
#=======================================================================
#%% variable assignment: input and output
#drug = 'pyrazinamide'
#gene = 'pncA'
@ -73,6 +78,8 @@ dsspcsv_file = outdir + '/' + dsspcsv_filename
print('Outfile dssp to csv: ', dsspcsv_file
, '\n=============================================================')
#%% end of variable assignment for input and output files
#=======================================================================
#%% create .dssp from pdb
def dssp_file_from_pdb(inputpdbfile, outfile, DSSP = "dssp"):
"""
@ -92,10 +99,9 @@ def dssp_file_from_pdb(inputpdbfile, outfile, DSSP = "dssp"):
# out_file = infile +'.dssp'
# outfile = os.path.splitext(inputpdbfile)[0]+'.dssp' # strip file ext
os.system("%s -i %s -o %s" % (DSSP, inputpdbfile, outfile))
#=======================================================================
#%% extract chain id from dssp
#print(dssp.keys())
#print(dssp.keys()[0][0])
#print(len(dssp))
@ -137,8 +143,8 @@ def extract_chain_dssp(inputpdbfile):
, in_filename, 'contains:', len(pdbchainlist)
, 'chains:\n', pdbchainlist)
return pdbchainlist
#%%
#=======================================================================
#%% write csv of processed dssp output
def dssp_to_csv(inputdsspfile, outfile, pdbchainlist):
"""
Create a df from a dssp file containing ASA, RSA, SS for all chains
@ -188,14 +194,13 @@ def dssp_to_csv(inputdsspfile, outfile, pdbchainlist):
, '\nNo. of rows:', len(dssp_df)
, '\nNo. of cols:', len(dssp_df.columns)
, '\n==============================================================')
#%%
# call
#=======================================================================
#%% call functions
#dssp_file_from_pdb(infile, dssp_file, DSSP = "dssp")
#my_chains = extract_chain_dssp(infile)
#dssp_to_csv(dssp_file, dsspcsv_file, my_chains)
#%%
#=======================================================================
def main():
print('Running dssp on', in_filename, 'extracting df and output csv:', dsspcsv_filename)
dssp_file_from_pdb(infile, dssp_file, DSSP = "dssp")
@ -205,3 +210,4 @@ def main():
if __name__ == "__main__":
main()
#%% end of script
#=======================================================================