updating kd script to take command line args
This commit is contained in:
parent
24c7ade7c4
commit
44577b4a0c
2 changed files with 26 additions and 17 deletions
|
@ -123,10 +123,13 @@ min(dfVals['kd_values'])
|
||||||
#===================
|
#===================
|
||||||
# concatenating dfs
|
# concatenating dfs
|
||||||
#===================
|
#===================
|
||||||
# Merge the two on index (as these are now reflective of the aa position numbers): df1 and df2
|
# Merge the two on index
|
||||||
# This will introduce NaN where there are missing values. In our case this will be 2 (first and last ones)
|
# (as these are now reflective of the aa position numbers): df1 and df2
|
||||||
# Conveniently, the last position in this case is not part of the struc, so not much loss of info
|
# This will introduce NaN where there are missing values. In our case this
|
||||||
# Needless to state that this will be variable for other targets.
|
# will be 2 (first and last ones based on window size and offset)
|
||||||
|
# In our case this will be 2 (first and last ones)
|
||||||
|
# For pncA: the last position is not part of the structure, so no info loss
|
||||||
|
# Needless to say that this will be variable for other targets.
|
||||||
|
|
||||||
kd_df = pd.concat([dfSeq, dfVals], axis = 1)
|
kd_df = pd.concat([dfSeq, dfVals], axis = 1)
|
||||||
|
|
||||||
|
|
|
@ -5,6 +5,11 @@ Created on Tue Apr 7 09:30:16 2020
|
||||||
|
|
||||||
@author: tanu
|
@author: tanu
|
||||||
"""
|
"""
|
||||||
|
#=======================================================================
|
||||||
|
# TASK:
|
||||||
|
|
||||||
|
#=======================================================================
|
||||||
|
#%% load packages
|
||||||
import sys, os
|
import sys, os
|
||||||
import argparse
|
import argparse
|
||||||
import re
|
import re
|
||||||
|
@ -14,7 +19,7 @@ from Bio.PDB.DSSP import DSSP
|
||||||
import dms_tools2
|
import dms_tools2
|
||||||
import dms_tools2.dssp
|
import dms_tools2.dssp
|
||||||
import pprint as pp
|
import pprint as pp
|
||||||
#%%
|
#=======================================================================
|
||||||
#%% specify homedir and curr dir
|
#%% specify homedir and curr dir
|
||||||
homedir = os.path.expanduser('~')
|
homedir = os.path.expanduser('~')
|
||||||
|
|
||||||
|
@ -22,13 +27,13 @@ homedir = os.path.expanduser('~')
|
||||||
os.getcwd()
|
os.getcwd()
|
||||||
os.chdir(homedir + '/git/LSHTM_analysis/scripts')
|
os.chdir(homedir + '/git/LSHTM_analysis/scripts')
|
||||||
os.getcwd()
|
os.getcwd()
|
||||||
|
#=======================================================================
|
||||||
#%% command line args
|
#%% command line args
|
||||||
arg_parser = argparse.ArgumentParser()
|
arg_parser = argparse.ArgumentParser()
|
||||||
arg_parser.add_argument('-d', '--drug', help='drug name', default = 'pyrazin')
|
arg_parser.add_argument('-d', '--drug', help='drug name', default = 'pyrazinamide')
|
||||||
arg_parser.add_argument('-g', '--gene', help='gene name', default = 'pn') # case sensitive
|
arg_parser.add_argument('-g', '--gene', help='gene name', default = 'pncA') # case sensitive
|
||||||
args = arg_parser.parse_args()
|
args = arg_parser.parse_args()
|
||||||
|
#=======================================================================
|
||||||
#%% variable assignment: input and output
|
#%% variable assignment: input and output
|
||||||
#drug = 'pyrazinamide'
|
#drug = 'pyrazinamide'
|
||||||
#gene = 'pncA'
|
#gene = 'pncA'
|
||||||
|
@ -73,6 +78,8 @@ dsspcsv_file = outdir + '/' + dsspcsv_filename
|
||||||
print('Outfile dssp to csv: ', dsspcsv_file
|
print('Outfile dssp to csv: ', dsspcsv_file
|
||||||
, '\n=============================================================')
|
, '\n=============================================================')
|
||||||
|
|
||||||
|
#%% end of variable assignment for input and output files
|
||||||
|
#=======================================================================
|
||||||
#%% create .dssp from pdb
|
#%% create .dssp from pdb
|
||||||
def dssp_file_from_pdb(inputpdbfile, outfile, DSSP = "dssp"):
|
def dssp_file_from_pdb(inputpdbfile, outfile, DSSP = "dssp"):
|
||||||
"""
|
"""
|
||||||
|
@ -92,10 +99,9 @@ def dssp_file_from_pdb(inputpdbfile, outfile, DSSP = "dssp"):
|
||||||
# out_file = infile +'.dssp'
|
# out_file = infile +'.dssp'
|
||||||
# outfile = os.path.splitext(inputpdbfile)[0]+'.dssp' # strip file ext
|
# outfile = os.path.splitext(inputpdbfile)[0]+'.dssp' # strip file ext
|
||||||
os.system("%s -i %s -o %s" % (DSSP, inputpdbfile, outfile))
|
os.system("%s -i %s -o %s" % (DSSP, inputpdbfile, outfile))
|
||||||
|
#=======================================================================
|
||||||
#%% extract chain id from dssp
|
#%% extract chain id from dssp
|
||||||
|
|
||||||
|
|
||||||
#print(dssp.keys())
|
#print(dssp.keys())
|
||||||
#print(dssp.keys()[0][0])
|
#print(dssp.keys()[0][0])
|
||||||
#print(len(dssp))
|
#print(len(dssp))
|
||||||
|
@ -137,8 +143,8 @@ def extract_chain_dssp(inputpdbfile):
|
||||||
, in_filename, 'contains:', len(pdbchainlist)
|
, in_filename, 'contains:', len(pdbchainlist)
|
||||||
, 'chains:\n', pdbchainlist)
|
, 'chains:\n', pdbchainlist)
|
||||||
return pdbchainlist
|
return pdbchainlist
|
||||||
|
#=======================================================================
|
||||||
#%%
|
#%% write csv of processed dssp output
|
||||||
def dssp_to_csv(inputdsspfile, outfile, pdbchainlist):
|
def dssp_to_csv(inputdsspfile, outfile, pdbchainlist):
|
||||||
"""
|
"""
|
||||||
Create a df from a dssp file containing ASA, RSA, SS for all chains
|
Create a df from a dssp file containing ASA, RSA, SS for all chains
|
||||||
|
@ -188,14 +194,13 @@ def dssp_to_csv(inputdsspfile, outfile, pdbchainlist):
|
||||||
, '\nNo. of rows:', len(dssp_df)
|
, '\nNo. of rows:', len(dssp_df)
|
||||||
, '\nNo. of cols:', len(dssp_df.columns)
|
, '\nNo. of cols:', len(dssp_df.columns)
|
||||||
, '\n==============================================================')
|
, '\n==============================================================')
|
||||||
|
#=======================================================================
|
||||||
#%%
|
#%% call functions
|
||||||
# call
|
|
||||||
#dssp_file_from_pdb(infile, dssp_file, DSSP = "dssp")
|
#dssp_file_from_pdb(infile, dssp_file, DSSP = "dssp")
|
||||||
#my_chains = extract_chain_dssp(infile)
|
#my_chains = extract_chain_dssp(infile)
|
||||||
#dssp_to_csv(dssp_file, dsspcsv_file, my_chains)
|
#dssp_to_csv(dssp_file, dsspcsv_file, my_chains)
|
||||||
#%%
|
#%%
|
||||||
|
#=======================================================================
|
||||||
def main():
|
def main():
|
||||||
print('Running dssp on', in_filename, 'extracting df and output csv:', dsspcsv_filename)
|
print('Running dssp on', in_filename, 'extracting df and output csv:', dsspcsv_filename)
|
||||||
dssp_file_from_pdb(infile, dssp_file, DSSP = "dssp")
|
dssp_file_from_pdb(infile, dssp_file, DSSP = "dssp")
|
||||||
|
@ -205,3 +210,4 @@ def main():
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
#%% end of script
|
#%% end of script
|
||||||
|
#=======================================================================
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue