updating kd script to take command line args

2020-04-07 16:13:54 +01:00 · 2020-04-07 16:13:54 +01:00 · 44577b4a0c
commit 44577b4a0c
parent 24c7ade7c4
2 changed files with 26 additions and 17 deletions
--- a/meta_data_analysis/kd_df.py
+++ b/meta_data_analysis/kd_df.py
@@ -123,10 +123,13 @@ min(dfVals['kd_values'])
 #===================
 # concatenating dfs
 #===================
-# Merge the two on index (as these are now reflective of the aa position numbers): df1 and df2 
+# Merge the two on index 
-# This will introduce NaN where there is missing values. In our case this will be 2 (first and last ones)
+# (as these are now reflective of the aa position numbers): df1 and df2 
-# Conveniently, the last position in this case is not part of the struc, so not much loss of info
+# This will introduce NaN where there are missing values. In our case this
-# Needless to state that this will be variable for other targets.
+# will be 2 (first and last ones based on window size and offset)
 # In our case this will be 2 (first and last ones)
 # For pncA: the last position is not part of the structure, so no info loss
 # Needless to say that this will be variable for other targets.
 kd_df = pd.concat([dfSeq, dfVals], axis = 1)
--- a/scripts/dssp_df.py
+++ b/scripts/dssp_df.py
@@ -5,6 +5,11 @@ Created on Tue Apr  7 09:30:16 2020
@author: tanu
 """
 #=======================================================================
 # TASK:
 #=======================================================================
 #%% load packages
 import sys, os
 import argparse
 import re
@@ -14,7 +19,7 @@ from Bio.PDB.DSSP import DSSP
 import dms_tools2
 import dms_tools2.dssp
 import pprint as pp
-#%%
+#=======================================================================
 #%% specify homedir and curr dir
 homedir = os.path.expanduser('~') 
@@ -22,13 +27,13 @@ homedir = os.path.expanduser('~')
 os.getcwd()
 os.chdir(homedir + '/git/LSHTM_analysis/scripts')
 os.getcwd()
-
+#=======================================================================
 #%% command line args
 arg_parser = argparse.ArgumentParser()
-arg_parser.add_argument('-d', '--drug', help='drug name', default = 'pyrazin')
+arg_parser.add_argument('-d', '--drug', help='drug name', default = 'pyrazinamide')
-arg_parser.add_argument('-g', '--gene', help='gene name', default = 'pn') # case sensitive
+arg_parser.add_argument('-g', '--gene', help='gene name', default = 'pncA') # case sensitive
 args = arg_parser.parse_args()
-
+#=======================================================================
 #%% variable assignment: input and output 
 #drug = 'pyrazinamide'
 #gene = 'pncA'
@@ -73,6 +78,8 @@ dsspcsv_file =  outdir + '/' + dsspcsv_filename
 print('Outfile dssp to csv: ', dsspcsv_file
      , '\n=============================================================')
 #%% end of variable assignment for input and output files
 #=======================================================================
 #%% create .dssp from pdb
 def dssp_file_from_pdb(inputpdbfile, outfile, DSSP = "dssp"):
    """
@@ -92,10 +99,9 @@ def dssp_file_from_pdb(inputpdbfile, outfile, DSSP = "dssp"):
 #    out_file = infile +'.dssp'
 #    outfile = os.path.splitext(inputpdbfile)[0]+'.dssp' # strip file ext
    os.system("%s -i %s -o %s" % (DSSP, inputpdbfile, outfile))
-
+#=======================================================================
 #%% extract chain id from dssp
 #print(dssp.keys())
 #print(dssp.keys()[0][0])
 #print(len(dssp))
@@ -137,8 +143,8 @@ def extract_chain_dssp(inputpdbfile):
          , in_filename, 'contains:', len(pdbchainlist)
          , 'chains:\n', pdbchainlist)
    return pdbchainlist
-
+#=======================================================================
-#%%
+#%% write csv of processed dssp output
 def dssp_to_csv(inputdsspfile, outfile, pdbchainlist):
    """
    Create a df from a dssp file containing ASA, RSA, SS for all chains
@@ -188,14 +194,13 @@ def dssp_to_csv(inputdsspfile, outfile, pdbchainlist):
         , '\nNo. of rows:', len(dssp_df)
         , '\nNo. of cols:', len(dssp_df.columns)
         , '\n==============================================================')
-
+#=======================================================================
-#%%
+#%% call functions
 # call 
 #dssp_file_from_pdb(infile, dssp_file, DSSP = "dssp")
 #my_chains = extract_chain_dssp(infile)
 #dssp_to_csv(dssp_file, dsspcsv_file, my_chains)
 #%%
-
+#=======================================================================
 def main():
    print('Running dssp on', in_filename, 'extracting df and output csv:', dsspcsv_filename)
    dssp_file_from_pdb(infile, dssp_file, DSSP = "dssp")
@@ -205,3 +210,4 @@ def main():
 if __name__ == "__main__":
    main()
 #%% end of script
 #=======================================================================