diff --git a/meta_data_analysis/dssp_df.py b/meta_data_analysis/dssp_df.py
index 5d3dc64..a12f7d5 100755
--- a/meta_data_analysis/dssp_df.py
+++ b/meta_data_analysis/dssp_df.py
@@ -10,11 +10,12 @@ Created on Tue Feb 18 10:10:12 2020
 
 # Input: '.dssp' i.e gene associated.dssp file (output from run_dssp.sh)
 
-# Output: '.csv' file containing DSSP output as a df ith ASA, RSA, etc.
+# Output: '.csv' file containing DSSP output as a df with ASA, RSA, etc.
+	# based on Tien at al 2013 (theor.) values
 
 # useful links:
-#https://jbloomlab.github.io/dms_tools2/dms_tools2.dssp.html
-#https://jbloomlab.github.io/dms_tools2/dms_tools2.dssp.html
+# https://jbloomlab.github.io/dms_tools2/dms_tools2.dssp.html
+# https://en.wikipedia.org/wiki/Relative_accessible_surface_area
 #=======================================================================
 #%% load packages
 import sys, os
@@ -22,11 +23,10 @@ import re
 import pandas as pd
 from Bio.PDB import PDBParser
 from Bio.PDB.DSSP import DSSP
-import pandas as pd
 import pprint as pp
-#from Bio.PDB.PDBParser import PDBParser
 import dms_tools2
 import dms_tools2.dssp
+
 #=======================================================================#
 #%% specify homedir and curr dir
 homedir = os.path.expanduser('~') 
@@ -37,10 +37,12 @@ os.chdir(homedir + '/git/LSHTM_analysis/meta_data_analysis')
 os.getcwd()
 #=======================================================================
 #%% variable assignment: input and output 
-drug = 'pyrazinamide'
-gene = 'pncA'
+#drug = 'pyrazinamide'
+#gene = 'pncA'
 #gene_match = gene + '_p.'
 
+drug = 'cycloserine'
+gene = 'alr'
 #==========
 # data dir
 #==========
@@ -50,17 +52,21 @@ datadir = homedir + '/' + 'git/Data'
 #=======
 # input from outdir
 #=======
-#indir = datadir + '/' + drug + '/' + 'output'
-outdir = datadir + '/' + drug + '/' + 'output'
-#in_filename = 'pnca.dssp'
-in_filename = gene.lower() +'.dssp'
-infile = indir + '/' + in_filename
-print('Input filename:', in_filename
-      , '\nInput path:', indir
+indir = datadir + '/' + drug + '/' + 'input'
+#1) pdb file
+in_filename_pdb = gene.lower() + '_complex' + '.pdb'
+infile_pdb = indir + '/' + in_filename_pdb
+print('Input pdb filename:', in_filename_pdb
+      , '\npath:', indir
+      , '\n============================================================')
+
+#2) dssp file
+outdir = datadir + '/' + drug + '/' + 'output'
+in_filename = gene.lower() +'.dssp'
+infile = outdir + '/' + in_filename
+print('Input dssp filename:', in_filename
+      , '\npath:', outdir
       , '\n============================================================')
-      
-# specify PDB chain
-my_chain = 'A'
 
 #=======
 # output 
@@ -69,30 +75,82 @@ outdir = datadir + '/' + drug + '/' + 'output'
 out_filename = gene.lower() + '_dssp.csv'
 outfile =  outdir + '/' + out_filename
 print('Output filename:', out_filename
-      , '\nOutput path:', outdir
+      , '\npath:', outdir
       , '\nOutfile: ', outfile
       , '\n=============================================================')
-      
 #%% end of variable assignment for input and output files
 #=======================================================================
-# Process dssp output and extract into df
-dssp_file = infile
-dssp_df = dms_tools2.dssp.processDSSP(dssp_file, chain = my_chain)
-# returns df with ASA and RSA (base on Tien at al 2013 (theor.) values)
-# Link: https://en.wikipedia.org/wiki/Relative_accessible_surface_area
-pp.pprint(dssp_df)
+#%% specify pdb chain as a list. Handy when more than 1 pdb chain
+my_chains = ['A']
+# my_chains = ['A', 'B'] # for cycloserine
+
+# generate my_chains from dssp
+p = PDBParser()
+structure = p.get_structure(in_filename_pdb, infile_pdb)
+model = structure[0]
+dssp = DSSP(model, infile_pdb)
+
+#print(dssp.keys())
+#print(dssp.keys()[0][0])
+#print(len(dssp))
+#print(dssp.keys()[0][0])
+#print(dssp.keys()[len(dssp)-1][0])
+
+dssp_chains = []
+for num_aa in range(0, len(dssp)):
+#    print(num_aa)
+#   extract the chain id only and append to a list
+    dssp_chains.append(dssp.keys()[num_aa][0])
+    chainsL = list(set(dssp_chains))
+    
+print(chainsL)
+# sort the list to make for sanity (since sets are not ordered)
+# this will be required for dssp_df
+my_chains = sorted(chainsL)
+
+print('dssp output for'
+      , in_filename, 'contains:', len(my_chains)
+      , 'chains:\n', my_chains)
+#%% ====================================================================
+# Process dssp output and extract into df (single chain)
+
+#dssp_df = dms_tools2.dssp.processDSSP(infile, chain = my_chains)
+#dssp_df['chain_dssp'] = my_chains
+#pp.pprint(dssp_df)
+#=======================================================================
+# incase pdb has > 1 chain and you need to run it for all chains
+
+# initialise an empty df
+dssp_df =  pd.DataFrame()
+print('Total no. of chains: ', len(my_chains))
+for chain_id in my_chains:
+    print('Chain id:', chain_id)
+    dssp_cur = pd.DataFrame()
+    dssp_cur = dms_tools2.dssp.processDSSP(infile, chain = chain_id)
+    #!!!Important!!!
+    dssp_cur['chain_id'] = chain_id 
+    dssp_df = dssp_df.append(dssp_cur)
+    pp.pprint(dssp_df)
 
 #=====================
 # Renaming amino-acid
 # and site columns
 #=====================
-
 # Rename column (amino acid) as 'wild_type' and (site} as 'position' 
 # to be the same names as used in the file required for merging later.
 dssp_df.columns
 dssp_df.rename(columns = {'site':'position', 'amino_acid':'wild_type_dssp'}, inplace = True)
 dssp_df.columns
 
+# sanity check
+if len(dssp_df) == len(dssp):
+    print('PASS: length of dssp_df has correct length')
+else:
+    print('FAIL: length mismatch for dssp_df'
+          , '\nexpected length:', len(dssp)
+          , '\nGot length:', len(dssp_df)
+          , 'Debug please!')
+    
 #%% Write ouput csv file
 print('Writing file:', outfile
       , '\nFilename:', out_filename
@@ -108,3 +166,5 @@ print('Finished writing:', out_filename
      , '\n==============================================================')
 #%% end of script
 #=======================================================================
+#%% run dssp to extract chain ids to later process the dssp output into a df
+
diff --git a/scripts/dssp.py b/scripts/dssp.py
index 87716c0..080380c 100755
--- a/scripts/dssp.py
+++ b/scripts/dssp.py
@@ -6,6 +6,7 @@ Created on Tue Apr  7 09:30:16 2020
 @author: tanu
 """
 import sys, os
+import argparse
 import re
 import pandas as pd
 from Bio.PDB import PDBParser
@@ -21,6 +22,13 @@ homedir = os.path.expanduser('~')
 os.getcwd()
 os.chdir(homedir + '/git/LSHTM_analysis/scripts')
 os.getcwd()
+
+#%% command line args
+arg_parser = argparse.ArgumentParser()
+arg_parser.add_argument('-d', '--drug', help='drug name', default = 'pyrazin')
+arg_parser.add_argument('-g', '--gene', help='gene name', default = 'pn') # case sensitive
+args = arg_parser.parse_args()
+
 #%% variable assignment: input and output 
 #drug = 'pyrazinamide'
 #gene = 'pncA'
@@ -29,8 +37,11 @@ os.getcwd()
 #drug = 'isoniazid'
 #gene = 'katG'
 
-drug = 'cycloserine'
-gene = 'alr'
+#drug = 'cycloserine'
+#gene = 'alr'
+
+drug = args.drug
+gene = args.gene
 #==========
 # data dir
 #==========
@@ -67,8 +78,8 @@ def dssp_file_from_pdb(inputpdbfile, outfile, DSSP = "dssp"):
     """
     Create a DSSP file from a PDB file
 
-    @param infile: pdb file
-    @type infile: string
+    @param inputpdbfile: pdb file
+    @type inputpdbfile: string
 
     @param outfile: dssp file
     @type outfile: string
@@ -92,14 +103,18 @@ def dssp_file_from_pdb(inputpdbfile, outfile, DSSP = "dssp"):
 #print(dssp.keys()[len(dssp)-1][0])
 def extract_chain_dssp(inputpdbfile):
     """
+    extracts chain_ids from dssp run on pdb file
+    This is to allow processing of dssp output to df 
+    and for writing as csv file
+    
     Parameters
     ----------
-    inputpdbfile : TYPE
-        DESCRIPTION.
+    @param inputpdbfile: pdb file
+    @type inputpdbfile: string
 
     Returns
     -------
-    @return: chain_ids from dssp output of pdb file
+    @return: chain_ids from running dssp on pdb file
     @type list
 
     """
@@ -117,11 +132,11 @@ def extract_chain_dssp(inputpdbfile):
     print(chainsL)
     # sort the list (since sets are not ordered) for convenience
     # this will be required for dssp_df
-    my_chains = sorted(chainsL)
+    pdbchainlist = sorted(chainsL)
     print('dssp output for'
-          , in_filename, 'contains:', len(my_chains)
-          , 'chains:\n', my_chains)
-    return my_chains
+          , in_filename, 'contains:', len(pdbchainlist)
+          , 'chains:\n', pdbchainlist)
+    return pdbchainlist
 
 #%%
 def dssp_to_csv(inputdsspfile, outfile, pdbchainlist):
@@ -141,8 +156,8 @@ def dssp_to_csv(inputdsspfile, outfile, pdbchainlist):
         """
     dssp_df =  pd.DataFrame()
     
-    print('Total no. of chains: ', len(my_chains))
-    for chain_id in my_chains:
+    print('Total no. of chains: ', len(pdbchainlist))
+    for chain_id in pdbchainlist:
         print('Chain id:', chain_id)
         dssp_cur = pd.DataFrame()
         dssp_cur = dms_tools2.dssp.processDSSP(inputdsspfile, chain = chain_id)
@@ -182,8 +197,11 @@ def dssp_to_csv(inputdsspfile, outfile, pdbchainlist):
 #%%
 
 def main():
-    print('Running dssp')
+    print('Running dssp on', in_filename, 'extracting df and output csv:', dsspcsv_filename)
+    dssp_file_from_pdb(infile, dssp_file, DSSP = "dssp")
+    my_chains = extract_chain_dssp(infile)
+    dssp_to_csv(dssp_file, dsspcsv_file, my_chains)
  
 if __name__ == "__main__":
     main()
-
+#%% end of script