tidy code and renamed kd.py to kd_df.py

2020-03-26 15:43:13 +00:00 · 2020-03-26 15:43:13 +00:00 · a074d29f6e
commit a074d29f6e
parent 73e0029b65
6 changed files with 156 additions and 194 deletions
--- a/meta_data_analysis/dssp_df.py
+++ b/meta_data_analysis/dssp_df.py
@ -1,8 +1,22 @@
 #!/home/tanu/anaconda3/envs/ContactMap/bin/python3
-# Read a DSSP file into a data frame and pretty-print it
+# -*- coding: utf-8 -*-
+"""
+Created on Tue Feb 18 10:10:12 2020

+@author: tanu
+"""
+#=======================================================================
+# Task: Read a DSSP file into a data frame and output to a csv file
+
+# Input: '.dssp' file, i.e. the gene-associated .dssp file (output from run_dssp.sh)
+
+# Output: '.csv' file containing DSSP output as a df with ASA, RSA, etc.
+
+# useful links:
 #https://jbloomlab.github.io/dms_tools2/dms_tools2.dssp.html
 #https://jbloomlab.github.io/dms_tools2/dms_tools2.dssp.html
+#=======================================================================
+#%% load packages
 import sys, os
 import re
 import pandas as pd
@ -13,16 +27,16 @@ import pprint as pp
 #from Bio.PDB.PDBParser import PDBParser
 import dms_tools2
 import dms_tools2.dssp
-
-#%% specify input and output variables
+#=======================================================================#
+#%% specify homedir and curr dir
 homedir = os.path.expanduser('~') 

-#%% set working dir
+# set working dir
 os.getcwd()
 os.chdir(homedir + '/git/LSHTM_analysis/meta_data_analysis')
 os.getcwd()
 #=======================================================================
-#%% variable assignment: input and output paths & filenames
+#%% variable assignment: input and output 
 drug = 'pyrazinamide'
 gene = 'pncA'
 #gene_match = gene + '_p.'
@ -57,7 +71,7 @@ print('Output filename:', out_filename
      ,'\nOutfile: ', outfile)

 #%% end of variable assignment for input and output files
-#================================================================
+#=======================================================================
 # Process dssp output and extract into df
 dssp_file = infile
 dssp_df = dms_tools2.dssp.processDSSP(dssp_file, chain = my_chain)
@ -65,6 +79,11 @@ dssp_df = dms_tools2.dssp.processDSSP(dssp_file, chain = my_chain)
 # Link: https://en.wikipedia.org/wiki/Relative_accessible_surface_area
 pp.pprint(dssp_df)

+#=====================
+# Renaming amino-acid
+# and site columns
+#=====================
+
 # Rename column (amino acid) as 'wild_type' and (site} as 'position' 
 # to be the same names as used in the file required for merging later.
 dssp_df.columns
@ -82,4 +101,6 @@ dssp_df.to_csv(outfile, header=True, index = False)
 print('Finished writing:', out_filename
     , '\nNo. of rows:', len(dssp_df)
     , '\nNo. of cols:', len(dssp_df.columns))
-print('======================================================================')
+print('======================================================================')
+#%% end of script
+#=======================================================================