tidy code and renamed kd.py to kd_df.py
This commit is contained in:
parent
73e0029b65
commit
a074d29f6e
6 changed files with 156 additions and 194 deletions
|
@ -1,8 +1,22 @@
|
|||
#!/home/tanu/anaconda3/envs/ContactMap/bin/python3
|
||||
# Read a DSSP file into a data frame and pretty-print it
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Created on Tue Feb 18 10:10:12 2020
|
||||
|
||||
@author: tanu
|
||||
"""
|
||||
#=======================================================================
|
||||
# Task: Read a DSSP file into a data frame and output to a csv file
|
||||
|
||||
# Input: '.dssp' file, i.e. the gene-associated .dssp file (output from run_dssp.sh)
|
||||
|
||||
# Output: '.csv' file containing DSSP output as a df with ASA, RSA, etc.
|
||||
|
||||
# useful links:
|
||||
#https://jbloomlab.github.io/dms_tools2/dms_tools2.dssp.html
|
||||
#https://jbloomlab.github.io/dms_tools2/dms_tools2.dssp.html
|
||||
#=======================================================================
|
||||
#%% load packages
|
||||
import sys, os
|
||||
import re
|
||||
import pandas as pd
|
||||
|
@ -13,16 +27,16 @@ import pprint as pp
|
|||
#from Bio.PDB.PDBParser import PDBParser
|
||||
import dms_tools2
|
||||
import dms_tools2.dssp
|
||||
|
||||
#%% specify input and output variables
|
||||
#=======================================================================#
|
||||
#%% specify homedir and curr dir
|
||||
homedir = os.path.expanduser('~')
|
||||
|
||||
#%% set working dir
|
||||
# set working dir
|
||||
os.getcwd()
|
||||
os.chdir(homedir + '/git/LSHTM_analysis/meta_data_analysis')
|
||||
os.getcwd()
|
||||
#=======================================================================
|
||||
#%% variable assignment: input and output paths & filenames
|
||||
#%% variable assignment: input and output
|
||||
drug = 'pyrazinamide'
|
||||
gene = 'pncA'
|
||||
#gene_match = gene + '_p.'
|
||||
|
@ -57,7 +71,7 @@ print('Output filename:', out_filename
|
|||
,'\nOutfile: ', outfile)
|
||||
|
||||
#%% end of variable assignment for input and output files
|
||||
#================================================================
|
||||
#=======================================================================
|
||||
# Process dssp output and extract into df
|
||||
dssp_file = infile
|
||||
dssp_df = dms_tools2.dssp.processDSSP(dssp_file, chain = my_chain)
|
||||
|
@ -65,6 +79,11 @@ dssp_df = dms_tools2.dssp.processDSSP(dssp_file, chain = my_chain)
|
|||
# Link: https://en.wikipedia.org/wiki/Relative_accessible_surface_area
|
||||
pp.pprint(dssp_df)
|
||||
|
||||
#=====================
|
||||
# Renaming amino-acid
|
||||
# and site columns
|
||||
#=====================
|
||||
|
||||
# Rename column (amino acid) as 'wild_type' and (site) as 'position'
|
||||
# to be the same names as used in the file required for merging later.
|
||||
dssp_df.columns
|
||||
|
@ -82,4 +101,6 @@ dssp_df.to_csv(outfile, header=True, index = False)
|
|||
print('Finished writing:', out_filename
|
||||
, '\nNo. of rows:', len(dssp_df)
|
||||
, '\nNo. of cols:', len(dssp_df.columns))
|
||||
print('======================================================================')
|
||||
print('======================================================================')
|
||||
#%% end of script
|
||||
#=======================================================================
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue