#!/home/tanu/anaconda3/envs/ContactMap/bin/python3
# -*- coding: utf-8 -*-
"""
Created on Tue Feb 18 10:10:12 2020

@author: tanu
"""
#=======================================================================
# Task: Read a DSSP file into a data frame and output to a csv file

# Input: '.dssp' i.e. the gene-associated .dssp file (output from run_dssp.sh)

# Output: '.csv' file containing the DSSP output as a df with ASA, RSA, etc.

# useful links:
# https://jbloomlab.github.io/dms_tools2/dms_tools2.dssp.html
#=======================================================================
#%% load packages
# NOTE(review): sys, re, PDBParser and DSSP are not referenced anywhere in
# this script; they are retained so nothing that relies on them being in
# scope (e.g. an interactive session) breaks. Confirm before removing.
import os
import pprint as pp
import re
import sys

import pandas as pd
from Bio.PDB import PDBParser
from Bio.PDB.DSSP import DSSP

import dms_tools2
import dms_tools2.dssp
#=======================================================================
#%% specify homedir and curr dir
homedir = os.path.expanduser('~')

# set working dir
# Every path used below is built from homedir, so nothing in this script
# resolves relative to the working directory; the chdir is kept only to
# preserve the script's existing side effect.
os.chdir(os.path.join(homedir, 'git/LSHTM_analysis/meta_data_analysis'))
#=======================================================================
#%% variable assignment: input and output
drug = 'pyrazinamide'
gene = 'pncA'

#==========
# data dir
#==========
datadir = os.path.join(homedir, 'git/Data')

#=======
# input
#=======
indir = os.path.join(datadir, drug, 'output')
# lower-cased gene name + extension, e.g. 'pnca.dssp'
in_filename = gene.lower() + '.dssp'
infile = os.path.join(indir, in_filename)
print('Input filename:', in_filename
      , '\nInput path:', indir
      , '\n============================================================')

# specify PDB chain
my_chain = 'A'

#=======
# output
#=======
# the csv is written alongside the input, in the same 'output' directory
outdir = os.path.join(datadir, drug, 'output')
out_filename = gene.lower() + '_dssp.csv'
outfile = os.path.join(outdir, out_filename)
print('Output filename:', out_filename
      , '\nOutput path:', outdir
      , '\nOutfile: ', outfile
      , '\n=============================================================')

#%% end of variable assignment for input and output files
#=======================================================================
# Process dssp output and extract into df
dssp_file = infile
# Only the residues of chain `my_chain` are requested.
dssp_df = dms_tools2.dssp.processDSSP(dssp_file, chain=my_chain)
# returns df with ASA and RSA (based on Tien et al. 2013 (theor.) values)
# Link: https://en.wikipedia.org/wiki/Relative_accessible_surface_area
pp.pprint(dssp_df)

#=====================
# Renaming amino-acid
# and site columns
#=====================
# Rename column 'amino_acid' to 'wild_type_dssp' and 'site' to 'position'
# so that they match the names used in the file required for merging later.
dssp_df = dssp_df.rename(columns={'site': 'position',
                                  'amino_acid': 'wild_type_dssp'})

#%% Write output csv file
print('Writing file:', outfile
      , '\nFilename:', out_filename
      , '\nPath:', outdir
      , '\n=============================================================')

# write to csv (column header row included, row index omitted)
dssp_df.to_csv(outfile, header=True, index=False)

print('Finished writing:', out_filename
      , '\nNo. of rows:', len(dssp_df)
      , '\nNo. of cols:', len(dssp_df.columns)
      , '\n==============================================================')
#%% end of script
#=======================================================================