tidied and updated kd and dssp scripts & generated their respective outputs
This commit is contained in:
parent
87a847109a
commit
4c2fa2b600
6 changed files with 209 additions and 181 deletions
|
@ -1,68 +1,85 @@
|
|||
#!/home/tanu/anaconda3/envs/ContactMap/bin/python3
|
||||
# Read a DSSP file into a data frame and pretty-print it
|
||||
|
||||
#https://jbloomlab.github.io/dms_tools2/dms_tools2.dssp.html
|
||||
#https://jbloomlab.github.io/dms_tools2/dms_tools2.dssp.html
|
||||
import sys, os
|
||||
import re
|
||||
import pandas as pd
|
||||
from Bio.PDB import PDBParser
|
||||
from Bio.PDB.DSSP import DSSP
|
||||
import pandas as pd
|
||||
import pprint as pp
|
||||
#from Bio.PDB.PDBParser import PDBParser
|
||||
import dms_tools2
|
||||
import dms_tools2.dssp
|
||||
|
||||
#%%
|
||||
# my working dir
|
||||
homedir = os.path.expanduser('~') # spyder/python doesn't recognise tilde
|
||||
os.getcwd()
|
||||
os.chdir(homedir + '/git/LSHTM_analysis/meta_data_analysis/struct_params')
|
||||
os.getcwd()
|
||||
#%%
|
||||
# sample example
|
||||
dssp_file = "./3pl1.dssp"
|
||||
dssp_df = dms_tools2.dssp.processDSSP(dssp_file, chain='A')
|
||||
#%% specify input and output variables
|
||||
homedir = os.path.expanduser('~')
|
||||
|
||||
# outputs to console
|
||||
#returns df with ASA and RSA (based on Tien et al. 2013 (theor.) values)
|
||||
#Link: https://en.wikipedia.org/wiki/Relative_accessible_surface_area
|
||||
#%% set working dir
|
||||
os.getcwd()
|
||||
os.chdir(homedir + '/git/LSHTM_analysis/meta_data_analysis')
|
||||
os.getcwd()
|
||||
#=======================================================================
|
||||
#%% variable assignment: input and output paths & filenames
|
||||
drug = 'pyrazinamide'
|
||||
gene = 'pncA'
|
||||
#gene_match = gene + '_p.'
|
||||
|
||||
#==========
|
||||
# data dir
|
||||
#==========
|
||||
#indir = 'git/Data/pyrazinamide/input/original'
|
||||
datadir = homedir + '/' + 'git/Data'
|
||||
|
||||
#=======
|
||||
# input
|
||||
#=======
|
||||
indir = datadir + '/' + drug + '/' + 'output'
|
||||
#in_filename = 'pnca.dssp'
|
||||
in_filename = gene.lower() +'.dssp'
|
||||
infile = indir + '/' + in_filename
|
||||
print('Input filename:', in_filename
|
||||
, '\nInput path:', indir)
|
||||
|
||||
# specify PDB chain
|
||||
my_chain = 'A'
|
||||
|
||||
#=======
|
||||
# output
|
||||
#=======
|
||||
outdir = datadir + '/' + drug + '/' + 'output'
|
||||
out_filename = gene.lower() + '_dssp_df'
|
||||
outfile = outdir + '/' + out_filename
|
||||
print('Output filename:', out_filename
|
||||
, '\nOutput path:', outdir
|
||||
,'\nOutfile: ', outfile)
|
||||
|
||||
#%% end of variable assignment for input and output files
|
||||
#================================================================
|
||||
# Process dssp output and extract into df
|
||||
dssp_file = infile
|
||||
dssp_df = dms_tools2.dssp.processDSSP(dssp_file, chain = my_chain)
|
||||
# returns df with ASA and RSA (based on Tien et al. 2013 (theor.) values)
|
||||
# Link: https://en.wikipedia.org/wiki/Relative_accessible_surface_area
|
||||
pp.pprint(dssp_df)
|
||||
|
||||
# write to csv
|
||||
dssp_df.to_csv('3pl1_dssp_df', header=True, index = False)
|
||||
# Rename column (amino_acid) as 'wild_type' and (site) as 'position'
|
||||
# to be the same names as used in the file required for merging later.
|
||||
dssp_df.columns
|
||||
dssp_df.rename(columns = {'site':'position', 'amino_acid':'wild_type'}, inplace = True)
|
||||
dssp_df.columns
|
||||
|
||||
#%% specify variables for input and output paths and filenames
|
||||
drug = "pyrazinamide"
|
||||
#gene = "pnca"
|
||||
#%% Write output csv file
|
||||
print('Writing file:', outfile
|
||||
, '\nFilename:', out_filename
|
||||
, '\nPath:', outdir)
|
||||
|
||||
datadir = homedir + "/git/Data"
|
||||
basedir = datadir + "/" + drug + "/input"
|
||||
|
||||
# input
|
||||
inpath = "/processed"
|
||||
in_filename = "/3pl1.dssp"
|
||||
infile = basedir + inpath + in_filename
|
||||
#print(infile)
|
||||
|
||||
# output file
|
||||
outpath = "/output"
|
||||
outdir = datadir + "/" + drug + outpath
|
||||
out_filename = "/3pl1_dssp_df"
|
||||
outfile = outdir + out_filename
|
||||
print(outdir); print(outfile)
|
||||
|
||||
if not os.path.exists(datadir):
|
||||
print('Error!', datadir, 'does not exist. Please ensure it exists. Dir struc specified in README.md')
|
||||
os.makedirs(datadir)
|
||||
exit()
|
||||
|
||||
if not os.path.exists(outdir):
|
||||
print('Error!', outdir, 'does not exist.Please ensure it exists. Dir struc specified in README.md')
|
||||
exit()
|
||||
|
||||
else:
|
||||
print('Dir exists: Carrying on')
|
||||
# end of variable assignment for input and output files
|
||||
#%% <----- fixme
|
||||
dssp_file = infile
|
||||
dssp_df = dms_tools2.dssp.processDSSP(dssp_file, chain='A')
|
||||
|
||||
#%%
|
||||
# write to csv
|
||||
dssp_df.to_csv(outfile, header=True, index = False)
|
||||
|
||||
print('Finished writing:', out_filename
|
||||
, '\nNo. of rows:', len(dssp_df)
|
||||
, '\nNo. of cols:', len(dssp_df.columns))
|
||||
print('======================================================================')
|
Loading…
Add table
Add a link
Reference in a new issue