diff --git a/meta_data_analysis/kd_df.py b/meta_data_analysis/kd_df.py index 2566cf8..f9b94fe 100644 --- a/meta_data_analysis/kd_df.py +++ b/meta_data_analysis/kd_df.py @@ -123,10 +123,13 @@ min(dfVals['kd_values']) #=================== # concatenating dfs #=================== -# Merge the two on index (as these are now reflective of the aa position numbers): df1 and df2 -# This will introduce NaN where there is missing values. In our case this will be 2 (first and last ones) -# Conveniently, the last position in this case is not part of the struc, so not much loss of info -# Needless to state that this will be variable for other targets. +# Merge the two on index +# (as these are now reflective of the aa position numbers): df1 and df2 +# This will introduce NaN where there are missing values. In our case this +# will be 2 (first and last ones based on window size and offset) +# In our case this will be 2 (first and last ones) +# For pncA: the last position is not part of the struc, so no info loss +# Needless to say that this will be variable for other targets. 
kd_df = pd.concat([dfSeq, dfVals], axis = 1) diff --git a/scripts/dssp_df.py b/scripts/dssp_df.py index 080380c..0aa223f 100755 --- a/scripts/dssp_df.py +++ b/scripts/dssp_df.py @@ -5,6 +5,11 @@ Created on Tue Apr 7 09:30:16 2020 @author: tanu """ +#======================================================================= +# TASK: + +#======================================================================= +#%% load packages import sys, os import argparse import re @@ -14,7 +19,7 @@ from Bio.PDB.DSSP import DSSP import dms_tools2 import dms_tools2.dssp import pprint as pp -#%% +#======================================================================= #%% specify homedir and curr dir homedir = os.path.expanduser('~') @@ -22,13 +27,13 @@ homedir = os.path.expanduser('~') os.getcwd() os.chdir(homedir + '/git/LSHTM_analysis/scripts') os.getcwd() - +#======================================================================= #%% command line args arg_parser = argparse.ArgumentParser() -arg_parser.add_argument('-d', '--drug', help='drug name', default = 'pyrazin') -arg_parser.add_argument('-g', '--gene', help='gene name', default = 'pn') # case sensitive +arg_parser.add_argument('-d', '--drug', help='drug name', default = 'pyrazinamide') +arg_parser.add_argument('-g', '--gene', help='gene name', default = 'pncA') # case sensitive args = arg_parser.parse_args() - +#======================================================================= #%% variable assignment: input and output #drug = 'pyrazinamide' #gene = 'pncA' @@ -73,6 +78,8 @@ dsspcsv_file = outdir + '/' + dsspcsv_filename print('Outfile dssp to csv: ', dsspcsv_file , '\n=============================================================') +#%% end of variable assignment for input and output files +#======================================================================= #%% create .dssp from pdb def dssp_file_from_pdb(inputpdbfile, outfile, DSSP = "dssp"): """ @@ -92,10 +99,9 @@ def dssp_file_from_pdb(inputpdbfile, outfile, DSSP = 
"dssp"): # out_file = infile +'.dssp' # outfile = os.path.splitext(inputpdbfile)[0]+'.dssp' # strip file ext os.system("%s -i %s -o %s" % (DSSP, inputpdbfile, outfile)) - +#======================================================================= #%% extract chain id from dssp - #print(dssp.keys()) #print(dssp.keys()[0][0]) #print(len(dssp)) @@ -137,8 +143,8 @@ def extract_chain_dssp(inputpdbfile): , in_filename, 'contains:', len(pdbchainlist) , 'chains:\n', pdbchainlist) return pdbchainlist - -#%% +#======================================================================= +#%% write csv of processed dssp output def dssp_to_csv(inputdsspfile, outfile, pdbchainlist): """ Create a df from a dssp file containing ASA, RSA, SS for all chains @@ -188,14 +194,13 @@ def dssp_to_csv(inputdsspfile, outfile, pdbchainlist): , '\nNo. of rows:', len(dssp_df) , '\nNo. of cols:', len(dssp_df.columns) , '\n==============================================================') - -#%% -# call +#======================================================================= +#%% call functions #dssp_file_from_pdb(infile, dssp_file, DSSP = "dssp") #my_chains = extract_chain_dssp(infile) #dssp_to_csv(dssp_file, dsspcsv_file, my_chains) #%% - +#======================================================================= def main(): print('Running dssp on', in_filename, 'extracting df and output csv:', dsspcsv_filename) dssp_file_from_pdb(infile, dssp_file, DSSP = "dssp") @@ -205,3 +210,4 @@ def main(): if __name__ == "__main__": main() #%% end of script +#=======================================================================