From 1f8cfc2403a623fee3f45f2452219665e22cc3bc Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Mon, 8 Feb 2021 15:24:22 +0000 Subject: [PATCH 01/12] test2 bugfixes --- foldx/runFoldx.py | 5 +- foldx/runcomplex.sh | 1 - foldx/test2/mutrenamefiles.sh | 61 ++++++ foldx/test2/mutruncomplex.sh | 10 + foldx/test2/renamefiles.sh | 62 ++++++ foldx/test2/repairPDB.sh | 9 + foldx/test2/runFoldx.py | 344 ++++++++++++++++++++++++++++++++ foldx/test2/runFoldx_test.py | 250 +++++++++++++++++++++++ foldx/test2/runPrintNetworks.sh | 7 + foldx/test2/runcomplex.sh | 9 + foldx/test2/runfoldx.sh | 9 + scripts/data_extraction.py | 119 ++++++----- scripts/running_scripts | 34 ++-- 13 files changed, 851 insertions(+), 69 deletions(-) create mode 100755 foldx/test2/mutrenamefiles.sh create mode 100755 foldx/test2/mutruncomplex.sh create mode 100755 foldx/test2/renamefiles.sh create mode 100755 foldx/test2/repairPDB.sh create mode 100755 foldx/test2/runFoldx.py create mode 100755 foldx/test2/runFoldx_test.py create mode 100755 foldx/test2/runPrintNetworks.sh create mode 100755 foldx/test2/runcomplex.sh create mode 100755 foldx/test2/runfoldx.sh diff --git a/foldx/runFoldx.py b/foldx/runFoldx.py index 1538a6c..2bf0067 100755 --- a/foldx/runFoldx.py +++ b/foldx/runFoldx.py @@ -35,11 +35,12 @@ arg_parser.add_argument('-g', '--gene', help = 'gene name (case sensitive)', arg_parser.add_argument('--datadir', help = 'Data Directory. By default, it assmumes homedir + git/Data') arg_parser.add_argument('-i', '--input_dir', help = 'Input dir containing pdb files. By default, it assmumes homedir + + input') arg_parser.add_argument('-o', '--output_dir', help = 'Output dir for results. By default, it assmes homedir + + output') -arg_parser.add_argument('-p', '--process_dir', help = 'Temp processing dir for running foldX. By default, it assmes homedir + + processing. Make sure it is somewhere with LOTS of storage as it writes all output!') +arg_parser.add_argument('-p', '--process_dir', help = 'Temp processing dir for running foldX. By default, it assmes homedir + + processing. Make sure it is somewhere with LOTS of storage as it writes all output!') #FIXME arg_parser.add_argument('-pdb', '--pdb_file', help = 'PDB File to process. By default, it assmumes a file called _complex.pdb in input_dir') arg_parser.add_argument('-m', '--mutation_file', help = 'Mutation list. By default, assumes a file called _mcsm_snps.csv exists') +# FIXME: Doesn't work with 2 chains yet! arg_parser.add_argument('-c1', '--chain1', help = 'Chain1 ID', default = 'A') # case sensitive arg_parser.add_argument('-c2', '--chain2', help = 'Chain2 ID', default = 'B') # case sensitive @@ -101,7 +102,7 @@ actual_pdb_filename = Path(infile_pdb).name if mut_filename: mutation_file = mut_filename else: - mutation_file = gene.lower() + '_mcsm_snps.csv' + mutation_file = gene.lower() + '_mcsm_formatted_snps.csv' infile_muts = outdir + '/' + mutation_file diff --git a/foldx/runcomplex.sh b/foldx/runcomplex.sh index 9cfd32a..0c0483d 100755 --- a/foldx/runcomplex.sh +++ b/foldx/runcomplex.sh @@ -7,4 +7,3 @@ logger "Running runcomplex" foldx --command=AnalyseComplex --pdb="${PDB}_Repair.pdb" --analyseComplexChains=${A},${B} --water=PREDICT --vdwDesign=1 --output-dir=${OUTDIR} cp ${OUTDIR}/Summary_${PDB}_Repair_AC.fxout ${OUTDIR}/Summary_${PDB}_Repair_AC.txt #sed -i .bak -e 1,8d ${OUTDIR}/Summary_${PDB}_Repair_AC.txt - diff --git a/foldx/test2/mutrenamefiles.sh b/foldx/test2/mutrenamefiles.sh new file mode 100755 index 0000000..b1d8742 --- /dev/null +++ b/foldx/test2/mutrenamefiles.sh @@ -0,0 +1,61 @@ +PDB=$1 +n=$2 +#cd /home/git/LSHTM_analysis/foldx/test/ +cp Matrix_Hbonds_${PDB}_Repair_${n}_PN.fxout Matrix_Hbonds_${PDB}_Repair_${n}_PN.txt +sed -n '5,190p' Matrix_Hbonds_${PDB}_Repair_${n}_PN.fxout > Matrix_Hbonds_RR_${PDB}_Repair_${n}_PN.txt +sed -n '194,379p' Matrix_Hbonds_${PDB}_Repair_${n}_PN.fxout > Matrix_Hbonds_MM_${PDB}_Repair_${n}_PN.txt +sed -n '383,568p' Matrix_Hbonds_${PDB}_Repair_${n}_PN.fxout > Matrix_Hbonds_SM_${PDB}_Repair_${n}_PN.txt +sed -n '572,757p' Matrix_Hbonds_${PDB}_Repair_${n}_PN.fxout > Matrix_Hbonds_SS_${PDB}_Repair_${n}_PN.txt +cp Matrix_Distances_${PDB}_Repair_${n}_PN.fxout Matrix_Distances_${PDB}_Repair_${n}_PN.txt +sed -i '1,4d' Matrix_Distances_${PDB}_Repair_${n}_PN.txt +cp Matrix_Volumetric_${PDB}_Repair_${n}_PN.fxout Matrix_Volumetric_${PDB}_Repair_${n}_PN.txt +sed -n '5,190p' Matrix_Volumetric_${PDB}_Repair_${n}_PN.fxout > Matrix_Volumetric_RR_${PDB}_Repair_${n}_PN.txt +sed -n '194,379p' Matrix_Volumetric_${PDB}_Repair_${n}_PN.fxout > Matrix_Volumetric_MM_${PDB}_Repair_${n}_PN.txt +sed -n '383,568p' Matrix_Volumetric_${PDB}_Repair_${n}_PN.fxout > Matrix_Volumetric_SM_${PDB}_Repair_${n}_PN.txt +sed -n '572,757p' Matrix_Volumetric_${PDB}_Repair_${n}_PN.fxout > Matrix_Volumetric_SS_${PDB}_Repair_${n}_PN.txt +cp Matrix_Electro_${PDB}_Repair_${n}_PN.fxout Matrix_Electro_${PDB}_Repair_${n}_PN.txt +sed -n '5,190p' Matrix_Electro_${PDB}_Repair_${n}_PN.fxout > Matrix_Electro_RR_${PDB}_Repair_${n}_PN.txt +sed -n '194,379p' Matrix_Electro_${PDB}_Repair_${n}_PN.fxout > Matrix_Electro_MM_${PDB}_Repair_${n}_PN.txt +sed -n '383,568p' Matrix_Electro_${PDB}_Repair_${n}_PN.fxout > Matrix_Electro_SM_${PDB}_Repair_${n}_PN.txt +sed -n '572,757p' Matrix_Electro_${PDB}_Repair_${n}_PN.fxout > Matrix_Electro_SS_${PDB}_Repair_${n}_PN.txt +cp Matrix_Disulfide_${PDB}_Repair_${n}_PN.fxout Matrix_Disulfide_${PDB}_Repair_${n}_PN.txt +sed -n '5,190p' Matrix_Disulfide_${PDB}_Repair_${n}_PN.fxout > Matrix_Disulfide_RR_${PDB}_Repair_${n}_PN.txt +sed -n '194,379p' Matrix_Disulfide_${PDB}_Repair_${n}_PN.fxout > Matrix_Disulfide_MM_${PDB}_Repair_${n}_PN.txt +sed -n '383,568p' Matrix_Disulfide_${PDB}_Repair_${n}_PN.fxout > Matrix_Disulfide_SM_${PDB}_Repair_${n}_PN.txt +sed -n '572,757p' Matrix_Disulfide_${PDB}_Repair_${n}_PN.fxout > Matrix_Disulfide_SS_${PDB}_Repair_${n}_PN.txt +cp Matrix_Partcov_${PDB}_Repair_${n}_PN.fxout Matrix_Partcov_${PDB}_Repair_${n}_PN.txt +sed -n '5,190p' Matrix_Partcov_${PDB}_Repair_${n}_PN.fxout > Matrix_Partcov_RR_${PDB}_Repair_${n}_PN.txt +sed -n '194,379p' Matrix_Partcov_${PDB}_Repair_${n}_PN.fxout > Matrix_Partcov_MM_${PDB}_Repair_${n}_PN.txt +sed -n '383,568p' Matrix_Partcov_${PDB}_Repair_${n}_PN.fxout > Matrix_Partcov_SM_${PDB}_Repair_${n}_PN.txt +sed -n '572,757p' Matrix_Partcov_${PDB}_Repair_${n}_PN.fxout > Matrix_Partcov_SS_${PDB}_Repair_${n}_PN.txt +cp Matrix_VdWClashes_${PDB}_Repair_${n}_PN.fxout Matrix_VdWClashes_${PDB}_Repair_${n}_PN.txt +sed -n '5,190p' Matrix_VdWClashes_${PDB}_Repair_${n}_PN.fxout > Matrix_VdWClashes_RR_${PDB}_Repair_${n}_PN.txt +sed -n '194,379p' Matrix_VdWClashes_${PDB}_Repair_${n}_PN.fxout > Matrix_VdWClashes_MM_${PDB}_Repair_${n}_PN.txt +sed -n '383,568p' Matrix_VdWClashes_${PDB}_Repair_${n}_PN.fxout > Matrix_VdWClashes_SM_${PDB}_Repair_${n}_PN.txt +sed -n '572,757p' Matrix_VdWClashes_${PDB}_Repair_${n}_PN.fxout > Matrix_VdWClashes_SS_${PDB}_Repair_${n}_PN.txt +cp AllAtoms_Disulfide_${PDB}_Repair_${n}_PN.fxout AllAtoms_Disulfide_${PDB}_Repair_${n}_PN.txt +sed -i '1,2d' AllAtoms_Disulfide_${PDB}_Repair_${n}_PN.txt +cp AllAtoms_Electro_${PDB}_Repair_${n}_PN.fxout AllAtoms_Electro_${PDB}_Repair_${n}_PN.txt +sed -i '1,2d' AllAtoms_Electro_${PDB}_Repair_${n}_PN.txt +cp AllAtoms_Hbonds_${PDB}_Repair_${n}_PN.fxout AllAtoms_Hbonds_${PDB}_Repair_${n}_PN.txt +sed -i '1,2d' AllAtoms_Hbonds_${PDB}_Repair_${n}_PN.txt +cp AllAtoms_Partcov_${PDB}_Repair_${n}_PN.fxout AllAtoms_Partcov_${PDB}_Repair_${n}_PN.txt +sed -i '1,2d' AllAtoms_Partcov_${PDB}_Repair_${n}_PN.txt +cp AllAtoms_VdWClashes_${PDB}_Repair_${n}_PN.fxout AllAtoms_VdWClashes_${PDB}_Repair_${n}_PN.txt +sed -i '1,2d' AllAtoms_VdWClashes_${PDB}_Repair_${n}_PN.txt +cp AllAtoms_Volumetric_${PDB}_Repair_${n}_PN.fxout AllAtoms_Volumetric_${PDB}_Repair_${n}_PN.txt +sed -i '1,2d' AllAtoms_Volumetric_${PDB}_Repair_${n}_PN.txt +cp InteractingResidues_VdWClashes_${PDB}_Repair_${n}_PN.fxout InteractingResidues_VdWClashes_${PDB}_Repair_${n}_PN.txt +sed -i '1,5d' InteractingResidues_VdWClashes_${PDB}_Repair_${n}_PN.txt +cp InteractingResidues_Distances_${PDB}_Repair_${n}_PN.fxout InteractingResidues_Distances_${PDB}_Repair_${n}_PN.txt +sed -i '1,5d' InteractingResidues_Distances_${PDB}_Repair_${n}_PN.txt +cp InteractingResidues_Electro_${PDB}_Repair_${n}_PN.fxout InteractingResidues_Electro_${PDB}_Repair_${n}_PN.txt +sed -i '1,5d' InteractingResidues_Electro_${PDB}_Repair_${n}_PN.txt +cp InteractingResidues_Hbonds_${PDB}_Repair_${n}_PN.fxout InteractingResidues_Hbonds_${PDB}_Repair_${n}_PN.txt +sed -i '1,5d' InteractingResidues_Hbonds_${PDB}_Repair_${n}_PN.txt +cp InteractingResidues_Partcov_${PDB}_Repair_${n}_PN.fxout InteractingResidues_Partcov_${PDB}_Repair_${n}_PN.txt +sed -i '1,5d' InteractingResidues_Partcov_${PDB}_Repair_${n}_PN.txt +cp InteractingResidues_Volumetric_${PDB}_Repair_${n}_PN.fxout InteractingResidues_Volumetric_${PDB}_Repair_${n}_PN.txt +sed -i '1,5d' InteractingResidues_Volumetric_${PDB}_Repair_${n}_PN.txt +cp InteractingResidues_Disulfide_${PDB}_Repair_${n}_PN.fxout InteractingResidues_Disulfide_${PDB}_Repair_${n}_PN.txt +sed -i '1,5d' InteractingResidues_Disulfide_${PDB}_Repair_${n}_PN.txt diff --git a/foldx/test2/mutruncomplex.sh b/foldx/test2/mutruncomplex.sh new file mode 100755 index 0000000..2b2c4e1 --- /dev/null +++ b/foldx/test2/mutruncomplex.sh @@ -0,0 +1,10 @@ +PDB=$1 +A=$2 +B=$3 +n=$4 +OUTDIR=$5 +cd ${OUTDIR} +logger "Running mutruncomplex" +foldx --command=AnalyseComplex --pdb="${PDB}_Repair_${n}.pdb" --analyseComplexChains=${A},${B} --water=PREDICT --vdwDesign=1 +cp ${OUTDIR}/Summary_${PDB}_Repair_${n}_AC.fxout ${OUTDIR}/Summary_${PDB}_Repair_${n}_AC.txt +#sed -i .bak -e 1,8d ${OUTDIR}/Summary_${PDB}_Repair_${n}_AC.txt diff --git a/foldx/test2/renamefiles.sh b/foldx/test2/renamefiles.sh new file mode 100755 index 0000000..e5d8fe9 --- /dev/null +++ b/foldx/test2/renamefiles.sh @@ -0,0 +1,62 @@ +PDB=$1 +#cd /home/git/LSHTM_analysis/foldx/test +cp Dif_${PDB}_Repair.fxout Dif_${PDB}_Repair.txt +sed -i '1,8d' Dif_${PDB}_Repair.txt +cp Matrix_Hbonds_${PDB}_Repair_PN.fxout Matrix_Hbonds_${PDB}_Repair_PN.txt +sed -n '5,190p' Matrix_Hbonds_${PDB}_Repair_PN.fxout > Matrix_Hbonds_RR_${PDB}_Repair_PN.txt +sed -n '194,379p' Matrix_Hbonds_${PDB}_Repair_PN.fxout > Matrix_Hbonds_MM_${PDB}_Repair_PN.txt +sed -n '383,568p' Matrix_Hbonds_${PDB}_Repair_PN.fxout > Matrix_Hbonds_SM_${PDB}_Repair_PN.txt +sed -n '572,757p' Matrix_Hbonds_${PDB}_Repair_PN.fxout > Matrix_Hbonds_SS_${PDB}_Repair_PN.txt +cp Matrix_Distances_${PDB}_Repair_PN.fxout Matrix_Distances_${PDB}_Repair_PN.txt +sed -i '1,4d' Matrix_Distances_${PDB}_Repair_PN.txt +cp Matrix_Volumetric_${PDB}_Repair_PN.fxout Matrix_Volumetric_${PDB}_Repair_PN.txt +sed -n '5,190p' Matrix_Volumetric_${PDB}_Repair_PN.fxout > Matrix_Volumetric_RR_${PDB}_Repair_PN.txt +sed -n '194,379p' Matrix_Volumetric_${PDB}_Repair_PN.fxout > Matrix_Volumetric_MM_${PDB}_Repair_PN.txt +sed -n '383,568p' Matrix_Volumetric_${PDB}_Repair_PN.fxout > Matrix_Volumetric_SM_${PDB}_Repair_PN.txt +sed -n '572,757p' Matrix_Volumetric_${PDB}_Repair_PN.fxout > Matrix_Volumetric_SS_${PDB}_Repair_PN.txt +cp Matrix_Electro_${PDB}_Repair_PN.fxout Matrix_Electro_${PDB}_Repair_PN.txt +sed -n '5,190p' Matrix_Electro_${PDB}_Repair_PN.fxout > Matrix_Electro_RR_${PDB}_Repair_PN.txt +sed -n '194,379p' Matrix_Electro_${PDB}_Repair_PN.fxout > Matrix_Electro_MM_${PDB}_Repair_PN.txt +sed -n '383,568p' Matrix_Electro_${PDB}_Repair_PN.fxout > Matrix_Electro_SM_${PDB}_Repair_PN.txt +sed -n '572,757p' Matrix_Electro_${PDB}_Repair_PN.fxout > Matrix_Electro_SS_${PDB}_Repair_PN.txt +cp Matrix_Disulfide_${PDB}_Repair_PN.fxout Matrix_Disulfide_${PDB}_Repair_PN.txt +sed -n '5,190p' Matrix_Disulfide_${PDB}_Repair_PN.fxout > Matrix_Disulfide_RR_${PDB}_Repair_PN.txt +sed -n '194,379p' Matrix_Disulfide_${PDB}_Repair_PN.fxout > Matrix_Disulfide_MM_${PDB}_Repair_PN.txt +sed -n '383,568p' Matrix_Disulfide_${PDB}_Repair_PN.fxout > Matrix_Disulfide_SM_${PDB}_Repair_PN.txt +sed -n '572,757p' Matrix_Disulfide_${PDB}_Repair_PN.fxout > Matrix_Disulfide_SS_${PDB}_Repair_PN.txt +cp Matrix_Partcov_${PDB}_Repair_PN.fxout Matrix_Partcov_${PDB}_Repair_PN.txt +sed -n '5,190p' Matrix_Partcov_${PDB}_Repair_PN.fxout > Matrix_Partcov_RR_${PDB}_Repair_PN.txt +sed -n '194,379p' Matrix_Partcov_${PDB}_Repair_PN.fxout > Matrix_Partcov_MM_${PDB}_Repair_PN.txt +sed -n '383,568p' Matrix_Partcov_${PDB}_Repair_PN.fxout > Matrix_Partcov_SM_${PDB}_Repair_PN.txt +sed -n '572,757p' Matrix_Partcov_${PDB}_Repair_PN.fxout > Matrix_Partcov_SS_${PDB}_Repair_PN.txt +cp Matrix_VdWClashes_${PDB}_Repair_PN.fxout Matrix_VdWClashes_${PDB}_Repair_PN.txt +sed -n '5,190p' Matrix_VdWClashes_${PDB}_Repair_PN.fxout > Matrix_VdWClashes_RR_${PDB}_Repair_PN.txt +sed -n '194,379p' Matrix_VdWClashes_${PDB}_Repair_PN.fxout > Matrix_VdWClashes_MM_${PDB}_Repair_PN.txt +sed -n '383,568p' Matrix_VdWClashes_${PDB}_Repair_PN.fxout > Matrix_VdWClashes_SM_${PDB}_Repair_PN.txt +sed -n '572,757p' Matrix_VdWClashes_${PDB}_Repair_PN.fxout > Matrix_VdWClashes_SS_${PDB}_Repair_PN.txt +cp AllAtoms_Disulfide_${PDB}_Repair_PN.fxout AllAtoms_Disulfide_${PDB}_Repair_PN.txt +sed -i '1,2d' AllAtoms_Disulfide_${PDB}_Repair_PN.txt +cp AllAtoms_Electro_${PDB}_Repair_PN.fxout AllAtoms_Electro_${PDB}_Repair_PN.txt +sed -i '1,2d' AllAtoms_Electro_${PDB}_Repair_PN.txt +cp AllAtoms_Hbonds_${PDB}_Repair_PN.fxout AllAtoms_Hbonds_${PDB}_Repair_PN.txt +sed -i '1,2d' AllAtoms_Hbonds_${PDB}_Repair_PN.txt +cp AllAtoms_Partcov_${PDB}_Repair_PN.fxout AllAtoms_Partcov_${PDB}_Repair_PN.txt +sed -i '1,2d' AllAtoms_Partcov_${PDB}_Repair_PN.txt +cp AllAtoms_VdWClashes_${PDB}_Repair_PN.fxout AllAtoms_VdWClashes_${PDB}_Repair_PN.txt +sed -i '1,2d' AllAtoms_VdWClashes_${PDB}_Repair_PN.txt +cp AllAtoms_Volumetric_${PDB}_Repair_PN.fxout AllAtoms_Volumetric_${PDB}_Repair_PN.txt +sed -i '1,2d' AllAtoms_Volumetric_${PDB}_Repair_PN.txt +cp InteractingResidues_VdWClashes_${PDB}_Repair_PN.fxout InteractingResidues_VdWClashes_${PDB}_Repair_PN.txt +sed -i '1,5d' InteractingResidues_VdWClashes_${PDB}_Repair_PN.txt +cp InteractingResidues_Distances_${PDB}_Repair_PN.fxout InteractingResidues_Distances_${PDB}_Repair_PN.txt +sed -i '1,5d' InteractingResidues_Distances_${PDB}_Repair_PN.txt +cp InteractingResidues_Electro_${PDB}_Repair_PN.fxout InteractingResidues_Electro_${PDB}_Repair_PN.txt +sed -i '1,5d' InteractingResidues_Electro_${PDB}_Repair_PN.txt +cp InteractingResidues_Hbonds_${PDB}_Repair_PN.fxout InteractingResidues_Hbonds_${PDB}_Repair_PN.txt +sed -i '1,5d' InteractingResidues_Hbonds_${PDB}_Repair_PN.txt +cp InteractingResidues_Partcov_${PDB}_Repair_PN.fxout InteractingResidues_Partcov_${PDB}_Repair_PN.txt +sed -i '1,5d' InteractingResidues_Partcov_${PDB}_Repair_PN.txt +cp InteractingResidues_Volumetric_${PDB}_Repair_PN.fxout InteractingResidues_Volumetric_${PDB}_Repair_PN.txt +sed -i '1,5d' InteractingResidues_Volumetric_${PDB}_Repair_PN.txt +cp InteractingResidues_Disulfide_${PDB}_Repair_PN.fxout InteractingResidues_Disulfide_${PDB}_Repair_PN.txt +sed -i '1,5d' InteractingResidues_Disulfide_${PDB}_Repair_PN.txt diff --git a/foldx/test2/repairPDB.sh b/foldx/test2/repairPDB.sh new file mode 100755 index 0000000..ee1a13c --- /dev/null +++ b/foldx/test2/repairPDB.sh @@ -0,0 +1,9 @@ +INDIR=$1 +PDB=$2 +OUTDIR=$3 + +logger "Running repairPDB" + +#foldx --command=RepairPDB --pdb="${PDB}.pdb" --ionStrength=0.05 --pH=7 --water=PREDICT --vdwDesign=1 outPDB=true --output-dir=${OUTDIR} + +foldx --command=RepairPDB --pdb-dir=${INDIR} --pdb=${PDB} --ionStrength=0.05 --pH=7 --water=PREDICT --vdwDesign=1 outPDB=true --output-dir=${OUTDIR} diff --git a/foldx/test2/runFoldx.py b/foldx/test2/runFoldx.py new file mode 100755 index 0000000..bf2a835 --- /dev/null +++ b/foldx/test2/runFoldx.py @@ -0,0 +1,344 @@ +#!/usr/bin/env python3 +import subprocess +import os +import sys +import numpy as np +import pandas as pd +from contextlib import suppress +from pathlib import Path +import re +import csv +import argparse +#https://realpython.com/python-pathlib/ + +# FIXME +#strong dependency of file and path names +#cannot pass file with path. Need to pass them separately +#assumptions made for dir struc as standard +#datadir + drug + input + +#======================================================================= +#%% specify input and curr dir +homedir = os.path.expanduser('~') + +# set working dir +os.getcwd() +#os.chdir(homedir + '/git/LSHTM_analysis/foldx/') +#os.getcwd() + +#======================================================================= +#%% command line args +arg_parser = argparse.ArgumentParser() + +arg_parser.add_argument('-d', '--drug', help = 'drug name', default = None) +arg_parser.add_argument('-g', '--gene', help = 'gene name (case sensitive)', default = None) + +arg_parser.add_argument('--datadir', help = 'Data Directory. By default, it assmumes homedir + git/Data') +arg_parser.add_argument('-i', '--input_dir', help = 'Input dir containing pdb files. By default, it assmumes homedir + + input') +arg_parser.add_argument('-o', '--output_dir', help = 'Output dir for results. By default, it assmes homedir + + output') +arg_parser.add_argument('-p', '--process_dir', help = 'Temp processing dir for running foldX. By default, it assmes homedir + + processing. Make sure it is somewhere with LOTS of storage as it writes all output!') #FIXME + +arg_parser.add_argument('-pdb', '--pdb_file', help = 'PDB File to process. By default, it assmumes a file called _complex.pdb in input_dir') +arg_parser.add_argument('-m', '--mutation_file', help = 'Mutation list. By default, assumes a file called _mcsm_snps.csv exists') + +# FIXME: Doesn't work with 2 chains yet! +arg_parser.add_argument('-c1', '--chain1', help = 'Chain1 ID', default = 'A') # case sensitive +arg_parser.add_argument('-c2', '--chain2', help = 'Chain2 ID', default = 'B') # case sensitive + +args = arg_parser.parse_args() +#======================================================================= +#%% variable assignment: input and output +#drug = 'pyrazinamide' +#gene = 'pncA' +#gene_match = gene + '_p.' +#%%===================================================================== +# Command line options +drug = args.drug +gene = args.gene + +datadir = args.datadir +indir = args.input_dir +outdir = args.output_dir +process_dir = args.process_dir + +mut_filename = args.mutation_file +chainA = args.chain1 +chainB = args.chain2 +pdb_filename = args.pdb_file + +# os.path.splitext will fail interestingly with file.pdb.txt.zip +#pdb_name = os.path.splitext(pdb_file)[0] +# Just the filename, thanks +#pdb_name = Path(in_filename_pdb).stem + +#============== +# directories +#============== +if not datadir: + datadir = homedir + '/' + 'git/Data' + +if not indir: + indir = datadir + '/' + drug + '/input' + +if not outdir: + outdir = datadir + '/' + drug + '/output' + +#TODO: perhaps better handled by refactoring code to prevent generating lots of output files! +#if not process_dir: +# process_dir = datadir + '/' + drug + '/processing' + +# Make all paths absolute in case the user forgot +indir = os.path.abspath(indir) +process_dir = os.path.abspath(process_dir) +outdir = os.path.abspath(outdir) +datadir = os.path.abspath(datadir) + +#======= +# input +#======= +# FIXME +if pdb_filename: + pdb_name = Path(pdb_filename).stem +else: + pdb_filename = gene.lower() + '_complex.pdb' + pdb_name = Path(pdb_filename).stem + +infile_pdb = indir + '/' + pdb_filename +actual_pdb_filename = Path(infile_pdb).name + +if mut_filename: + mutation_file = os.path.abspath(mut_filename) + infile_muts = mutation_file + print('User-provided mutation file in use:', infile_muts) +else: + mutation_file = gene.lower() + '_mcsm_formatted_snps.csv' + infile_muts = outdir + '/' + mutation_file + print('WARNING: Assuming default mutation file:', infile_muts) + +#======= +# output +#======= +out_filename = gene.lower() + '_foldx.csv' +outfile_foldx = outdir + '/' + out_filename + +print('Arguments being passed:' +, '\nDrug:', args.drug +, '\ngene:', args.gene +, '\ninput dir:', indir +, '\nprocess dir:', process_dir +, '\noutput dir:', outdir +, '\npdb file:', infile_pdb +, '\npdb name:', pdb_name +, '\nactual pdb name:', actual_pdb_filename +, '\nmutation file:', infile_muts +, '\nchain1:', args.chain1 +, '\noutput file:', outfile_foldx +, '\n=============================================================') +#======================================================================= + +def getInteractionEnergy(filename): + data = pd.read_csv(filename,sep = '\t') + return data['Interaction Energy'].loc[0] + +def getInteractions(filename): + data = pd.read_csv(filename, index_col = 0, header = 0, sep = '\t') + contactList = getIndexes(data,1) + number = len(contactList) + return number + +def formatMuts(mut_file,pdbname): + with open(mut_file) as csvfile: + readCSV = csv.reader(csvfile) + muts = [] + for row in readCSV: + mut = row[0] + muts.append(mut) + + mut_list = [] + outfile = process_dir + '/individual_list_' + pdbname + '.txt' + with open(outfile, 'w') as output: + for m in muts: + print(m) + mut = m[:1] + chainA+ m[1:] + mut_list.append(mut) + mut = mut + ';' + print(mut) + output.write(mut) + output.write('\n') + return mut_list + +def getIndexes(data, value): + colnames = data.columns.values + listOfPos = list() + result = data.isin([value]) + result.columns = colnames + seriesdata = result.any() + columnNames = list(seriesdata[seriesdata==True].index) + for col in columnNames: + rows = list(result[col][result[col]==True].index) + + for row in rows: + listOfPos.append((row,col)) + + return listOfPos + +def loadFiles(df): + # load a text file in to np matrix + resultList = [] + f = open(df,'r') + for line in f: + line = line.rstrip('\n') + aVals = line.split('\t') + fVals = list(map(np.float32, sVals)) + resultList.append(fVals) + f.close() + return np.asarray(resultList, dtype=np.float32) + +#======================================================================= +def main(): + pdbname = pdb_name + comp = '' # for complex only + mut_filename = infile_muts #pnca_mcsm_snps.csv + mutlist = formatMuts(mut_filename, pdbname) + + print(mutlist) + nmuts = len(mutlist) + print(nmuts) + print(mutlist) + print('start') + #subprocess.check_output(['bash','repairPDB.sh', pdbname, process_dir]) + subprocess.check_output(['bash','repairPDB.sh', indir, actual_pdb_filename, process_dir]) + + print('end') + output = subprocess.check_output(['bash', 'runfoldx.sh', pdbname, process_dir]) + + for n in range(1,nmuts+1): + print(n) + with suppress(Exception): + subprocess.check_output(['bash', 'runPrintNetworks.sh', pdbname, str(n), process_dir]) + + for n in range(1,nmuts+1): + print(n) + with suppress(Exception): + subprocess.check_output(['bash', 'mutrenamefiles.sh', pdbname, str(n), process_dir]) + + out = subprocess.check_output(['bash','renamefiles.sh', pdbname, process_dir]) + + if comp=='y': + chain1=chainA + chain2=chainB + with suppress(Exception): + subprocess.check_output(['bash','runcomplex.sh', pdbname, chain1, chain2, process_dir]) + for n in range(1,nmuts+1): + with suppress(Exception): + subprocess.check_output(['bash','mutruncomplex.sh', pdbname, chain1, chain2, str(n), process_dir]) + + interactions = ['Distances','Electro_RR','Electro_MM','Electro_SM','Electro_SS','Disulfide_RR','Disulfide_MM','Disulfide_SM','Disulfide_SS', + 'Hbonds_RR','Hbonds_MM','Hbonds_SM','Hbonds_SS','Partcov_RR','Partcov_MM','Partcov_SM','Partcov_SS','VdWClashes_RR','VdWClashes_MM', + 'VdWClashes_SM','VdWClashes_SS','Volumetric_RR','Volumetric_MM','Volumetric_SM','Volumetric_SS'] + + dGdatafile = process_dir + '/Dif_' + pdbname + '_Repair.txt' + dGdata = pd.read_csv(dGdatafile, sep = '\t') + + ddG=[] + print('ddG') + print(len(dGdata)) + for i in range(0,len(dGdata)): + ddG.append(dGdata['total energy'].loc[i]) + + + nint = len(interactions) + wt_int = [] + + for i in interactions: + filename = process_dir + '/Matrix_' + i + '_'+ pdbname + '_Repair_PN.txt' + wt_int.append(getInteractions(filename)) + print('wt') + print(wt_int) + + ntotal = nint+1 + print(ntotal) + print(nmuts) + data = np.empty((ntotal,nmuts)) + data[0] = ddG + print(data) + for i in range(0,len(interactions)): + d=[] + p=0 + for n in range(1, nmuts+1): + print(i) + filename = process_dir + '/Matrix_' + interactions[i] + '_' + pdbname + '_Repair_' + str(n) + '_PN.txt' + mut = getInteractions(filename) + diff = wt_int[i] - mut + print(diff) + print(wt_int[i]) + print(mut) + d.append(diff) + print(d) + data[i+1] = d + + interactions = ['ddG', 'Distances','Electro_RR','Electro_MM','Electro_SM','Electro_SS','Disulfide_RR','Disulfide_MM','Disulfide_SM','Disulfide_SS', 'Hbonds_RR','Hbonds_MM','Hbonds_SM','Hbonds_SS','Partcov_RR','Partcov_MM','Partcov_SM','Partcov_SS','VdWClashes_RR','VdWClashes_MM','VdWClashes_SM','VdWClashes_SS','Volumetric_RR','Volumetric_MM','Volumetric_SM','Volumetric_SS'] + + print(interactions) + + IE = [] + if comp=='y': + wtfilename = process_dir + '/Summary_' + pdbname + '_Repair_AC.txt' + wtE = getInteractionEnergy(wtfilename) + print(wtE) + for n in range(1,nmuts+1): + print(n) + filename = process_dir + '/Summary_' + pdbname + '_Repair_' + str(n) + '_AC.txt' + mutE = getInteractionEnergy(filename) + print(mutE) + diff = wtE - mutE + print(diff) + IE.append(diff) + print(IE) + IEresults = pd.DataFrame(IE,columns = ['Interaction Energy'], index = mutlist) + IEfilename = 'foldx_complexresults_'+pdbname+'.csv' + IEresults.to_csv(IEfilename) + print(len(IE)) + data = np.append(data,[IE], axis = 0) + print(data) + interactions = ['ddG','Distances','Electro_RR','Electro_MM','Electro_SM','Electro_SS','Disulfide_RR','Disulfide_MM','Disulfide_SM','Disulfide_SS','Hbonds_RR','Hbonds_MM','Hbonds_SM','Hbonds_SS','Partcov_RR','Partcov_MM','Partcov_SM','Partcov_SS','VdWClashes_RR','VdWClashes_MM','VdWClashes_SM','VdWClashes_SS','Volumetric_RR','Volumetric_MM','Volumetric_SM','Volumetric_SS','Interaction Energy'] + + mut_file = process_dir + '/individual_list_' + pdbname + '.txt' + with open(mut_file) as csvfile: + readCSV = csv.reader(csvfile) + mutlist = [] + for row in readCSV: + mut = row[0] + mutlist.append(mut) + print(mutlist) + print(len(mutlist)) + print(data) + results = pd.DataFrame(data, columns = mutlist, index = interactions) + results.append(ddG) + #print(results.head()) + + # my style formatted results + results2 = results.T # transpose df + results2.index.name = 'mutationinformation' # assign name to index + results2 = results2.reset_index() # turn it into a columns + + results2['mutationinformation'] = results2['mutationinformation'].replace({r'([A-Z]{1})[A-Z]{1}([0-9]+[A-Z]{1});' : r'\1 \2'}, regex = True) # capture mcsm style muts (i.e not the chain id) + results2['mutationinformation'] = results2['mutationinformation'].str.replace(' ', '') # remove empty space + + results2.rename(columns = {'Distances': 'Contacts'}, inplace = True) + + # lower case columns + results2.columns = results2.columns.str.lower() + + print('Writing file in the format below:\n' + , results2.head() + , '\nNo. of rows:', len(results2) + , '\nNo. of cols:', len(results2.columns)) + + outputfilename = outfile_foldx + #outputfilename = 'foldx_results_' + pdbname + '.csv' + #results.to_csv(outputfilename) + results2.to_csv(outputfilename, index = False) + +if __name__ == '__main__': + main() diff --git a/foldx/test2/runFoldx_test.py b/foldx/test2/runFoldx_test.py new file mode 100755 index 0000000..27fc7a6 --- /dev/null +++ b/foldx/test2/runFoldx_test.py @@ -0,0 +1,250 @@ +#!/usr/bin/env python3 +import subprocess +import os +import numpy as np +import pandas as pd +from contextlib import suppress +import re +import csv + +def getInteractions(filename): + data = pd.read_csv(filename, index_col=0, header =0, sep="\t") + contactList = getIndexes(data,1) + print(contactList) + number = len(contactList) + return number + +def formatMuts(mut_file,pdbname): + with open(mut_file) as csvfile: + readCSV = csv.reader(csvfile) + muts = [] + for row in readCSV: + mut = row[0] + muts.append(mut) + + mut_list = [] + outfile = "/home/tanu/git/LSHTM_analysis/foldx/test2/individual_list_"+pdbname+".txt" + with open(outfile, "w") as output: + for m in muts: + print(m) + mut = m[:1]+'A'+m[1:] + mut_list.append(mut) + mut = mut + ";" + print(mut) + output.write(mut) + output.write("\n") + return mut_list + +def getIndexes(data, value): + colnames = data.columns.values + listOfPos = list() + result = data.isin([value]) + result.columns=colnames + seriesdata = result.any() + columnNames = list(seriesdata[seriesdata==True].index) + for col in columnNames: + rows = list(result[col][result[col]==True].index) + + for row in rows: + listOfPos.append((row,col)) + + return listOfPos + +def loadFiles(df): + # load a text file in to np matrix + resultList = [] + f = open(df,'r') + for line in f: + line = line.rstrip('\n') + aVals = line.split("\t") + fVals = list(map(np.float32, sVals)) + resultList.append(fVals) + f.close() + return np.asarray(resultList, dtype=np.float32) + +#======================================================================= +def main(): + pdbname = '3pl1' + mut_filename = "pnca_muts_sample.csv" + mutlist = formatMuts(mut_filename, pdbname) + + print(mutlist) + nmuts = len(mutlist)+1 + print(nmuts) + print(mutlist) + print("start") + + output = subprocess.check_output(['bash', 'runfoldx.sh', pdbname]) + print("end") + for n in range(1,nmuts): + print(n) + with suppress(Exception): + subprocess.check_output(['bash', 'runPrintNetworks.sh', pdbname,str(n)]) + + for n in range(1,nmuts): + print(n) + with suppress(Exception): + subprocess.check_output(['bash', 'mutrenamefiles.sh', pdbname,str(n)]) + + + out = subprocess.check_output(['bash','renamefiles.sh',pdbname]) + + dGdatafile = "/home/tanu/git/LSHTM_analysis/foldx/test2/Dif_"+pdbname+"_Repair.txt" + dGdata = pd.read_csv(dGdatafile, sep="\t") + print(dGdata) + ddG=[] + for i in range(0,len(dGdata)): + ddG.append(dGdata['total energy'].loc[i]) + print(ddG) + distfile = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Distances_"+pdbname+"_Repair_PN.txt" + wt_nc = getInteractions(distfile) + + elecfileRR = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Electro_RR_"+pdbname+"_Repair_PN.txt" + wt_neRR = getInteractions(elecfileRR) + + elecfileMM = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Electro_MM_"+pdbname+"_Repair_PN.txt" + wt_neMM = getInteractions(elecfileMM) + + elecfileSM = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Electro_SM_"+pdbname+"_Repair_PN.txt" + wt_neSM = getInteractions(elecfileSM) + + elecfileSS = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Electro_SS_"+pdbname+"_Repair_PN.txt" + wt_neSS = getInteractions(elecfileSS) + + disufileRR = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Disulfide_RR_"+pdbname+"_Repair_PN.txt" + wt_ndRR = getInteractions(disufileRR) + + disufileMM = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Disulfide_MM_"+pdbname+"_Repair_PN.txt" + wt_ndMM = getInteractions(disufileMM) + + disufileSM = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Disulfide_SM_"+pdbname+"_Repair_PN.txt" + wt_ndSM = getInteractions(disufileSM) + + disufileSS = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Disulfide_SS_"+pdbname+"_Repair_PN.txt" + wt_ndSS = getInteractions(disufileSS) + + hbndfileRR = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Hbonds_RR_"+pdbname+"_Repair_PN.txt" + wt_nhRR = getInteractions(hbndfileRR) + + hbndfileMM = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Hbonds_MM_"+pdbname+"_Repair_PN.txt" + wt_nhMM = getInteractions(hbndfileMM) + + hbndfileSM = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Hbonds_SM_"+pdbname+"_Repair_PN.txt" + wt_nhSM = getInteractions(hbndfileSM) + + hbndfileSS = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Hbonds_SS_"+pdbname+"_Repair_PN.txt" + wt_nhSS = getInteractions(hbndfileSS) + + partfileRR = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Partcov_RR_"+pdbname+"_Repair_PN.txt" + wt_npRR = getInteractions(partfileRR) + + partfileMM = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Partcov_MM_"+pdbname+"_Repair_PN.txt" + wt_npMM = getInteractions(partfileMM) + + partfileSM = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Partcov_SM_"+pdbname+"_Repair_PN.txt" + wt_npSM = getInteractions(partfileSM) + + partfileSS = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Partcov_SS_"+pdbname+"_Repair_PN.txt" + wt_npSS = getInteractions(partfileSS) + + vdwcfileRR = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_VdWClashes_RR_"+pdbname+"_Repair_PN.txt" + wt_nvRR = getInteractions(vdwcfileRR) + + vdwcfileMM = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_VdWClashes_MM_"+pdbname+"_Repair_PN.txt" + wt_nvMM = getInteractions(vdwcfileMM) + + vdwcfileSM = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_VdWClashes_SM_"+pdbname+"_Repair_PN.txt" + wt_nvSM = getInteractions(vdwcfileSM) + + vdwcfileSS = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_VdWClashes_SS_"+pdbname+"_Repair_PN.txt" + wt_nvSS = getInteractions(vdwcfileSS) + + volufileRR = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Volumetric_RR_"+pdbname+"_Repair_PN.txt" + wt_nvoRR = getInteractions(volufileRR) + + volufileMM = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Volumetric_MM_"+pdbname+"_Repair_PN.txt" + wt_nvoMM = getInteractions(volufileMM) + + volufileSM = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Volumetric_SM_"+pdbname+"_Repair_PN.txt" + wt_nvoSM = getInteractions(volufileSM) + + volufileSS = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Volumetric_SS_"+pdbname+"_Repair_PN.txt" + wt_nvoSS = getInteractions(volufileSS) + + dnc = [] + dneRR = [] + dneMM = [] + dneSM = [] + dneSS = [] + dndRR = [] + dndMM = [] + dndSM = [] + dndSS = [] + dnhRR = [] + dnhMM = [] + dnhSM = [] + dnhSS = [] + dnpRR = [] + dnpMM = [] + dnpSM = [] + dnpSS = [] + dnvRR = [] + dnvMM = [] + dnvSM = [] + dnvSS = [] + dnvoRR = [] + dnvoMM = [] + dnvoSM = [] + dnvoSS = [] + for n in range(1, nmuts): + filename = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Distances_"+pdbname+"_Repair_" + str(n)+"_PN.txt" + mut_nc = getInteractions(filename) + diffc = wt_nc - mut_nc + dnc.append(diffc) + + filename = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Electro_RR_"+pdbname+"_Repair_" + str(n)+"_PN.txt" + mut_neRR = getInteractions(filename) + diffeRR = wt_neRR - mut_neRR + dneRR.append(diffeRR) + + filename = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Disulfide_RR_"+pdbname+"_Repair_" + str(n)+"_PN.txt" + mut_ndRR = getInteractions(filename) + diffdRR = wt_ndRR - mut_ndRR + dndRR.append(diffdRR) + + filename = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Hbonds_RR_"+pdbname+"_Repair_" + str(n)+"_PN.txt" + mut_nhRR = getInteractions(filename) + diffhRR = wt_nhRR - mut_nhRR + dnhRR.append(diffhRR) + + filename = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Partcov_RR_"+pdbname+"_Repair_" + str(n)+"_PN.txt" + mut_npRR = getInteractions(filename) + diffpRR = wt_npRR - mut_npRR + dnpRR.append(diffpRR) + + filename = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_VdWClashes_RR_"+pdbname+"_Repair_" + str(n)+"_PN.txt" + mut_nvRR = getInteractions(filename) + diffvRR = wt_nvRR - mut_nvRR + dnvRR.append(diffvRR) + + filename = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Volumetric_RR_"+pdbname+"_Repair_" + str(n)+"_PN.txt" + mut_nvoRR = getInteractions(filename) + diffvoRR = wt_nvoRR - mut_nvoRR + dnvoRR.append(diffvoRR) + print(dnc) + print(dneRR) + print(dndRR) + print(dnhRR) + print(dnpRR) + print(dnvRR) + print(dnvoRR) + + results = pd.DataFrame([(ddG),(dnc),(dneRR),(dndRR),(dnhRR),(dnpRR),(dnvRR),(dnvoRR)], columns=mutlist, index=["ddG","contacts","electro","disulfide","hbonds","partcov","VdWClashes","volumetric"]) + results.append(ddG) + print(results) + results2 = results.T # transpose df + outputfilename = "foldx_results_"+pdbname+".csv" +# results.to_csv(outputfilename) + results2.to_csv(outputfilename) +if __name__ == "__main__": + main() diff --git a/foldx/test2/runPrintNetworks.sh b/foldx/test2/runPrintNetworks.sh new file mode 100755 index 0000000..c2bebab --- /dev/null +++ b/foldx/test2/runPrintNetworks.sh @@ -0,0 +1,7 @@ +PDB=$1 +n=$2 +OUTDIR=$3 +logger "Running runPrintNetworks" +cd ${OUTDIR} + +foldx --command=PrintNetworks --pdb="${PDB}_Repair_${n}.pdb" --water=PREDICT --vdwDesign=1 --output-dir=${OUTDIR} diff --git a/foldx/test2/runcomplex.sh b/foldx/test2/runcomplex.sh new file mode 100755 index 0000000..0c0483d --- /dev/null +++ b/foldx/test2/runcomplex.sh @@ -0,0 +1,9 @@ +PDB=$1 +A=$2 +B=$3 +OUTDIR=$4 +cd ${OUTDIR} +logger "Running runcomplex" +foldx --command=AnalyseComplex --pdb="${PDB}_Repair.pdb" --analyseComplexChains=${A},${B} --water=PREDICT --vdwDesign=1 --output-dir=${OUTDIR} +cp ${OUTDIR}/Summary_${PDB}_Repair_AC.fxout ${OUTDIR}/Summary_${PDB}_Repair_AC.txt +#sed -i .bak -e 1,8d ${OUTDIR}/Summary_${PDB}_Repair_AC.txt diff --git a/foldx/test2/runfoldx.sh b/foldx/test2/runfoldx.sh new file mode 100755 index 0000000..fa7ba84 --- /dev/null +++ b/foldx/test2/runfoldx.sh @@ -0,0 +1,9 @@ +PDB=$1 +OUTDIR=$2 +cd ${OUTDIR} +pwd +ls -l +logger "Running runfoldx" +foldx --command=BuildModel --pdb="${PDB}_Repair.pdb" --mutant-file="individual_list_${PDB}.txt" --ionStrength=0.05 --pH=7 --water=PREDICT --vdwDesign=1 --out-pdb=true --numberOfRuns=1 --output-dir=. +foldx --command=PrintNetworks --pdb="${PDB}_Repair.pdb" --water=PREDICT --vdwDesign=1 --output-dir=. +foldx --command=SequenceDetail --pdb="${PDB}_Repair.pdb" --water=PREDICT --vdwDesign=1 --output-dir=. diff --git a/scripts/data_extraction.py b/scripts/data_extraction.py index 98a4415..e631695 100755 --- a/scripts/data_extraction.py +++ b/scripts/data_extraction.py @@ -26,7 +26,7 @@ Created on Tue Aug 6 12:56:03 2019 # 1) _gwas.csv # 2) _common_ids.csv # 3) _ambiguous_muts.csv -# 4) _mcsm_snps.csv +# 4) _mcsm_formatted_snps.csv # 5) _metadata_poscounts.csv # 6) _metadata.csv # 7) _all_muts_msa.csv @@ -81,8 +81,8 @@ indir = args.input_dir outdir = args.output_dir make_dirs = args.make_dirs -#drug = 'ethambutol' -#gene = 'embB' +#drug = 'streptomycin' +#gene = 'gid' #%% input and output dirs and files #======= @@ -122,15 +122,15 @@ if make_dirs: # handle missing dirs here if not os.path.isdir(datadir): print('ERROR: Data directory does not exist:', datadir - , '\nPlease create and ensure gwas data is present and then rerun\nelse specify cmd option ---make_dirs') + , '\nPlease create and ensure gwas data is present and then rerun\nelse specify cmd option --make_dirs') sys.exit() if not os.path.isdir(indir): print('ERROR: Input directory does not exist:', indir - , '\nPlease either create or specify indir and rerun\nelse specify cmd option ---make_dirs') + , '\nPlease either create or specify indir and rerun\nelse specify cmd option --make_dirs') sys.exit() if not os.path.isdir(outdir): print('ERROR: Output directory does not exist:', outdir - , '\nPlease create or specify outdir and rerun\nelse specify cmd option ---make_dirs') + , '\nPlease create or specify outdir and rerun\nelse specify cmd option --make_dirs') sys.exit() # Requires @@ -317,7 +317,7 @@ for i, id in enumerate(clean_df.id): print('RESULTS:') print('Total WT in dr_muts_col:', wt) print('Total matches of', gene, 'SNP matches in', dr_muts_col, ':', dr_gene_count) -print('Total samples with > 1', gene, 'muts in dr_muts_col:', len(id2_dr) ) +print('Total samples with > 1', gene, 'nsSNPs in dr_muts_col:', len(id2_dr) ) print('=================================================================') del(clean_df, na_count, i, id, wt, id2_dr, count_gene_dr, count_wt) @@ -361,7 +361,7 @@ for i, id in enumerate(clean_df.id): print('RESULTS:') print('Total WT in other_muts_col:', wt_other) print('Total matches of', gene, 'SNP matches in', other_muts_col, ':', other_gene_count) -print('Total samples with > 1', gene, 'muts in other_muts_col:', len(id2_other) ) +print('Total samples with > 1', gene, 'nsSNPs in other_muts_col:', len(id2_other) ) print('=================================================================') print('Predicting total no. of rows in the curated df:', dr_gene_count + other_gene_count @@ -851,7 +851,7 @@ else: , '\nMuts are unique to dr_ and other_ mutation class' , '\n=========================================================') -# inspect dr_muts and other muts +# inspect dr_muts and other muts: Fixed in case no ambiguous muts detected! if dr_muts.isin(other_muts).sum() & other_muts.isin(dr_muts).sum() > 0: print('Finding ambiguous muts...' , '\n=========================================================' @@ -860,24 +860,37 @@ if dr_muts.isin(other_muts).sum() & other_muts.isin(dr_muts).sum() > 0: , '\n=========================================================' , '\nTotal no. of samples in other_muts present in dr_muts:', other_muts.isin(dr_muts).sum() , '\nThese are:\n', other_muts[other_muts.isin(dr_muts)] - , '\n=========================================================') + , '\n=========================================================') + + print('Counting no. of ambiguous muts...' + , '\nNo. of ambiguous muts in dr:' + , len(dr_muts[dr_muts.isin(other_muts)].value_counts().keys().tolist()) + , '\nNo. of ambiguous muts in other:' + , len(other_muts[other_muts.isin(dr_muts)].value_counts().keys().tolist()) + , '\n=========================================================') + + if dr_muts[dr_muts.isin(other_muts)].nunique() == other_muts[other_muts.isin(dr_muts)].nunique(): + common_muts = dr_muts[dr_muts.isin(other_muts)].value_counts().keys().tolist() + print('Distinct no. of ambigiuous muts detected:'+ str(len(common_muts)) + , '\nlist of ambiguous mutations (see below):', *common_muts, sep = '\n') + print('\n===========================================================') else: - sys.exit('Error: ambiguous muts present, but extraction failed. Debug!') - -print('Counting no. of ambiguous muts...') - -if dr_muts[dr_muts.isin(other_muts)].nunique() == other_muts[other_muts.isin(dr_muts)].nunique(): - common_muts = dr_muts[dr_muts.isin(other_muts)].value_counts().keys().tolist() - print('Distinct no. of ambigiuous muts detected:'+ str(len(common_muts)) - , '\nlist of ambiguous mutations (see below):', *common_muts, sep = '\n') - print('\n===========================================================') -else: - print('Error: ambiguous muts detected, but extraction failed. Debug!' - , '\nNo. of ambiguous muts in dr:' - , len(dr_muts[dr_muts.isin(other_muts)].value_counts().keys().tolist()) - , '\nNo. of ambiguous muts in other:' - , len(other_muts[other_muts.isin(dr_muts)].value_counts().keys().tolist()) - , '\n=========================================================') + #sys.exit('Error: ambiguous muts present, but extraction failed. Debug!') + print('No: ambiguous muts present') + +#print('Counting no. of ambiguous muts...') +#if dr_muts[dr_muts.isin(other_muts)].nunique() == other_muts[other_muts.isin(dr_muts)].nunique(): +# common_muts = dr_muts[dr_muts.isin(other_muts)].value_counts().keys().tolist() +# print('Distinct no. of ambigiuous muts detected:'+ str(len(common_muts)) +# , '\nlist of ambiguous mutations (see below):', *common_muts, sep = '\n') +# print('\n===========================================================') +#else: +# print('Error: ambiguous muts detected, but extraction failed. Debug!' +# , '\nNo. of ambiguous muts in dr:' +# , len(dr_muts[dr_muts.isin(other_muts)].value_counts().keys().tolist()) +# , '\nNo. of ambiguous muts in other:' +# , len(other_muts[other_muts.isin(dr_muts)].value_counts().keys().tolist()) +# , '\n=========================================================') #%% clear variables del(id_dr, id_other @@ -893,25 +906,24 @@ del(c1, c2, col_to_split1, col_to_split2, comp_gene_samples, dr_WF0, dr_df, dr_m #print(outdir) #dr_muts.to_csv('dr_muts.csv', header = True) #other_muts.to_csv('other_muts.csv', header = True) +if dr_muts.isin(other_muts).sum() & other_muts.isin(dr_muts).sum() > 0: + out_filename_ambig_muts = gene.lower() + '_ambiguous_muts.csv' + outfile_ambig_muts = outdir + '/' + out_filename_ambig_muts + print('\n----------------------------------' + , '\nWriting file: ambiguous muts' + , '\n----------------------------------' + , '\nFilename:', outfile_ambig_muts) + inspect = gene_LF1[gene_LF1['mutation'].isin(common_muts)] + inspect.to_csv(outfile_ambig_muts, index = False) -out_filename_ambig_muts = gene.lower() + '_ambiguous_muts.csv' -outfile_ambig_muts = outdir + '/' + out_filename_ambig_muts -print('\n----------------------------------' - , '\nWriting file: ambiguous muts' - , '\n----------------------------------' - , '\nFilename:', outfile_ambig_muts) + print('Finished writing:', out_filename_ambig_muts + , '\nNo. of rows:', len(inspect) + , '\nNo. of cols:', len(inspect.columns) + , '\nNo. of rows = no. of samples with the ambiguous muts present:' + , dr_muts.isin(other_muts).sum() + other_muts.isin(dr_muts).sum() + , '\n=============================================================') + del(out_filename_ambig_muts) -inspect = gene_LF1[gene_LF1['mutation'].isin(common_muts)] -inspect.to_csv(outfile_ambig_muts, index = False) - -print('Finished writing:', out_filename_ambig_muts - , '\nNo. of rows:', len(inspect) - , '\nNo. of cols:', len(inspect.columns) - , '\nNo. of rows = no. of samples with the ambiguous muts present:' - , dr_muts.isin(other_muts).sum() + other_muts.isin(dr_muts).sum() - , '\n=============================================================') - -del(out_filename_ambig_muts) #%% end of data extraction and some files writing. Below are some more files writing. #============================================================================= #%% Formatting df: read aa dict and pull relevant info @@ -1181,7 +1193,7 @@ if snps_only.mutationinformation.isna().sum() == 0: else: sys.exit('FAIL: SNP has NA, Possible mapping issues from dict?') -out_filename_mcsmsnps = gene.lower() + '_mcsm_snps.csv' +out_filename_mcsmsnps = gene.lower() + '_mcsm_formatted_snps.csv' outfile_mcsmsnps = outdir + '/' + out_filename_mcsmsnps print('\n----------------------------------' @@ -1215,7 +1227,7 @@ metadata_pos.sort_values(by = ['meta_pos_count'], ascending = False, inplace = T out_filename_metadata_poscounts = gene.lower() + '_metadata_poscounts.csv' outfile_metadata_poscounts = outdir + '/' + out_filename_metadata_poscounts print('\n----------------------------------' - , 'Writing file: Metadata poscounts' + , '\nWriting file: Metadata poscounts' , '\n----------------------------------' , '\nFile:', outfile_metadata_poscounts , '\n============================================================') @@ -1309,7 +1321,7 @@ out_filename_pos = gene.lower() + '_mutational_positons.csv' outfile_pos = outdir + '/' + out_filename_pos print('\n----------------------------------' - , 'Writing file: mutational positions' + , '\nWriting file: mutational positions' , '\n----------------------------------' , '\nFile:', outfile_pos , '\nNo. of distinct positions:', len(pos_only_sorted) @@ -1349,15 +1361,14 @@ print('============================================' , '\nTotal no. of samples with missense muts:', len(gene_LF1) , '\nTotal no. of unique samples with missense muts:', gene_LF1['id'].nunique() , '\n' - , '\nTotal no.of samples with common_ids:', nu_common_ids['id'] - , '\nTotal no.of samples with ambiguous muts:', len(inspect) - #, '\nTotal no.of unique ambiguous muts:', len(common_muts) - , '\nTotal no.of unique ambiguous muts:', inspect['mutation'].nunique() - , '\n=============================================================' - , '\n\n\n') - - + , '\nTotal no.of samples with common_ids:', nu_common_ids['id']) +if dr_muts.isin(other_muts).sum() & other_muts.isin(dr_muts).sum() > 0: + print('\nTotal no.of samples with ambiguous muts:', len(inspect) + #, '\nTotal no.of unique ambiguous muts:', len(common_muts) + , '\nTotal no.of unique ambiguous muts:', inspect['mutation'].nunique() + , '\n=============================================================' + , '\n\n\n') #======================================================================= print(u'\u2698' * 50, '\nEnd of script: Data extraction and writing files' diff --git a/scripts/running_scripts b/scripts/running_scripts index 646c654..ee109db 100644 --- a/scripts/running_scripts +++ b/scripts/running_scripts @@ -1,32 +1,42 @@ #======== # data extraction: Must be run first to extract mutations for each drug-gene combination #======== -./data_extraction.py -d pyrazinamide -g pncA +./data_extraction.py -d -g --make_dirs + +#======== +# add chains to a PDB file: for modeller models lack chain ID, this script is used +#======== +add_chains_pdb.py MY_PDB.pdb + +#======== +# pdb data extraction: To find out discontinuity of chain and removing invalid muts to allow foldx and mcsm to run properly! +#======== +In progress... #======== # foldx: specify chain if default is NOT 'A' #======== -./runFoldx.py -d pyrazinamide -g pncA +./runFoldx.py -d -g -c1 A -p /media/tanu/eb1d072a-3f73-427f-aeb8-f6852b5c5216/Data/processing #======== # mcsm: specify chain if default is NOT 'A' #======== -./run_mcsm.py -d pyrazinamide -g pncA -s submit -l PZA --debug -./run_mcsm.py -d pyrazinamide -g pncA -s get -./run_mcsm.py -d pyrazinamide -g pncA -s format +./run_mcsm.py -d -g -s submit -l PZA --debug +./run_mcsm.py -d -g pncA -s get +./run_mcsm.py -d -g pncA -s format #==================== # other struct params #==================== -./dssp_df.py -d pyrazinamide -g pncA +./dssp_df.py -d -g # Errors on matplot.lib warn=, so just comment it out for the timebeing!: MONKEY PATCH -./kd_df.py -d pyrazinamide -g pncA -fasta # fixme: NO of cols says 2, but is actually 3 -./rd_df.py -d pyrazinamide -g pncA # fixme: input tsv file is sourced manually from website! +./kd_df.py -d -g -fasta # fixme: NO of cols says 2, but is actually 3 +./rd_df.py -d -g # fixme: input tsv file is sourced manually from website! #============================== # af_or calcs: different types #============================== -./af_or_calcs.R --d pyrazinamide --gene pncA # fixme: No conditional dir structure +./af_or_calcs.R -d -g # fixme: No conditional dir structure #============================== # af_or calcs: kinship @@ -40,18 +50,18 @@ USE THE BELOW from within the or_kinship_link.py script or something?! as part o # for now use the file already created using some manual wrestling to link # the OR for kinship with mutations -./or_kinship_link.py -d pyrazinamide -g pncA -sc 2288681 -ec 2289241 +./or_kinship_link.py -d -g -sc -ec #============================== # formatting: ns_snp_info.txt #============================== # This adds mcsm style muts -./snpinfo_format.py -d pyrazinamide -g pncA +./snpinfo_format.py -d -g #============================== # combining dfs: combining_dfs.py #============================== # FIXME: combining_FIXME.py -./combining_dfs.py -d pyrazinamide -g pncA +./combining_dfs.py --d -g From fa25a30dcf556993ef487728c8d99d5a9b0f52d5 Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Mon, 8 Feb 2021 15:44:21 +0000 Subject: [PATCH 02/12] fixup broken shell scripts --- foldx/test2/mutrenamefiles.sh | 4 +++- foldx/test2/renamefiles.sh | 3 +++ foldx/test2/runFoldx.py | 23 ++++++++++++++++++----- 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/foldx/test2/mutrenamefiles.sh b/foldx/test2/mutrenamefiles.sh index b1d8742..d9f938c 100755 --- a/foldx/test2/mutrenamefiles.sh +++ b/foldx/test2/mutrenamefiles.sh @@ -1,6 +1,8 @@ PDB=$1 n=$2 -#cd /home/git/LSHTM_analysis/foldx/test/ +OUTDIR=$3 +cd ${OUTDIR} + cp Matrix_Hbonds_${PDB}_Repair_${n}_PN.fxout Matrix_Hbonds_${PDB}_Repair_${n}_PN.txt sed -n '5,190p' Matrix_Hbonds_${PDB}_Repair_${n}_PN.fxout > Matrix_Hbonds_RR_${PDB}_Repair_${n}_PN.txt sed -n '194,379p' Matrix_Hbonds_${PDB}_Repair_${n}_PN.fxout > Matrix_Hbonds_MM_${PDB}_Repair_${n}_PN.txt diff --git a/foldx/test2/renamefiles.sh b/foldx/test2/renamefiles.sh index e5d8fe9..2a2b193 100755 --- a/foldx/test2/renamefiles.sh +++ b/foldx/test2/renamefiles.sh @@ -1,4 +1,7 @@ PDB=$1 +OUTDIR=$2 +cd ${OUTDIR} + #cd /home/git/LSHTM_analysis/foldx/test cp Dif_${PDB}_Repair.fxout Dif_${PDB}_Repair.txt sed -i '1,8d' Dif_${PDB}_Repair.txt diff --git a/foldx/test2/runFoldx.py b/foldx/test2/runFoldx.py index bf2a835..42db217 100755 --- a/foldx/test2/runFoldx.py +++ b/foldx/test2/runFoldx.py @@ -207,31 +207,44 @@ def main(): print(mutlist) print('start') #subprocess.check_output(['bash','repairPDB.sh', pdbname, process_dir]) + print('\033[95mSTAGE: repair PDB\033[0m') subprocess.check_output(['bash','repairPDB.sh', indir, actual_pdb_filename, process_dir]) - - print('end') + print('\033[95mCOMPLETE: repair PDB\033[0m') + print('\033[95mSTAGE: run FoldX (shell)\033[0m') output = subprocess.check_output(['bash', 'runfoldx.sh', pdbname, process_dir]) + print('\033[95mCOMPLETE: run FoldX (shell)\033[0m') + print('\033[95mSTAGE: Print Networks (shell)\033[0m') for n in range(1,nmuts+1): - print(n) + print('\033[95mNETWORK:\033[0m', n) + print('\033[96mCommand:\033[0m runPrintNetworks.sh %s %s %s' % (pdbname, str(n), process_dir )) with suppress(Exception): subprocess.check_output(['bash', 'runPrintNetworks.sh', pdbname, str(n), process_dir]) - + print('\033[95mCOMPLETE: Print Networks (shell)\033[0m') + + print('\033[95mSTAGE: Rename Mutation Files (shell)\033[0m') for n in range(1,nmuts+1): - print(n) + print('\033[95mMUTATION:\033[0m', n) + print('\033[96mCommand:\033[0m mutrenamefiles.sh %s %s %s' % (pdbname, str(n), process_dir )) with suppress(Exception): subprocess.check_output(['bash', 'mutrenamefiles.sh', pdbname, str(n), process_dir]) + print('\033[95mCOMPLETE: Rename Mutation Files (shell)\033[0m') + print('\033[95mSTAGE: Rename Files (shell)\033[0m') out = subprocess.check_output(['bash','renamefiles.sh', pdbname, process_dir]) + print('\033[95mCOMPLETE: Rename Files (shell)\033[0m') if comp=='y': + print('\033[95mSTAGE: Run Complex (shell)\033[0m') chain1=chainA chain2=chainB with suppress(Exception): subprocess.check_output(['bash','runcomplex.sh', pdbname, chain1, chain2, process_dir]) for n in range(1,nmuts+1): + print('\033[95mSTAGE: Run Mutation Complex (shell) for mutation:\033[0m', n) with suppress(Exception): subprocess.check_output(['bash','mutruncomplex.sh', pdbname, chain1, chain2, str(n), process_dir]) + print('\033[95mCOMPLETE: Run Complex (shell)\033[0m') interactions = ['Distances','Electro_RR','Electro_MM','Electro_SM','Electro_SS','Disulfide_RR','Disulfide_MM','Disulfide_SM','Disulfide_SS', 'Hbonds_RR','Hbonds_MM','Hbonds_SM','Hbonds_SS','Partcov_RR','Partcov_MM','Partcov_SM','Partcov_SS','VdWClashes_RR','VdWClashes_MM', From 99b77434b5210f65db3c61a3f54001d559c46906 Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Mon, 8 Feb 2021 16:16:53 +0000 Subject: [PATCH 03/12] more debug --- foldx/test2/repairPDB.sh | 2 +- foldx/test2/runFoldx.py | 3 +++ foldx/test2/runfoldx.sh | 6 +++--- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/foldx/test2/repairPDB.sh b/foldx/test2/repairPDB.sh index ee1a13c..8db843c 100755 --- a/foldx/test2/repairPDB.sh +++ b/foldx/test2/repairPDB.sh @@ -1,7 +1,7 @@ INDIR=$1 PDB=$2 OUTDIR=$3 - +cd ${OUTDIR} logger "Running repairPDB" #foldx --command=RepairPDB --pdb="${PDB}.pdb" --ionStrength=0.05 --pH=7 --water=PREDICT --vdwDesign=1 outPDB=true --output-dir=${OUTDIR} diff --git a/foldx/test2/runFoldx.py b/foldx/test2/runFoldx.py index 42db217..23bed97 100755 --- a/foldx/test2/runFoldx.py +++ b/foldx/test2/runFoldx.py @@ -105,6 +105,7 @@ else: infile_pdb = indir + '/' + pdb_filename actual_pdb_filename = Path(infile_pdb).name +#actual_pdb_filename = os.path.abspath(infile_pdb) if mut_filename: mutation_file = os.path.abspath(mut_filename) @@ -208,9 +209,11 @@ def main(): print('start') #subprocess.check_output(['bash','repairPDB.sh', pdbname, process_dir]) print('\033[95mSTAGE: repair PDB\033[0m') + print('EXECUTING: repairPDB.sh %s %s %s' % (indir, actual_pdb_filename, process_dir)) subprocess.check_output(['bash','repairPDB.sh', indir, actual_pdb_filename, process_dir]) print('\033[95mCOMPLETE: repair PDB\033[0m') print('\033[95mSTAGE: run FoldX (shell)\033[0m') + print('EXECUTING: runfoldx.sh %s %s ' % (pdbname, process_dir)) output = subprocess.check_output(['bash', 'runfoldx.sh', pdbname, process_dir]) print('\033[95mCOMPLETE: run FoldX (shell)\033[0m') diff --git a/foldx/test2/runfoldx.sh b/foldx/test2/runfoldx.sh index fa7ba84..4f86033 100755 --- a/foldx/test2/runfoldx.sh +++ b/foldx/test2/runfoldx.sh @@ -4,6 +4,6 @@ cd ${OUTDIR} pwd ls -l logger "Running runfoldx" -foldx --command=BuildModel --pdb="${PDB}_Repair.pdb" --mutant-file="individual_list_${PDB}.txt" --ionStrength=0.05 --pH=7 --water=PREDICT --vdwDesign=1 --out-pdb=true --numberOfRuns=1 --output-dir=. -foldx --command=PrintNetworks --pdb="${PDB}_Repair.pdb" --water=PREDICT --vdwDesign=1 --output-dir=. -foldx --command=SequenceDetail --pdb="${PDB}_Repair.pdb" --water=PREDICT --vdwDesign=1 --output-dir=. +foldx --command=BuildModel --pdb="${PDB}_Repair.pdb" --mutant-file="individual_list_${PDB}.txt" --ionStrength=0.05 --pH=7 --water=PREDICT --vdwDesign=1 --out-pdb=true --numberOfRuns=1 --output-dir=${OUTDIR} +foldx --command=PrintNetworks --pdb="${PDB}_Repair.pdb" --water=PREDICT --vdwDesign=1 --output-dir=${OUTDIR} +foldx --command=SequenceDetail --pdb="${PDB}_Repair.pdb" --water=PREDICT --vdwDesign=1 --output-dir=${OUTDIR} From 9df3913a84a42bfbe53343248ef916533d3e233c Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Mon, 8 Feb 2021 16:59:42 +0000 Subject: [PATCH 04/12] modifying script to avoid invoking bash as a subprocess --- foldx/test2/runFoldx.py | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/foldx/test2/runFoldx.py b/foldx/test2/runFoldx.py index 23bed97..2ba784d 100755 --- a/foldx/test2/runFoldx.py +++ b/foldx/test2/runFoldx.py @@ -195,6 +195,19 @@ def loadFiles(df): f.close() return np.asarray(resultList, dtype=np.float32) +# TODO: use this code pattern rather than invoking bash +#def repairPDB(): +# subprocess.call(['foldx' +# , '--command=RepairPDB' +# , '--pdb-dir=' + indir +# , '--pdb=' + actual_pdb_filename +# , '--ionStrength=0.05'# +# , '--pH=7' +# , '--water=PREDICT' +# , '--vdwDesign=1' +# , 'outPDB=true' +# , '--output-dir=' + process_dir]) + #======================================================================= def main(): pdbname = pdb_name @@ -210,7 +223,19 @@ def main(): #subprocess.check_output(['bash','repairPDB.sh', pdbname, process_dir]) print('\033[95mSTAGE: repair PDB\033[0m') print('EXECUTING: repairPDB.sh %s %s %s' % (indir, actual_pdb_filename, process_dir)) - subprocess.check_output(['bash','repairPDB.sh', indir, actual_pdb_filename, process_dir]) + #subprocess.check_output(['bash','repairPDB.sh', indir, actual_pdb_filename, process_dir]) + # once you decide to use the function + # repairPDB(pdbname) + subprocess.call(['foldx' + , '--command=RepairPDB' + , '--pdb-dir=' + indir + , '--pdb=' + actual_pdb_filename + , '--ionStrength=0.05'# + , '--pH=7' + , '--water=PREDICT' + , '--vdwDesign=1' + , 'outPDB=true' + , '--output-dir=' + process_dir]) print('\033[95mCOMPLETE: repair PDB\033[0m') print('\033[95mSTAGE: run FoldX (shell)\033[0m') print('EXECUTING: runfoldx.sh %s %s ' % (pdbname, process_dir)) @@ -242,7 +267,9 @@ def main(): chain1=chainA chain2=chainB with suppress(Exception): - subprocess.check_output(['bash','runcomplex.sh', pdbname, chain1, chain2, process_dir]) + #subprocess.check_output(['bash','runcomplex.sh', pdbname, chain1, chain2, process_dir]) + subprocess(['foldx --command=AnalyseComplex --pdb="%s_Repair.pdb" --analyseComplexChains=%s,%s --water=PREDICT --vdwDesign=1 --output-dir=%s'] % (pdbname, chain1, chain2, process_dir)) + for n in range(1,nmuts+1): print('\033[95mSTAGE: Run Mutation Complex (shell) for mutation:\033[0m', n) with suppress(Exception): From 86670bbac335faa9464508c709dbca304a466fdd Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Mon, 8 Feb 2021 18:06:02 +0000 Subject: [PATCH 05/12] remove shell scripts run with subprocess() and launch foldx directly from python --- .../{ => deprecated_shell}/mutruncomplex.sh | 0 .../test2/{ => deprecated_shell}/repairPDB.sh | 0 .../runPrintNetworks.sh | 0 .../{ => deprecated_shell}/runcomplex.sh | 0 .../test2/{ => deprecated_shell}/runfoldx.sh | 0 foldx/test2/runFoldx.py | 103 +++++++++++++++--- 6 files changed, 86 insertions(+), 17 deletions(-) rename foldx/test2/{ => deprecated_shell}/mutruncomplex.sh (100%) rename foldx/test2/{ => deprecated_shell}/repairPDB.sh (100%) rename foldx/test2/{ => deprecated_shell}/runPrintNetworks.sh (100%) rename foldx/test2/{ => deprecated_shell}/runcomplex.sh (100%) rename foldx/test2/{ => deprecated_shell}/runfoldx.sh (100%) diff --git a/foldx/test2/mutruncomplex.sh b/foldx/test2/deprecated_shell/mutruncomplex.sh similarity index 100% rename from foldx/test2/mutruncomplex.sh rename to foldx/test2/deprecated_shell/mutruncomplex.sh diff --git a/foldx/test2/repairPDB.sh b/foldx/test2/deprecated_shell/repairPDB.sh similarity index 100% rename from foldx/test2/repairPDB.sh rename to foldx/test2/deprecated_shell/repairPDB.sh diff --git a/foldx/test2/runPrintNetworks.sh b/foldx/test2/deprecated_shell/runPrintNetworks.sh similarity index 100% rename from foldx/test2/runPrintNetworks.sh rename to foldx/test2/deprecated_shell/runPrintNetworks.sh diff --git a/foldx/test2/runcomplex.sh b/foldx/test2/deprecated_shell/runcomplex.sh similarity index 100% rename from foldx/test2/runcomplex.sh rename to foldx/test2/deprecated_shell/runcomplex.sh diff --git a/foldx/test2/runfoldx.sh b/foldx/test2/deprecated_shell/runfoldx.sh similarity index 100% rename from foldx/test2/runfoldx.sh rename to foldx/test2/deprecated_shell/runfoldx.sh diff --git a/foldx/test2/runFoldx.py b/foldx/test2/runFoldx.py index 2ba784d..0e0fab6 100755 --- a/foldx/test2/runFoldx.py +++ b/foldx/test2/runFoldx.py @@ -9,6 +9,7 @@ from pathlib import Path import re import csv import argparse +import shutil #https://realpython.com/python-pathlib/ # FIXME @@ -226,55 +227,123 @@ def main(): #subprocess.check_output(['bash','repairPDB.sh', indir, actual_pdb_filename, process_dir]) # once you decide to use the function # repairPDB(pdbname) + + # FIXME: put this hack elsewhere + foldx_common=' --ionStrength=0.05 --pH=7 --water=PREDICT --vdwDesign=1 ' + subprocess.call(['foldx' , '--command=RepairPDB' + , foldx_common , '--pdb-dir=' + indir , '--pdb=' + actual_pdb_filename - , '--ionStrength=0.05'# - , '--pH=7' - , '--water=PREDICT' - , '--vdwDesign=1' , 'outPDB=true' , '--output-dir=' + process_dir]) print('\033[95mCOMPLETE: repair PDB\033[0m') - print('\033[95mSTAGE: run FoldX (shell)\033[0m') + print('\033[95mSTAGE: run FoldX (subprocess)\033[0m') print('EXECUTING: runfoldx.sh %s %s ' % (pdbname, process_dir)) - output = subprocess.check_output(['bash', 'runfoldx.sh', pdbname, process_dir]) - print('\033[95mCOMPLETE: run FoldX (shell)\033[0m') + #output = subprocess.check_output(['bash', 'runfoldx.sh', pdbname, process_dir]) + + print('Running foldx BuildModel') + subprocess.call(['foldx' + , '--command=BuildModel' + , foldx_common + , '--pdb-dir=' + process_dir + , '--pdb=' + pdbname + '_Repair.pdb' + , '--mutant-file="individual_list_' + pdbname +'.txt"' + , 'outPDB=true' + , '--numberOfRuns=1' + , '--output-dir=' + process_dir], cwd=process_dir) + + print('Running foldx PrintNetworks') + subprocess.call(['foldx' + , '--command=PrintNetworks' + , '--pdb-dir=' + process_dir + , '--pdb=' + pdbname + '_Repair.pdb' + , '--water=PREDICT' + , '--vdwDesign=1' + , '--output-dir=' + process_dir], cwd=process_dir) + + print('Running foldx SequenceDetail') + subprocess.call(['foldx' + , '--command=SequenceDetail' + , '--pdb-dir=' + process_dir + , '--pdb=' + pdbname + '_Repair.pdb' + , '--water=PREDICT' + , '--vdwDesign=1' + , '--output-dir=' + process_dir], cwd=process_dir) + + + print('\033[95mCOMPLETE: run FoldX (subprocess)\033[0m') print('\033[95mSTAGE: Print Networks (shell)\033[0m') for n in range(1,nmuts+1): print('\033[95mNETWORK:\033[0m', n) - print('\033[96mCommand:\033[0m runPrintNetworks.sh %s %s %s' % (pdbname, str(n), process_dir )) - with suppress(Exception): - subprocess.check_output(['bash', 'runPrintNetworks.sh', pdbname, str(n), process_dir]) + #print('\033[96mCommand:\033[0m runPrintNetworks.sh %s %s %s' % (pdbname, str(n), process_dir )) + #with suppress(Exception): + #foldx --command=PrintNetworks --pdb="${PDB}_Repair_${n}.pdb" --water=PREDICT --vdwDesign=1 --output-dir=${OUTDIR} + print('Running foldx PrintNetworks for mutation', n) + subprocess.call(['foldx' + , '--command=PrintNetworks' + , '--pdb-dir=' + process_dir + , '--pdb=' + pdbname + '_Repair_' + str(n) + '.pdb' + , '--water=PREDICT' + , '--vdwDesign=1' + , '--output-dir=' + process_dir], cwd=process_dir) + #subprocess.check_output(['bash', 'runPrintNetworks.sh', pdbname, str(n), process_dir]) print('\033[95mCOMPLETE: Print Networks (shell)\033[0m') print('\033[95mSTAGE: Rename Mutation Files (shell)\033[0m') for n in range(1,nmuts+1): print('\033[95mMUTATION:\033[0m', n) print('\033[96mCommand:\033[0m mutrenamefiles.sh %s %s %s' % (pdbname, str(n), process_dir )) + # FIXME: this is bad design and needs to be done in a pythonic way with suppress(Exception): subprocess.check_output(['bash', 'mutrenamefiles.sh', pdbname, str(n), process_dir]) print('\033[95mCOMPLETE: Rename Mutation Files (shell)\033[0m') print('\033[95mSTAGE: Rename Files (shell)\033[0m') + # FIXME: this is bad design and needs to be done in a pythonic way out = subprocess.check_output(['bash','renamefiles.sh', pdbname, process_dir]) print('\033[95mCOMPLETE: Rename Files (shell)\033[0m') if comp=='y': - print('\033[95mSTAGE: Run Complex (shell)\033[0m') + print('\033[95mSTAGE: Running foldx AnalyseComplex (subprocess)\033[0m') chain1=chainA chain2=chainB - with suppress(Exception): + #with suppress(Exception): #subprocess.check_output(['bash','runcomplex.sh', pdbname, chain1, chain2, process_dir]) - subprocess(['foldx --command=AnalyseComplex --pdb="%s_Repair.pdb" --analyseComplexChains=%s,%s --water=PREDICT --vdwDesign=1 --output-dir=%s'] % (pdbname, chain1, chain2, process_dir)) + subprocess.call(['foldx' + , '--command=AnalyseComplex' + , '--pdb-dir=' + process_dir + , '--pdb=' + pdbname + '_Repair.pdb' + , '--analyseComplexChains=' + chain1 + ',' + chain2 + , '--water=PREDICT' + , '--vdwDesign=1' + , '--output-dir=' + process_dir], cwd=process_dir) + + # FIXME why would we ever need to do this?!? Cargo-culted from runcomplex.sh + ac_source = process_dir + '/Summary_' + pdbname + '_Repair_AC.fxout' + ac_dest = process_dir + '/Summary_' + pdbname + '_Repair_AC.txt' + shutil.copyfile(ac_source, ac_dest) for n in range(1,nmuts+1): - print('\033[95mSTAGE: Run Mutation Complex (shell) for mutation:\033[0m', n) - with suppress(Exception): - subprocess.check_output(['bash','mutruncomplex.sh', pdbname, chain1, chain2, str(n), process_dir]) - print('\033[95mCOMPLETE: Run Complex (shell)\033[0m') + print('\033[95mSTAGE: Running foldx AnalyseComplex (subprocess) for mutation:\033[0m', n) + #with suppress(Exception): + # subprocess.check_output(['bash','mutruncomplex.sh', pdbname, chain1, chain2, str(n), process_dir]) + subprocess.call(['foldx' + , '--command=AnalyseComplex' + , '--pdb-dir=' + process_dir + , '--pdb=' + pdbname + '_Repair_' + str(n) + '.pdb' + , '--analyseComplexChains=' + chain1 + ',' + chain2 + , '--water=PREDICT' + , '--vdwDesign=1' + , '--output-dir=' + process_dir], cwd=process_dir) + + # FIXME why would we ever need to do this?!? Cargo-culted from runcomplex.sh + ac_mut_source = process_dir + '/Summary_' + pdbname + '_Repair_' + str(n) +'_AC.fxout' + ac_mut_dest = process_dir + '/Summary_' + pdbname + '_Repair)' + str(n) +'_AC.txt' + shutil.copyfile(ac_mut_source, ac_mut_dest) + print('\033[95mCOMPLETE: foldx AnalyseComplex (subprocess) for mutation:\033[0m', n) interactions = ['Distances','Electro_RR','Electro_MM','Electro_SM','Electro_SS','Disulfide_RR','Disulfide_MM','Disulfide_SM','Disulfide_SS', 'Hbonds_RR','Hbonds_MM','Hbonds_SM','Hbonds_SS','Partcov_RR','Partcov_MM','Partcov_SM','Partcov_SS','VdWClashes_RR','VdWClashes_MM', From ca6899626461f720675ffe3318e95dbe63c4aedc Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Tue, 9 Feb 2021 10:54:35 +0000 Subject: [PATCH 06/12] renamed file runFoldx.py in test2/ to reflect this --- foldx/test2/{runFoldx.py => runFoldx_test2.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename foldx/test2/{runFoldx.py => runFoldx_test2.py} (100%) diff --git a/foldx/test2/runFoldx.py b/foldx/test2/runFoldx_test2.py similarity index 100% rename from foldx/test2/runFoldx.py rename to foldx/test2/runFoldx_test2.py From 56150ae3c8dbd26672f45a5a4132828cf292f880 Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Tue, 9 Feb 2021 14:42:44 +0000 Subject: [PATCH 07/12] various changes --- foldx/mutrenamefiles.sh | 8 +- foldx/mutrenamefiles_mac.sh | 68 ------------ foldx/mutruncomplex.sh | 10 -- foldx/renamefiles.sh | 8 +- foldx/renamefiles_mac.sh | 68 ------------ foldx/repairPDB.sh | 9 -- foldx/runFoldx.py | 189 ++++++++++++++++++++++++++++------ foldx/runPrintNetworks.sh | 7 -- foldx/runcomplex.sh | 9 -- foldx/runfoldx.sh | 9 -- foldx/test2/mutrenamefiles.sh | 2 +- foldx/test2/renamefiles.sh | 3 +- 12 files changed, 159 insertions(+), 231 deletions(-) delete mode 100755 foldx/mutrenamefiles_mac.sh delete mode 100755 foldx/mutruncomplex.sh delete mode 100755 foldx/renamefiles_mac.sh delete mode 100755 foldx/repairPDB.sh delete mode 100755 foldx/runPrintNetworks.sh delete mode 100755 foldx/runcomplex.sh delete mode 100755 foldx/runfoldx.sh diff --git a/foldx/mutrenamefiles.sh b/foldx/mutrenamefiles.sh index 88a5d03..d9f938c 100755 --- a/foldx/mutrenamefiles.sh +++ b/foldx/mutrenamefiles.sh @@ -2,7 +2,7 @@ PDB=$1 n=$2 OUTDIR=$3 cd ${OUTDIR} -logger "Running mutrenamefiles with PDB: ${PDB} n: ${n} OUTDIR: ${OUTDIR}" + cp Matrix_Hbonds_${PDB}_Repair_${n}_PN.fxout Matrix_Hbonds_${PDB}_Repair_${n}_PN.txt sed -n '5,190p' Matrix_Hbonds_${PDB}_Repair_${n}_PN.fxout > Matrix_Hbonds_RR_${PDB}_Repair_${n}_PN.txt sed -n '194,379p' Matrix_Hbonds_${PDB}_Repair_${n}_PN.fxout > Matrix_Hbonds_MM_${PDB}_Repair_${n}_PN.txt @@ -61,9 +61,3 @@ cp InteractingResidues_Volumetric_${PDB}_Repair_${n}_PN.fxout InteractingResidue sed -i '1,5d' InteractingResidues_Volumetric_${PDB}_Repair_${n}_PN.txt cp InteractingResidues_Disulfide_${PDB}_Repair_${n}_PN.fxout InteractingResidues_Disulfide_${PDB}_Repair_${n}_PN.txt sed -i '1,5d' InteractingResidues_Disulfide_${PDB}_Repair_${n}_PN.txt - - - - - - diff --git a/foldx/mutrenamefiles_mac.sh b/foldx/mutrenamefiles_mac.sh deleted file mode 100755 index b0e2fe9..0000000 --- a/foldx/mutrenamefiles_mac.sh +++ /dev/null @@ -1,68 +0,0 @@ -PDB=$1 -n=$2 -#cd /home/tanu/git/LSHTM_analysis/foldx/ -logger "Running mutrenamefiles_mac" -cp Matrix_Hbonds_${PDB}_Repair_${n}_PN.fxout Matrix_Hbonds_${PDB}_Repair_${n}_PN.txt -sed -n '5,190p' Matrix_Hbonds_${PDB}_Repair_${n}_PN.fxout > Matrix_Hbonds_RR_${PDB}_Repair_${n}_PN.txt -sed -n '194,379p' Matrix_Hbonds_${PDB}_Repair_${n}_PN.fxout > Matrix_Hbonds_MM_${PDB}_Repair_${n}_PN.txt -sed -n '383,568p' Matrix_Hbonds_${PDB}_Repair_${n}_PN.fxout > Matrix_Hbonds_SM_${PDB}_Repair_${n}_PN.txt -sed -n '572,757p' Matrix_Hbonds_${PDB}_Repair_${n}_PN.fxout > Matrix_Hbonds_SS_${PDB}_Repair_${n}_PN.txt -cp Matrix_Distances_${PDB}_Repair_${n}_PN.fxout Matrix_Distances_${PDB}_Repair_${n}_PN.txt -sed -i .bak -e 1,4d Matrix_Distances_${PDB}_Repair_${n}_PN.txt -cp Matrix_Volumetric_${PDB}_Repair_${n}_PN.fxout Matrix_Volumetric_${PDB}_Repair_${n}_PN.txt -sed -n '5,190p' Matrix_Volumetric_${PDB}_Repair_${n}_PN.fxout > Matrix_Volumetric_RR_${PDB}_Repair_${n}_PN.txt -sed -n '194,379p' Matrix_Volumetric_${PDB}_Repair_${n}_PN.fxout > Matrix_Volumetric_MM_${PDB}_Repair_${n}_PN.txt -sed -n '383,568p' Matrix_Volumetric_${PDB}_Repair_${n}_PN.fxout > Matrix_Volumetric_SM_${PDB}_Repair_${n}_PN.txt -sed -n '572,757p' Matrix_Volumetric_${PDB}_Repair_${n}_PN.fxout > Matrix_Volumetric_SS_${PDB}_Repair_${n}_PN.txt -cp Matrix_Electro_${PDB}_Repair_${n}_PN.fxout Matrix_Electro_${PDB}_Repair_${n}_PN.txt -sed -n '5,190p' Matrix_Electro_${PDB}_Repair_${n}_PN.fxout > Matrix_Electro_RR_${PDB}_Repair_${n}_PN.txt -sed -n '194,379p' Matrix_Electro_${PDB}_Repair_${n}_PN.fxout > Matrix_Electro_MM_${PDB}_Repair_${n}_PN.txt -sed -n '383,568p' Matrix_Electro_${PDB}_Repair_${n}_PN.fxout > Matrix_Electro_SM_${PDB}_Repair_${n}_PN.txt -sed -n '572,757p' Matrix_Electro_${PDB}_Repair_${n}_PN.fxout > Matrix_Electro_SS_${PDB}_Repair_${n}_PN.txt -cp Matrix_Disulfide_${PDB}_Repair_${n}_PN.fxout Matrix_Disulfide_${PDB}_Repair_${n}_PN.txt -sed -n '5,190p' Matrix_Disulfide_${PDB}_Repair_${n}_PN.fxout > Matrix_Disulfide_RR_${PDB}_Repair_${n}_PN.txt -sed -n '194,379p' Matrix_Disulfide_${PDB}_Repair_${n}_PN.fxout > Matrix_Disulfide_MM_${PDB}_Repair_${n}_PN.txt -sed -n '383,568p' Matrix_Disulfide_${PDB}_Repair_${n}_PN.fxout > Matrix_Disulfide_SM_${PDB}_Repair_${n}_PN.txt -sed -n '572,757p' Matrix_Disulfide_${PDB}_Repair_${n}_PN.fxout > Matrix_Disulfide_SS_${PDB}_Repair_${n}_PN.txt -cp Matrix_Partcov_${PDB}_Repair_${n}_PN.fxout Matrix_Partcov_${PDB}_Repair_${n}_PN.txt -sed -n '5,190p' Matrix_Partcov_${PDB}_Repair_${n}_PN.fxout > Matrix_Partcov_RR_${PDB}_Repair_${n}_PN.txt -sed -n '194,379p' Matrix_Partcov_${PDB}_Repair_${n}_PN.fxout > Matrix_Partcov_MM_${PDB}_Repair_${n}_PN.txt -sed -n '383,568p' Matrix_Partcov_${PDB}_Repair_${n}_PN.fxout > Matrix_Partcov_SM_${PDB}_Repair_${n}_PN.txt -sed -n '572,757p' Matrix_Partcov_${PDB}_Repair_${n}_PN.fxout > Matrix_Partcov_SS_${PDB}_Repair_${n}_PN.txt -cp Matrix_VdWClashes_${PDB}_Repair_${n}_PN.fxout Matrix_VdWClashes_${PDB}_Repair_${n}_PN.txt -sed -n '5,190p' Matrix_VdWClashes_${PDB}_Repair_${n}_PN.fxout > Matrix_VdWClashes_RR_${PDB}_Repair_${n}_PN.txt -sed -n '194,379p' Matrix_VdWClashes_${PDB}_Repair_${n}_PN.fxout > Matrix_VdWClashes_MM_${PDB}_Repair_${n}_PN.txt -sed -n '383,568p' Matrix_VdWClashes_${PDB}_Repair_${n}_PN.fxout > Matrix_VdWClashes_SM_${PDB}_Repair_${n}_PN.txt -sed -n '572,757p' Matrix_VdWClashes_${PDB}_Repair_${n}_PN.fxout > Matrix_VdWClashes_SS_${PDB}_Repair_${n}_PN.txt -cp AllAtoms_Disulfide_${PDB}_Repair_${n}_PN.fxout AllAtoms_Disulfide_${PDB}_Repair_${n}_PN.txt -sed -i .bak -e 1,2d AllAtoms_Disulfide_${PDB}_Repair_${n}_PN.txt -cp AllAtoms_Electro_${PDB}_Repair_${n}_PN.fxout AllAtoms_Electro_${PDB}_Repair_${n}_PN.txt -sed -i .bak -e 1,2d AllAtoms_Electro_${PDB}_Repair_${n}_PN.txt -cp AllAtoms_Hbonds_${PDB}_Repair_${n}_PN.fxout AllAtoms_Hbonds_${PDB}_Repair_${n}_PN.txt -sed -i .bak -e 1,2d AllAtoms_Hbonds_${PDB}_Repair_${n}_PN.txt -cp AllAtoms_Partcov_${PDB}_Repair_${n}_PN.fxout AllAtoms_Partcov_${PDB}_Repair_${n}_PN.txt -sed -i .bak -e 1,2d AllAtoms_Partcov_${PDB}_Repair_${n}_PN.txt -cp AllAtoms_VdWClashes_${PDB}_Repair_${n}_PN.fxout AllAtoms_VdWClashes_${PDB}_Repair_${n}_PN.txt -sed -i .bak -e 1,2d AllAtoms_VdWClashes_${PDB}_Repair_${n}_PN.txt -cp AllAtoms_Volumetric_${PDB}_Repair_${n}_PN.fxout AllAtoms_Volumetric_${PDB}_Repair_${n}_PN.txt -sed -i .bak -e 1,2d AllAtoms_Volumetric_${PDB}_Repair_${n}_PN.txt -cp InteractingResidues_VdWClashes_${PDB}_Repair_${n}_PN.fxout InteractingResidues_VdWClashes_${PDB}_Repair_${n}_PN.txt -sed -i .bak -e 1,5d InteractingResidues_VdWClashes_${PDB}_Repair_${n}_PN.txt -cp InteractingResidues_Distances_${PDB}_Repair_${n}_PN.fxout InteractingResidues_Distances_${PDB}_Repair_${n}_PN.txt -sed -i .bak -e 1,5d InteractingResidues_Distances_${PDB}_Repair_${n}_PN.txt -cp InteractingResidues_Electro_${PDB}_Repair_${n}_PN.fxout InteractingResidues_Electro_${PDB}_Repair_${n}_PN.txt -sed -i .bak -e 1,5d InteractingResidues_Electro_${PDB}_Repair_${n}_PN.txt -cp InteractingResidues_Hbonds_${PDB}_Repair_${n}_PN.fxout InteractingResidues_Hbonds_${PDB}_Repair_${n}_PN.txt -sed -i .bak -e 1,5d InteractingResidues_Hbonds_${PDB}_Repair_${n}_PN.txt -cp InteractingResidues_Partcov_${PDB}_Repair_${n}_PN.fxout InteractingResidues_Partcov_${PDB}_Repair_${n}_PN.txt -sed -i .bak -e 1,5d InteractingResidues_Partcov_${PDB}_Repair_${n}_PN.txt -cp InteractingResidues_Volumetric_${PDB}_Repair_${n}_PN.fxout InteractingResidues_Volumetric_${PDB}_Repair_${n}_PN.txt -sed -i .bak -e 1,5d InteractingResidues_Volumetric_${PDB}_Repair_${n}_PN.txt -cp InteractingResidues_Disulfide_${PDB}_Repair_${n}_PN.fxout InteractingResidues_Disulfide_${PDB}_Repair_${n}_PN.txt -sed -i .bak -e 1,5d InteractingResidues_Disulfide_${PDB}_Repair_${n}_PN.txt - - - - - - diff --git a/foldx/mutruncomplex.sh b/foldx/mutruncomplex.sh deleted file mode 100755 index 2b2c4e1..0000000 --- a/foldx/mutruncomplex.sh +++ /dev/null @@ -1,10 +0,0 @@ -PDB=$1 -A=$2 -B=$3 -n=$4 -OUTDIR=$5 -cd ${OUTDIR} -logger "Running mutruncomplex" -foldx --command=AnalyseComplex --pdb="${PDB}_Repair_${n}.pdb" --analyseComplexChains=${A},${B} --water=PREDICT --vdwDesign=1 -cp ${OUTDIR}/Summary_${PDB}_Repair_${n}_AC.fxout ${OUTDIR}/Summary_${PDB}_Repair_${n}_AC.txt -#sed -i .bak -e 1,8d ${OUTDIR}/Summary_${PDB}_Repair_${n}_AC.txt diff --git a/foldx/renamefiles.sh b/foldx/renamefiles.sh index 553ba5f..5cb0bf5 100755 --- a/foldx/renamefiles.sh +++ b/foldx/renamefiles.sh @@ -1,7 +1,7 @@ PDB=$1 OUTDIR=$2 cd ${OUTDIR} -logger "Running renamefiles" + cp Dif_${PDB}_Repair.fxout Dif_${PDB}_Repair.txt sed -i '1,8d' Dif_${PDB}_Repair.txt cp Matrix_Hbonds_${PDB}_Repair_PN.fxout Matrix_Hbonds_${PDB}_Repair_PN.txt @@ -62,9 +62,3 @@ cp InteractingResidues_Volumetric_${PDB}_Repair_PN.fxout InteractingResidues_Vol sed -i '1,5d' InteractingResidues_Volumetric_${PDB}_Repair_PN.txt cp InteractingResidues_Disulfide_${PDB}_Repair_PN.fxout InteractingResidues_Disulfide_${PDB}_Repair_PN.txt sed -i '1,5d' InteractingResidues_Disulfide_${PDB}_Repair_PN.txt - - - - - - diff --git a/foldx/renamefiles_mac.sh b/foldx/renamefiles_mac.sh deleted file mode 100755 index ea517bc..0000000 --- a/foldx/renamefiles_mac.sh +++ /dev/null @@ -1,68 +0,0 @@ -PDB=$1 -logger "Running renamefiles_mac" -#cp Dif_${PDB}_Repair.fxout Dif_${PDB}_Repair.txt -sed -i '.bak' -e 1,8d Dif_${PDB}_Repair.txt -cp Matrix_Hbonds_${PDB}_Repair_PN.fxout Matrix_Hbonds_${PDB}_Repair_PN.txt -sed -n '5,190p' Matrix_Hbonds_${PDB}_Repair_PN.fxout > Matrix_Hbonds_RR_${PDB}_Repair_PN.txt -sed -n '194,379p' Matrix_Hbonds_${PDB}_Repair_PN.fxout > Matrix_Hbonds_MM_${PDB}_Repair_PN.txt -sed -n '383,568p' Matrix_Hbonds_${PDB}_Repair_PN.fxout > Matrix_Hbonds_SM_${PDB}_Repair_PN.txt -sed -n '572,757p' Matrix_Hbonds_${PDB}_Repair_PN.fxout > Matrix_Hbonds_SS_${PDB}_Repair_PN.txt -cp Matrix_Distances_${PDB}_Repair_PN.fxout Matrix_Distances_${PDB}_Repair_PN.txt -sed -i '.bak' -e 1,4d Matrix_Distances_${PDB}_Repair_PN.txt -cp Matrix_Volumetric_${PDB}_Repair_PN.fxout Matrix_Volumetric_${PDB}_Repair_PN.txt -sed -n '5,190p' Matrix_Volumetric_${PDB}_Repair_PN.fxout > Matrix_Volumetric_RR_${PDB}_Repair_PN.txt -sed -n '194,379p' Matrix_Volumetric_${PDB}_Repair_PN.fxout > Matrix_Volumetric_MM_${PDB}_Repair_PN.txt -sed -n '383,568p' Matrix_Volumetric_${PDB}_Repair_PN.fxout > Matrix_Volumetric_SM_${PDB}_Repair_PN.txt -sed -n '572,757p' Matrix_Volumetric_${PDB}_Repair_PN.fxout > Matrix_Volumetric_SS_${PDB}_Repair_PN.txt -cp Matrix_Electro_${PDB}_Repair_PN.fxout Matrix_Electro_${PDB}_Repair_PN.txt -sed -n '5,190p' Matrix_Electro_${PDB}_Repair_PN.fxout > Matrix_Electro_RR_${PDB}_Repair_PN.txt -sed -n '194,379p' Matrix_Electro_${PDB}_Repair_PN.fxout > Matrix_Electro_MM_${PDB}_Repair_PN.txt -sed -n '383,568p' Matrix_Electro_${PDB}_Repair_PN.fxout > Matrix_Electro_SM_${PDB}_Repair_PN.txt -sed -n '572,757p' Matrix_Electro_${PDB}_Repair_PN.fxout > Matrix_Electro_SS_${PDB}_Repair_PN.txt -cp Matrix_Disulfide_${PDB}_Repair_PN.fxout Matrix_Disulfide_${PDB}_Repair_PN.txt -sed -n '5,190p' Matrix_Disulfide_${PDB}_Repair_PN.fxout > Matrix_Disulfide_RR_${PDB}_Repair_PN.txt -sed -n '194,379p' Matrix_Disulfide_${PDB}_Repair_PN.fxout > Matrix_Disulfide_MM_${PDB}_Repair_PN.txt -sed -n '383,568p' Matrix_Disulfide_${PDB}_Repair_PN.fxout > Matrix_Disulfide_SM_${PDB}_Repair_PN.txt -sed -n '572,757p' Matrix_Disulfide_${PDB}_Repair_PN.fxout > Matrix_Disulfide_SS_${PDB}_Repair_PN.txt -cp Matrix_Partcov_${PDB}_Repair_PN.fxout Matrix_Partcov_${PDB}_Repair_PN.txt -sed -n '5,190p' Matrix_Partcov_${PDB}_Repair_PN.fxout > Matrix_Partcov_RR_${PDB}_Repair_PN.txt -sed -n '194,379p' Matrix_Partcov_${PDB}_Repair_PN.fxout > Matrix_Partcov_MM_${PDB}_Repair_PN.txt -sed -n '383,568p' Matrix_Partcov_${PDB}_Repair_PN.fxout > Matrix_Partcov_SM_${PDB}_Repair_PN.txt -sed -n '572,757p' Matrix_Partcov_${PDB}_Repair_PN.fxout > Matrix_Partcov_SS_${PDB}_Repair_PN.txt -cp Matrix_VdWClashes_${PDB}_Repair_PN.fxout Matrix_VdWClashes_${PDB}_Repair_PN.txt -sed -n '5,190p' Matrix_VdWClashes_${PDB}_Repair_PN.fxout > Matrix_VdWClashes_RR_${PDB}_Repair_PN.txt -sed -n '194,379p' Matrix_VdWClashes_${PDB}_Repair_PN.fxout > Matrix_VdWClashes_MM_${PDB}_Repair_PN.txt -sed -n '383,568p' Matrix_VdWClashes_${PDB}_Repair_PN.fxout > Matrix_VdWClashes_SM_${PDB}_Repair_PN.txt -sed -n '572,757p' Matrix_VdWClashes_${PDB}_Repair_PN.fxout > Matrix_VdWClashes_SS_${PDB}_Repair_PN.txt -cp AllAtoms_Disulfide_${PDB}_Repair_PN.fxout AllAtoms_Disulfide_${PDB}_Repair_PN.txt -sed -i '.bak' -e 1,2d AllAtoms_Disulfide_${PDB}_Repair_PN.txt -cp AllAtoms_Electro_${PDB}_Repair_PN.fxout AllAtoms_Electro_${PDB}_Repair_PN.txt -sed -i '.bak' -e 1,2d AllAtoms_Electro_${PDB}_Repair_PN.txt -cp AllAtoms_Hbonds_${PDB}_Repair_PN.fxout AllAtoms_Hbonds_${PDB}_Repair_PN.txt -sed -i '.bak' -e 1,2d AllAtoms_Hbonds_${PDB}_Repair_PN.txt -cp AllAtoms_Partcov_${PDB}_Repair_PN.fxout AllAtoms_Partcov_${PDB}_Repair_PN.txt -sed -i '.bak' -e 1,2d AllAtoms_Partcov_${PDB}_Repair_PN.txt -cp AllAtoms_VdWClashes_${PDB}_Repair_PN.fxout AllAtoms_VdWClashes_${PDB}_Repair_PN.txt -sed -i '.bak' -e 1,2d AllAtoms_VdWClashes_${PDB}_Repair_PN.txt -cp AllAtoms_Volumetric_${PDB}_Repair_PN.fxout AllAtoms_Volumetric_${PDB}_Repair_PN.txt -sed -i '.bak' -e 1,2d AllAtoms_Volumetric_${PDB}_Repair_PN.txt -cp InteractingResidues_VdWClashes_${PDB}_Repair_PN.fxout InteractingResidues_VdWClashes_${PDB}_Repair_PN.txt -sed -i '.bak' -e 1,5d InteractingResidues_VdWClashes_${PDB}_Repair_PN.txt -cp InteractingResidues_Distances_${PDB}_Repair_PN.fxout InteractingResidues_Distances_${PDB}_Repair_PN.txt -sed -i '.bak' -e 1,5d InteractingResidues_Distances_${PDB}_Repair_PN.txt -cp InteractingResidues_Electro_${PDB}_Repair_PN.fxout InteractingResidues_Electro_${PDB}_Repair_PN.txt -sed -i '.bak' -e 1,5d InteractingResidues_Electro_${PDB}_Repair_PN.txt -cp InteractingResidues_Hbonds_${PDB}_Repair_PN.fxout InteractingResidues_Hbonds_${PDB}_Repair_PN.txt -sed -i '.bak' -e 1,5d InteractingResidues_Hbonds_${PDB}_Repair_PN.txt -cp InteractingResidues_Partcov_${PDB}_Repair_PN.fxout InteractingResidues_Partcov_${PDB}_Repair_PN.txt -sed -i '.bak' -e 1,5d InteractingResidues_Partcov_${PDB}_Repair_PN.txt -cp InteractingResidues_Volumetric_${PDB}_Repair_PN.fxout InteractingResidues_Volumetric_${PDB}_Repair_PN.txt -sed -i '.bak' -e 1,5d InteractingResidues_Volumetric_${PDB}_Repair_PN.txt -cp InteractingResidues_Disulfide_${PDB}_Repair_PN.fxout InteractingResidues_Disulfide_${PDB}_Repair_PN.txt -sed -i '.bak' -e 1,5d InteractingResidues_Disulfide_${PDB}_Repair_PN.txt - - - - - - diff --git a/foldx/repairPDB.sh b/foldx/repairPDB.sh deleted file mode 100755 index ee1a13c..0000000 --- a/foldx/repairPDB.sh +++ /dev/null @@ -1,9 +0,0 @@ -INDIR=$1 -PDB=$2 -OUTDIR=$3 - -logger "Running repairPDB" - -#foldx --command=RepairPDB --pdb="${PDB}.pdb" --ionStrength=0.05 --pH=7 --water=PREDICT --vdwDesign=1 outPDB=true --output-dir=${OUTDIR} - -foldx --command=RepairPDB --pdb-dir=${INDIR} --pdb=${PDB} --ionStrength=0.05 --pH=7 --water=PREDICT --vdwDesign=1 outPDB=true --output-dir=${OUTDIR} diff --git a/foldx/runFoldx.py b/foldx/runFoldx.py index 2bf0067..7b89ae9 100755 --- a/foldx/runFoldx.py +++ b/foldx/runFoldx.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import subprocess import os +import sys import numpy as np import pandas as pd from contextlib import suppress @@ -8,6 +9,8 @@ from pathlib import Path import re import csv import argparse +import shutil +import time #https://realpython.com/python-pathlib/ # FIXME @@ -22,8 +25,8 @@ homedir = os.path.expanduser('~') # set working dir os.getcwd() -os.chdir(homedir + '/git/LSHTM_analysis/foldx/') -os.getcwd() +#os.chdir(homedir + '/git/LSHTM_analysis/foldx/') +#os.getcwd() #======================================================================= #%% command line args @@ -37,7 +40,7 @@ arg_parser.add_argument('-i', '--input_dir', help = 'Input dir containing pdb fi arg_parser.add_argument('-o', '--output_dir', help = 'Output dir for results. By default, it assmes homedir + + output') arg_parser.add_argument('-p', '--process_dir', help = 'Temp processing dir for running foldX. By default, it assmes homedir + + processing. Make sure it is somewhere with LOTS of storage as it writes all output!') #FIXME -arg_parser.add_argument('-pdb', '--pdb_file', help = 'PDB File to process. By default, it assmumes a file called _complex.pdb in input_dir') +arg_parser.add_argument('-pdb', '--pdb_file', help = 'PDB File to process. By default, it assmumes a file called _complex.pdb in input_dir') # DO NOT specify an absolute path arg_parser.add_argument('-m', '--mutation_file', help = 'Mutation list. By default, assumes a file called _mcsm_snps.csv exists') # FIXME: Doesn't work with 2 chains yet! @@ -77,34 +80,45 @@ if not datadir: datadir = homedir + '/' + 'git/Data' if not indir: - indir = datadir + '/' + drug + '/input' + indir = + datadir + '/' + drug + '/input' if not outdir: outdir = datadir + '/' + drug + '/output' #TODO: perhaps better handled by refactoring code to prevent generating lots of output files! if not process_dir: - process_dir = datadir + '/' + drug +'/' + 'processing' + process_dir = datadir + '/' + drug + '/processing' + +# Make all paths absolute in case the user forgot +indir = os.path.abspath(indir) +process_dir = os.path.abspath(process_dir) +outdir = os.path.abspath(outdir) +datadir = os.path.abspath(datadir) #======= # input #======= # FIXME if pdb_filename: + pdb_filename = os.path.abspath(pdb_filename) pdb_name = Path(pdb_filename).stem + infile_pdb = pdb_filename else: pdb_filename = gene.lower() + '_complex.pdb' pdb_name = Path(pdb_filename).stem - -infile_pdb = indir + '/' + pdb_filename + infile_pdb = indir + '/' + pdb_filename + actual_pdb_filename = Path(infile_pdb).name if mut_filename: - mutation_file = mut_filename + mutation_file = os.path.abspath(mut_filename) + infile_muts = mutation_file + print('User-provided mutation file in use:', infile_muts) else: mutation_file = gene.lower() + '_mcsm_formatted_snps.csv' - -infile_muts = outdir + '/' + mutation_file + infile_muts = outdir + '/' + mutation_file + print('WARNING: Assuming default mutation file:', infile_muts) #======= # output @@ -116,6 +130,7 @@ print('Arguments being passed:' , '\nDrug:', args.drug , '\ngene:', args.gene , '\ninput dir:', indir +, '\nprocess dir:', process_dir , '\noutput dir:', outdir , '\npdb file:', infile_pdb , '\npdb name:', pdb_name @@ -124,6 +139,9 @@ print('Arguments being passed:' , '\nchain1:', args.chain1 , '\noutput file:', outfile_foldx , '\n=============================================================') + +#### Delay for 10 seconds to check the params #### +time.sleep(10) #======================================================================= def getInteractionEnergy(filename): @@ -184,6 +202,19 @@ def loadFiles(df): f.close() return np.asarray(resultList, dtype=np.float32) +# TODO: put the subprocess call in a 'def' +#def repairPDB(): +# subprocess.call(['foldx' +# , '--command=RepairPDB' +# , '--pdb-dir=' + indir +# , '--pdb=' + actual_pdb_filename +# , '--ionStrength=0.05'# +# , '--pH=7' +# , '--water=PREDICT' +# , '--vdwDesign=1' +# , 'outPDB=true' +# , '--output-dir=' + process_dir]) + #======================================================================= def main(): pdbname = pdb_name @@ -195,37 +226,128 @@ def main(): nmuts = len(mutlist) print(nmuts) print(mutlist) - print('start') - #subprocess.check_output(['bash','repairPDB.sh', pdbname, process_dir]) - subprocess.check_output(['bash','repairPDB.sh', indir, actual_pdb_filename, process_dir]) - print('end') - output = subprocess.check_output(['bash', 'runfoldx.sh', pdbname, process_dir]) + print('start') + # some common parameters for foldX + foldx_common=' --ionStrength=0.05 --pH=7 --water=PREDICT --vdwDesign=1 ' + print('\033[95mSTAGE: repair PDB (foldx subprocess) \033[0m') + print('Running foldx RepairPDB for WT') + subprocess.call(['foldx' + , '--command=RepairPDB' + , foldx_common + , '--pdb-dir=' + indir + , '--pdb=' + actual_pdb_filename + , 'outPDB=true' + , '--output-dir=' + process_dir]) + print('\033[95mCOMPLETED STAGE: repair PDB\033[0m') + print('\n==========================================================') + + + print('\033[95mSTAGE: Foldx commands BM, PN and SD (foldx subprocess) for WT\033[0m') + print('Running foldx BuildModel for WT') + subprocess.call(['foldx' + , '--command=BuildModel' + , foldx_common + , '--pdb-dir=' + process_dir + , '--pdb=' + pdbname + '_Repair.pdb' + , '--mutant-file="individual_list_' + pdbname +'.txt"' + , 'outPDB=true' + , '--numberOfRuns=1' + , '--output-dir=' + process_dir], cwd=process_dir) + + print('Running foldx PrintNetworks for WT') + subprocess.call(['foldx' + , '--command=PrintNetworks' + , '--pdb-dir=' + process_dir + , '--pdb=' + pdbname + '_Repair.pdb' + , '--water=PREDICT' + , '--vdwDesign=1' + , '--output-dir=' + process_dir], cwd=process_dir) + + print('Running foldx SequenceDetail for WT') + subprocess.call(['foldx' + , '--command=SequenceDetail' + , '--pdb-dir=' + process_dir + , '--pdb=' + pdbname + '_Repair.pdb' + , '--water=PREDICT' + , '--vdwDesign=1' + , '--output-dir=' + process_dir], cwd=process_dir) + print('\033[95mCOMPLETED STAGE: Foldx commands BM, PN and SD\033[0m') + print('\n==========================================================') + + + print('\033[95mSTAGE: Print Networks (foldx subprocess) for MT\033[0m') for n in range(1,nmuts+1): - print(n) - with suppress(Exception): - subprocess.check_output(['bash', 'runPrintNetworks.sh', pdbname, str(n), process_dir]) - + print('\033[95mNETWORK:\033[0m', n) + print('Running foldx PrintNetworks for mutation', n) + subprocess.call(['foldx' + , '--command=PrintNetworks' + , '--pdb-dir=' + process_dir + , '--pdb=' + pdbname + '_Repair_' + str(n) + '.pdb' + , '--water=PREDICT' + , '--vdwDesign=1' + , '--output-dir=' + process_dir], cwd=process_dir) + print('\033[95mCOMPLETED STAGE: Print Networks (foldx subprocess) for MT\033[0m') + print('\n==========================================================') + + + print('\033[95mSTAGE: Rename Mutation Files (shell)\033[0m') for n in range(1,nmuts+1): - print(n) + print('\033[95mMUTATION:\033[0m', n) + print('\033[96mCommand:\033[0m mutrenamefiles.sh %s %s %s' % (pdbname, str(n), process_dir )) + #FIXME: bad design and needs to be done in a pythonic way with suppress(Exception): subprocess.check_output(['bash', 'mutrenamefiles.sh', pdbname, str(n), process_dir]) - + print('\033[95mCOMPLETED STAGE: Rename Mutation Files (shell)\033[0m') + print('\n==========================================================') + + + print('\033[95mSTAGE: Rename Files (shell) for WT\033[0m') + # FIXME: this is bad design and needs to be done in a pythonic way out = subprocess.check_output(['bash','renamefiles.sh', pdbname, process_dir]) - + print('\033[95mCOMPLETED STAGE: Rename Files (shell) for WT\033[0m') + print('\n==========================================================') + + if comp=='y': + print('\033[95mSTAGE: Running foldx AnalyseComplex (foldx subprocess) for WT\033[0m') chain1=chainA chain2=chainB - with suppress(Exception): - subprocess.check_output(['bash','runcomplex.sh', pdbname, chain1, chain2, process_dir]) - for n in range(1,nmuts+1): - with suppress(Exception): - subprocess.check_output(['bash','mutruncomplex.sh', pdbname, chain1, chain2, str(n), process_dir]) + subprocess.call(['foldx' + , '--command=AnalyseComplex' + , '--pdb-dir=' + process_dir + , '--pdb=' + pdbname + '_Repair.pdb' + , '--analyseComplexChains=' + chain1 + ',' + chain2 + , '--water=PREDICT' + , '--vdwDesign=1' + , '--output-dir=' + process_dir], cwd=process_dir) - interactions = ['Distances','Electro_RR','Electro_MM','Electro_SM','Electro_SS','Disulfide_RR','Disulfide_MM','Disulfide_SM','Disulfide_SS', - 'Hbonds_RR','Hbonds_MM','Hbonds_SM','Hbonds_SS','Partcov_RR','Partcov_MM','Partcov_SM','Partcov_SS','VdWClashes_RR','VdWClashes_MM', - 'VdWClashes_SM','VdWClashes_SS','Volumetric_RR','Volumetric_MM','Volumetric_SM','Volumetric_SS'] + # FIXME why would we ever need to do this?!? Cargo-culted from runcomplex.sh + ac_source = process_dir + '/Summary_' + pdbname + '_Repair_AC.fxout' + ac_dest = process_dir + '/Summary_' + pdbname + '_Repair_AC.txt' + shutil.copyfile(ac_source, ac_dest) + print('\033[95mCOMPLETED STAGE: foldx AnalyseComplex (subprocess) for WT:\033[0m', n) + + for n in range(1,nmuts+1): + print('\033[95mSTAGE: Running foldx AnalyseComplex (foldx subprocess) for mutation:\033[0m', n) + subprocess.call(['foldx' + , '--command=AnalyseComplex' + , '--pdb-dir=' + process_dir + , '--pdb=' + pdbname + '_Repair_' + str(n) + '.pdb' + , '--analyseComplexChains=' + chain1 + ',' + chain2 + , '--water=PREDICT' + , '--vdwDesign=1' + , '--output-dir=' + process_dir], cwd=process_dir) + + # FIXME why would we ever need to do this?!? Cargo-culted from runcomplex.sh + ac_mut_source = process_dir + '/Summary_' + pdbname + '_Repair_' + str(n) +'_AC.fxout' + ac_mut_dest = process_dir + '/Summary_' + pdbname + '_Repair)' + str(n) +'_AC.txt' + shutil.copyfile(ac_mut_source, ac_mut_dest) + print('\033[95mCOMPLETED STAGE: foldx AnalyseComplex (subprocess) for mutation:\033[0m', n) + print('\n==========================================================') + + interactions = ['Distances','Electro_RR','Electro_MM','Electro_SM','Electro_SS','Disulfide_RR','Disulfide_MM','Disulfide_SM','Disulfide_SS', 'Hbonds_RR','Hbonds_MM','Hbonds_SM','Hbonds_SS','Partcov_RR','Partcov_MM','Partcov_SM','Partcov_SS','VdWClashes_RR','VdWClashes_MM','VdWClashes_SM','VdWClashes_SS','Volumetric_RR','Volumetric_MM','Volumetric_SM','Volumetric_SS'] dGdatafile = process_dir + '/Dif_' + pdbname + '_Repair.txt' dGdata = pd.read_csv(dGdatafile, sep = '\t') @@ -267,8 +389,7 @@ def main(): print(d) data[i+1] = d - interactions = ['ddG', 'Distances','Electro_RR','Electro_MM','Electro_SM','Electro_SS','Disulfide_RR','Disulfide_MM','Disulfide_SM','Disulfide_SS', 'Hbonds_RR','Hbonds_MM','Hbonds_SM','Hbonds_SS','Partcov_RR','Partcov_MM','Partcov_SM','Partcov_SS','VdWClashes_RR','VdWClashes_MM', -'VdWClashes_SM','VdWClashes_SS','Volumetric_RR','Volumetric_MM','Volumetric_SM','Volumetric_SS'] + interactions = ['ddG', 'Distances','Electro_RR','Electro_MM','Electro_SM','Electro_SS','Disulfide_RR','Disulfide_MM','Disulfide_SM','Disulfide_SS', 'Hbonds_RR','Hbonds_MM','Hbonds_SM','Hbonds_SS','Partcov_RR','Partcov_MM','Partcov_SM','Partcov_SS','VdWClashes_RR','VdWClashes_MM','VdWClashes_SM','VdWClashes_SS','Volumetric_RR','Volumetric_MM','Volumetric_SM','Volumetric_SS'] print(interactions) @@ -292,8 +413,7 @@ def main(): print(len(IE)) data = np.append(data,[IE], axis = 0) print(data) - interactions = ['ddG','Distances','Electro_RR','Electro_MM','Electro_SM','Electro_SS','Disulfide_RR','Disulfide_MM','Disulfide_SM','Disulfide_SS', 'Hbonds_RR','Hbonds_MM','Hbonds_SM','Hbonds_SS','Partcov_RR','Partcov_MM','Partcov_SM','Partcov_SS','VdWClashes_RR','VdWClashes_MM', -'VdWClashes_SM','VdWClashes_SS','Volumetric_RR','Volumetric_MM','Volumetric_SM','Volumetric_SS','Interaction Energy'] + interactions = ['ddG','Distances','Electro_RR','Electro_MM','Electro_SM','Electro_SS','Disulfide_RR','Disulfide_MM','Disulfide_SM','Disulfide_SS','Hbonds_RR','Hbonds_MM','Hbonds_SM','Hbonds_SS','Partcov_RR','Partcov_MM','Partcov_SM','Partcov_SS','VdWClashes_RR','VdWClashes_MM','VdWClashes_SM','VdWClashes_SS','Volumetric_RR','Volumetric_MM','Volumetric_SM','Volumetric_SS','Interaction Energy'] mut_file = process_dir + '/individual_list_' + pdbname + '.txt' with open(mut_file) as csvfile: @@ -331,6 +451,7 @@ def main(): #outputfilename = 'foldx_results_' + pdbname + '.csv' #results.to_csv(outputfilename) results2.to_csv(outputfilename, index = False) + print ('end') if __name__ == '__main__': main() diff --git a/foldx/runPrintNetworks.sh b/foldx/runPrintNetworks.sh deleted file mode 100755 index c2bebab..0000000 --- a/foldx/runPrintNetworks.sh +++ /dev/null @@ -1,7 +0,0 @@ -PDB=$1 -n=$2 -OUTDIR=$3 -logger "Running runPrintNetworks" -cd ${OUTDIR} - -foldx --command=PrintNetworks --pdb="${PDB}_Repair_${n}.pdb" --water=PREDICT --vdwDesign=1 --output-dir=${OUTDIR} diff --git a/foldx/runcomplex.sh b/foldx/runcomplex.sh deleted file mode 100755 index 0c0483d..0000000 --- a/foldx/runcomplex.sh +++ /dev/null @@ -1,9 +0,0 @@ -PDB=$1 -A=$2 -B=$3 -OUTDIR=$4 -cd ${OUTDIR} -logger "Running runcomplex" -foldx --command=AnalyseComplex --pdb="${PDB}_Repair.pdb" --analyseComplexChains=${A},${B} --water=PREDICT --vdwDesign=1 --output-dir=${OUTDIR} -cp ${OUTDIR}/Summary_${PDB}_Repair_AC.fxout ${OUTDIR}/Summary_${PDB}_Repair_AC.txt -#sed -i .bak -e 1,8d ${OUTDIR}/Summary_${PDB}_Repair_AC.txt diff --git a/foldx/runfoldx.sh b/foldx/runfoldx.sh deleted file mode 100755 index 5a929ce..0000000 --- a/foldx/runfoldx.sh +++ /dev/null @@ -1,9 +0,0 @@ -PDB=$1 -OUTDIR=$2 -cd ${OUTDIR} -pwd -ls -logger "Running runfoldx" -foldx --command=BuildModel --pdb="${PDB}_Repair.pdb" --mutant-file="individual_list_${PDB}.txt" --ionStrength=0.05 --pH=7 --water=PREDICT --vdwDesign=1 --out-pdb=true --numberOfRuns=1 --output-dir=${OUTDIR} -foldx --command=PrintNetworks --pdb="${PDB}_Repair.pdb" --water=PREDICT --vdwDesign=1 --output-dir=${OUTDIR} -foldx --command=SequenceDetail --pdb="${PDB}_Repair.pdb" --water=PREDICT --vdwDesign=1 --output-dir=${OUTDIR} diff --git a/foldx/test2/mutrenamefiles.sh b/foldx/test2/mutrenamefiles.sh index d9f938c..3b045e5 100755 --- a/foldx/test2/mutrenamefiles.sh +++ b/foldx/test2/mutrenamefiles.sh @@ -2,7 +2,7 @@ PDB=$1 n=$2 OUTDIR=$3 cd ${OUTDIR} - +#cd /home/git/LSHTM_analysis/foldx/test2 cp Matrix_Hbonds_${PDB}_Repair_${n}_PN.fxout Matrix_Hbonds_${PDB}_Repair_${n}_PN.txt sed -n '5,190p' Matrix_Hbonds_${PDB}_Repair_${n}_PN.fxout > Matrix_Hbonds_RR_${PDB}_Repair_${n}_PN.txt sed -n '194,379p' Matrix_Hbonds_${PDB}_Repair_${n}_PN.fxout > Matrix_Hbonds_MM_${PDB}_Repair_${n}_PN.txt diff --git a/foldx/test2/renamefiles.sh b/foldx/test2/renamefiles.sh index 2a2b193..f69ab68 100755 --- a/foldx/test2/renamefiles.sh +++ b/foldx/test2/renamefiles.sh @@ -1,8 +1,7 @@ PDB=$1 OUTDIR=$2 cd ${OUTDIR} - -#cd /home/git/LSHTM_analysis/foldx/test +#cd /home/git/LSHTM_analysis/foldx/test2 cp Dif_${PDB}_Repair.fxout Dif_${PDB}_Repair.txt sed -i '1,8d' Dif_${PDB}_Repair.txt cp Matrix_Hbonds_${PDB}_Repair_PN.fxout Matrix_Hbonds_${PDB}_Repair_PN.txt From 725e9b53ca103a1d23d6e71c284be203c59e9889 Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Tue, 9 Feb 2021 14:43:03 +0000 Subject: [PATCH 08/12] test2 runfoldx symlink --- foldx/test2/runFoldx.py | 1 + 1 file changed, 1 insertion(+) create mode 120000 foldx/test2/runFoldx.py diff --git a/foldx/test2/runFoldx.py b/foldx/test2/runFoldx.py new file mode 120000 index 0000000..eccad30 --- /dev/null +++ b/foldx/test2/runFoldx.py @@ -0,0 +1 @@ +../runFoldx.py \ No newline at end of file From 8302d0186776ba34e83655e8429ba3e56736983e Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Tue, 9 Feb 2021 15:00:03 +0000 Subject: [PATCH 09/12] check to handle missing I/O/P dirs if drug unset --- foldx/runFoldx.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/foldx/runFoldx.py b/foldx/runFoldx.py index 7b89ae9..b5a4e69 100755 --- a/foldx/runFoldx.py +++ b/foldx/runFoldx.py @@ -73,6 +73,14 @@ pdb_filename = args.pdb_file # Just the filename, thanks #pdb_name = Path(in_filename_pdb).stem + +# Handle the case where neither 'drug' +# nor (indir,outdir,process_dir) are defined +if not drug: + if not indir or not outdir or not process_dir: + print('ERROR: if "drug" is not specified, you must specify Input, Output, and Process directories') + sys.exit() + #============== # directories #============== @@ -80,8 +88,7 @@ if not datadir: datadir = homedir + '/' + 'git/Data' if not indir: - indir = - datadir + '/' + drug + '/input' + indir = datadir + '/' + drug + '/input' if not outdir: outdir = datadir + '/' + drug + '/output' @@ -141,6 +148,7 @@ print('Arguments being passed:' , '\n=============================================================') #### Delay for 10 seconds to check the params #### +print('Sleeping for 10 seconds to give you time to cancel') time.sleep(10) #======================================================================= From 4163ede7989a22f38ab8f6261b9d4fb7b733a3be Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Tue, 9 Feb 2021 15:20:55 +0000 Subject: [PATCH 10/12] dont break when the pdb file is in a weird place with a weird name --- foldx/runFoldx.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/foldx/runFoldx.py b/foldx/runFoldx.py index b5a4e69..d54d507 100755 --- a/foldx/runFoldx.py +++ b/foldx/runFoldx.py @@ -40,7 +40,7 @@ arg_parser.add_argument('-i', '--input_dir', help = 'Input dir containing pdb fi arg_parser.add_argument('-o', '--output_dir', help = 'Output dir for results. By default, it assmes homedir + + output') arg_parser.add_argument('-p', '--process_dir', help = 'Temp processing dir for running foldX. By default, it assmes homedir + + processing. Make sure it is somewhere with LOTS of storage as it writes all output!') #FIXME -arg_parser.add_argument('-pdb', '--pdb_file', help = 'PDB File to process. By default, it assmumes a file called _complex.pdb in input_dir') # DO NOT specify an absolute path +arg_parser.add_argument('-pdb', '--pdb_file', help = 'PDB File to process. By default, it assmumes a file called _complex.pdb in input_dir') arg_parser.add_argument('-m', '--mutation_file', help = 'Mutation list. By default, assumes a file called _mcsm_snps.csv exists') # FIXME: Doesn't work with 2 chains yet! @@ -244,7 +244,7 @@ def main(): subprocess.call(['foldx' , '--command=RepairPDB' , foldx_common - , '--pdb-dir=' + indir + , '--pdb-dir=' + os.path.dirname(pdb_filename) , '--pdb=' + actual_pdb_filename , 'outPDB=true' , '--output-dir=' + process_dir]) From 534a6754cd6bbf309aaf03147c4605eb4914c273 Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Tue, 9 Feb 2021 15:45:21 +0000 Subject: [PATCH 11/12] add foldx5 wrapper --- foldx/runFoldx.py | 23 +-- foldx/runFoldx5.py | 466 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 478 insertions(+), 11 deletions(-) create mode 100755 foldx/runFoldx5.py diff --git a/foldx/runFoldx.py b/foldx/runFoldx.py index d54d507..8d9358b 100755 --- a/foldx/runFoldx.py +++ b/foldx/runFoldx.py @@ -40,7 +40,7 @@ arg_parser.add_argument('-i', '--input_dir', help = 'Input dir containing pdb fi arg_parser.add_argument('-o', '--output_dir', help = 'Output dir for results. By default, it assmes homedir + + output') arg_parser.add_argument('-p', '--process_dir', help = 'Temp processing dir for running foldX. By default, it assmes homedir + + processing. Make sure it is somewhere with LOTS of storage as it writes all output!') #FIXME -arg_parser.add_argument('-pdb', '--pdb_file', help = 'PDB File to process. By default, it assmumes a file called _complex.pdb in input_dir') +arg_parser.add_argument('-P', '--pdb_file', help = 'PDB File to process. By default, it assmumes a file called _complex.pdb in input_dir') arg_parser.add_argument('-m', '--mutation_file', help = 'Mutation list. By default, assumes a file called _mcsm_snps.csv exists') # FIXME: Doesn't work with 2 chains yet! @@ -55,18 +55,19 @@ args = arg_parser.parse_args() #gene_match = gene + '_p.' #%%===================================================================== # Command line options -drug = args.drug -gene = args.gene +drug = args.drug +gene = args.gene -datadir = args.datadir -indir = args.input_dir -outdir = args.output_dir -process_dir = args.process_dir +datadir = args.datadir +indir = args.input_dir +outdir = args.output_dir +process_dir = args.process_dir + +mut_filename = args.mutation_file +chainA = args.chain1 +chainB = args.chain2 +pdb_filename = args.pdb_file -mut_filename = args.mutation_file -chainA = args.chain1 -chainB = args.chain2 -pdb_filename = args.pdb_file # os.path.splitext will fail interestingly with file.pdb.txt.zip #pdb_name = os.path.splitext(pdb_file)[0] diff --git a/foldx/runFoldx5.py b/foldx/runFoldx5.py new file mode 100755 index 0000000..8a07a4f --- /dev/null +++ b/foldx/runFoldx5.py @@ -0,0 +1,466 @@ +#!/usr/bin/env python3 +import subprocess +import os +import sys +import numpy as np +import pandas as pd +from contextlib import suppress +from pathlib import Path +import re +import csv +import argparse +import shutil +import time +#https://realpython.com/python-pathlib/ + +# FIXME +#strong dependency of file and path names +#cannot pass file with path. Need to pass them separately +#assumptions made for dir struc as standard +#datadir + drug + input + +#======================================================================= +#%% specify input and curr dir +homedir = os.path.expanduser('~') + +# set working dir +os.getcwd() +#os.chdir(homedir + '/git/LSHTM_analysis/foldx/') +#os.getcwd() + +#======================================================================= +#%% command line args +arg_parser = argparse.ArgumentParser() + +arg_parser.add_argument('-d', '--drug', help = 'drug name', default = None) +arg_parser.add_argument('-g', '--gene', help = 'gene name (case sensitive)', default = None) + +arg_parser.add_argument('--datadir', help = 'Data Directory. By default, it assmumes homedir + git/Data') +arg_parser.add_argument('-i', '--input_dir', help = 'Input dir containing pdb files. By default, it assmumes homedir + + input') +arg_parser.add_argument('-o', '--output_dir', help = 'Output dir for results. By default, it assmes homedir + + output') +arg_parser.add_argument('-p', '--process_dir', help = 'Temp processing dir for running foldX. By default, it assmes homedir + + processing. Make sure it is somewhere with LOTS of storage as it writes all output!') #FIXME + +arg_parser.add_argument('-P', '--pdb_file', help = 'PDB File to process. By default, it assmumes a file called _complex.pdb in input_dir') +arg_parser.add_argument('-m', '--mutation_file', help = 'Mutation list. By default, assumes a file called _mcsm_snps.csv exists') + +# FIXME: Doesn't work with 2 chains yet! +arg_parser.add_argument('-c1', '--chain1', help = 'Chain1 ID', default = 'A') # case sensitive +arg_parser.add_argument('-c2', '--chain2', help = 'Chain2 ID', default = 'B') # case sensitive + +args = arg_parser.parse_args() +#======================================================================= +#%% variable assignment: input and output +#drug = 'pyrazinamide' +#gene = 'pncA' +#gene_match = gene + '_p.' +#%%===================================================================== +# Command line options +drug = args.drug +gene = args.gene + +datadir = args.datadir +indir = args.input_dir +outdir = args.output_dir +process_dir = args.process_dir + +mut_filename = args.mutation_file +chainA = args.chain1 +chainB = args.chain2 +pdb_filename = args.pdb_file + + +# os.path.splitext will fail interestingly with file.pdb.txt.zip +#pdb_name = os.path.splitext(pdb_file)[0] +# Just the filename, thanks +#pdb_name = Path(in_filename_pdb).stem + + +# Handle the case where neither 'drug' +# nor (indir,outdir,process_dir) are defined +if not drug: + if not indir or not outdir or not process_dir: + print('ERROR: if "drug" is not specified, you must specify Input, Output, and Process directories') + sys.exit() + +#============== +# directories +#============== +if not datadir: + datadir = homedir + '/' + 'git/Data' + +if not indir: + indir = datadir + '/' + drug + '/input' + +if not outdir: + outdir = datadir + '/' + drug + '/output' + +#TODO: perhaps better handled by refactoring code to prevent generating lots of output files! +if not process_dir: + process_dir = datadir + '/' + drug + '/processing' + +# Make all paths absolute in case the user forgot +indir = os.path.abspath(indir) +process_dir = os.path.abspath(process_dir) +outdir = os.path.abspath(outdir) +datadir = os.path.abspath(datadir) + +#======= +# input +#======= +# FIXME +if pdb_filename: + pdb_filename = os.path.abspath(pdb_filename) + pdb_name = Path(pdb_filename).stem + infile_pdb = pdb_filename +else: + pdb_filename = gene.lower() + '_complex.pdb' + pdb_name = Path(pdb_filename).stem + infile_pdb = indir + '/' + pdb_filename + +actual_pdb_filename = Path(infile_pdb).name + +if mut_filename: + mutation_file = os.path.abspath(mut_filename) + infile_muts = mutation_file + print('User-provided mutation file in use:', infile_muts) +else: + mutation_file = gene.lower() + '_mcsm_formatted_snps.csv' + infile_muts = outdir + '/' + mutation_file + print('WARNING: Assuming default mutation file:', infile_muts) + +#======= +# output +#======= +out_filename = gene.lower() + '_foldx.csv' +outfile_foldx = outdir + '/' + out_filename + +print('Arguments being passed:' +, '\nDrug:', args.drug +, '\ngene:', args.gene +, '\ninput dir:', indir +, '\nprocess dir:', process_dir +, '\noutput dir:', outdir +, '\npdb file:', infile_pdb +, '\npdb name:', pdb_name +, '\nactual pdb name:', actual_pdb_filename +, '\nmutation file:', infile_muts +, '\nchain1:', args.chain1 +, '\noutput file:', outfile_foldx +, '\n=============================================================') + +#### Delay for 10 seconds to check the params #### +print('Sleeping for 10 seconds to give you time to cancel') +time.sleep(10) +#======================================================================= + +def getInteractionEnergy(filename): + data = pd.read_csv(filename,sep = '\t') + return data['Interaction Energy'].loc[0] + +def getInteractions(filename): + data = pd.read_csv(filename, index_col = 0, header = 0, sep = '\t') + contactList = getIndexes(data,1) + number = len(contactList) + return number + +def formatMuts(mut_file,pdbname): + with open(mut_file) as csvfile: + readCSV = csv.reader(csvfile) + muts = [] + for row in readCSV: + mut = row[0] + muts.append(mut) + + mut_list = [] + outfile = process_dir + '/individual_list_' + pdbname + '.txt' + with open(outfile, 'w') as output: + for m in muts: + print(m) + mut = m[:1] + chainA+ m[1:] + mut_list.append(mut) + mut = mut + ';' + print(mut) + output.write(mut) + output.write('\n') + return mut_list + +def getIndexes(data, value): + colnames = data.columns.values + listOfPos = list() + result = data.isin([value]) + result.columns = colnames + seriesdata = result.any() + columnNames = list(seriesdata[seriesdata==True].index) + for col in columnNames: + rows = list(result[col][result[col]==True].index) + + for row in rows: + listOfPos.append((row,col)) + + return listOfPos + +def loadFiles(df): + # load a text file in to np matrix + resultList = [] + f = open(df,'r') + for line in f: + line = line.rstrip('\n') + aVals = line.split('\t') + fVals = list(map(np.float32, sVals)) + resultList.append(fVals) + f.close() + return np.asarray(resultList, dtype=np.float32) + +# TODO: put the subprocess call in a 'def' +#def repairPDB(): +# subprocess.call(['foldx' +# , '--command=RepairPDB' +# , '--pdb-dir=' + indir +# , '--pdb=' + actual_pdb_filename +# , '--ionStrength=0.05'# +# , '--pH=7' +# , '--water=PREDICT' +# , '--vdwDesign=1' +# , 'outPDB=true' +# , '--output-dir=' + process_dir]) + +#======================================================================= +def main(): + pdbname = pdb_name + comp = '' # for complex only + mut_filename = infile_muts #pnca_mcsm_snps.csv + mutlist = formatMuts(mut_filename, pdbname) + + print(mutlist) + nmuts = len(mutlist) + print(nmuts) + print(mutlist) + + print('start') + # some common parameters for foldX + foldx_common=' --ionStrength=0.05 --pH=7 --water=PREDICT --vdwDesign=1 ' + + print('\033[95mSTAGE: repair PDB (foldx subprocess) \033[0m') + print('Running foldx RepairPDB for WT') + subprocess.call(['foldx5' + , '--command=RepairPDB' + , foldx_common + , '--pdb-dir=' + os.path.dirname(pdb_filename) + , '--pdb=' + actual_pdb_filename + , 'outPDB=true' + , '--output-dir=' + process_dir]) + print('\033[95mCOMPLETED STAGE: repair PDB\033[0m') + print('\n==========================================================') + + + print('\033[95mSTAGE: Foldx commands BM, PN and SD (foldx subprocess) for WT\033[0m') + print('Running foldx BuildModel for WT') + subprocess.call(['foldx5' + , '--command=BuildModel' + , foldx_common + , '--pdb-dir=' + process_dir + , '--pdb=' + pdbname + '_Repair.pdb' + , '--mutant-file="individual_list_' + pdbname +'.txt"' + , 'outPDB=true' + , '--numberOfRuns=1' + , '--output-dir=' + process_dir], cwd=process_dir) + + print('Running foldx PrintNetworks for WT') + subprocess.call(['foldx5' + , '--command=PrintNetworks' + , '--pdb-dir=' + process_dir + , '--pdb=' + pdbname + '_Repair.pdb' + , '--water=PREDICT' + , '--vdwDesign=1' + , '--output-dir=' + process_dir], cwd=process_dir) + + print('Running foldx SequenceDetail for WT') + subprocess.call(['foldx5' + , '--command=SequenceDetail' + , '--pdb-dir=' + process_dir + , '--pdb=' + pdbname + '_Repair.pdb' + , '--water=PREDICT' + , '--vdwDesign=1' + , '--output-dir=' + process_dir], cwd=process_dir) + print('\033[95mCOMPLETED STAGE: Foldx commands BM, PN and SD\033[0m') + print('\n==========================================================') + + + print('\033[95mSTAGE: Print Networks (foldx subprocess) for MT\033[0m') + for n in range(1,nmuts+1): + print('\033[95mNETWORK:\033[0m', n) + print('Running foldx PrintNetworks for mutation', n) + subprocess.call(['foldx5' + , '--command=PrintNetworks' + , '--pdb-dir=' + process_dir + , '--pdb=' + pdbname + '_Repair_' + str(n) + '.pdb' + , '--water=PREDICT' + , '--vdwDesign=1' + , '--output-dir=' + process_dir], cwd=process_dir) + print('\033[95mCOMPLETED STAGE: Print Networks (foldx subprocess) for MT\033[0m') + print('\n==========================================================') + + + print('\033[95mSTAGE: Rename Mutation Files (shell)\033[0m') + for n in range(1,nmuts+1): + print('\033[95mMUTATION:\033[0m', n) + print('\033[96mCommand:\033[0m mutrenamefiles.sh %s %s %s' % (pdbname, str(n), process_dir )) + #FIXME: bad design and needs to be done in a pythonic way + with suppress(Exception): + subprocess.check_output(['bash', 'mutrenamefiles.sh', pdbname, str(n), process_dir]) + print('\033[95mCOMPLETED STAGE: Rename Mutation Files (shell)\033[0m') + print('\n==========================================================') + + + print('\033[95mSTAGE: Rename Files (shell) for WT\033[0m') + # FIXME: this is bad design and needs to be done in a pythonic way + out = subprocess.check_output(['bash','renamefiles.sh', pdbname, process_dir]) + print('\033[95mCOMPLETED STAGE: Rename Files (shell) for WT\033[0m') + print('\n==========================================================') + + + if comp=='y': + print('\033[95mSTAGE: Running foldx AnalyseComplex (foldx subprocess) for WT\033[0m') + chain1=chainA + chain2=chainB + subprocess.call(['foldx5' + , '--command=AnalyseComplex' + , '--pdb-dir=' + process_dir + , '--pdb=' + pdbname + '_Repair.pdb' + , '--analyseComplexChains=' + chain1 + ',' + chain2 + , '--water=PREDICT' + , '--vdwDesign=1' + , '--output-dir=' + process_dir], cwd=process_dir) + + # FIXME why would we ever need to do this?!? Cargo-culted from runcomplex.sh + ac_source = process_dir + '/Summary_' + pdbname + '_Repair_AC.fxout' + ac_dest = process_dir + '/Summary_' + pdbname + '_Repair_AC.txt' + shutil.copyfile(ac_source, ac_dest) + print('\033[95mCOMPLETED STAGE: foldx AnalyseComplex (subprocess) for WT:\033[0m', n) + + for n in range(1,nmuts+1): + print('\033[95mSTAGE: Running foldx AnalyseComplex (foldx subprocess) for mutation:\033[0m', n) + subprocess.call(['foldx5' + , '--command=AnalyseComplex' + , '--pdb-dir=' + process_dir + , '--pdb=' + pdbname + '_Repair_' + str(n) + '.pdb' + , '--analyseComplexChains=' + chain1 + ',' + chain2 + , '--water=PREDICT' + , '--vdwDesign=1' + , '--output-dir=' + process_dir], cwd=process_dir) + + # FIXME why would we ever need to do this?!? Cargo-culted from runcomplex.sh + ac_mut_source = process_dir + '/Summary_' + pdbname + '_Repair_' + str(n) +'_AC.fxout' + ac_mut_dest = process_dir + '/Summary_' + pdbname + '_Repair)' + str(n) +'_AC.txt' + shutil.copyfile(ac_mut_source, ac_mut_dest) + print('\033[95mCOMPLETED STAGE: foldx AnalyseComplex (subprocess) for mutation:\033[0m', n) + print('\n==========================================================') + + interactions = ['Distances','Electro_RR','Electro_MM','Electro_SM','Electro_SS','Disulfide_RR','Disulfide_MM','Disulfide_SM','Disulfide_SS', 'Hbonds_RR','Hbonds_MM','Hbonds_SM','Hbonds_SS','Partcov_RR','Partcov_MM','Partcov_SM','Partcov_SS','VdWClashes_RR','VdWClashes_MM','VdWClashes_SM','VdWClashes_SS','Volumetric_RR','Volumetric_MM','Volumetric_SM','Volumetric_SS'] + + dGdatafile = process_dir + '/Dif_' + pdbname + '_Repair.txt' + dGdata = pd.read_csv(dGdatafile, sep = '\t') + + ddG=[] + print('ddG') + print(len(dGdata)) + for i in range(0,len(dGdata)): + ddG.append(dGdata['total energy'].loc[i]) + + + nint = len(interactions) + wt_int = [] + + for i in interactions: + filename = process_dir + '/Matrix_' + i + '_'+ pdbname + '_Repair_PN.txt' + wt_int.append(getInteractions(filename)) + print('wt') + print(wt_int) + + ntotal = nint+1 + print(ntotal) + print(nmuts) + data = np.empty((ntotal,nmuts)) + data[0] = ddG + print(data) + for i in range(0,len(interactions)): + d=[] + p=0 + for n in range(1, nmuts+1): + print(i) + filename = process_dir + '/Matrix_' + interactions[i] + '_' + pdbname + '_Repair_' + str(n) + '_PN.txt' + mut = getInteractions(filename) + diff = wt_int[i] - mut + print(diff) + print(wt_int[i]) + print(mut) + d.append(diff) + print(d) + data[i+1] = d + + interactions = ['ddG', 'Distances','Electro_RR','Electro_MM','Electro_SM','Electro_SS','Disulfide_RR','Disulfide_MM','Disulfide_SM','Disulfide_SS', 'Hbonds_RR','Hbonds_MM','Hbonds_SM','Hbonds_SS','Partcov_RR','Partcov_MM','Partcov_SM','Partcov_SS','VdWClashes_RR','VdWClashes_MM','VdWClashes_SM','VdWClashes_SS','Volumetric_RR','Volumetric_MM','Volumetric_SM','Volumetric_SS'] + + print(interactions) + + IE = [] + if comp=='y': + wtfilename = process_dir + '/Summary_' + pdbname + '_Repair_AC.txt' + wtE = getInteractionEnergy(wtfilename) + print(wtE) + for n in range(1,nmuts+1): + print(n) + filename = process_dir + '/Summary_' + pdbname + '_Repair_' + str(n) + '_AC.txt' + mutE = getInteractionEnergy(filename) + print(mutE) + diff = wtE - mutE + print(diff) + IE.append(diff) + print(IE) + IEresults = pd.DataFrame(IE,columns = ['Interaction Energy'], index = mutlist) + IEfilename = 'foldx_complexresults_'+pdbname+'.csv' + IEresults.to_csv(IEfilename) + print(len(IE)) + data = np.append(data,[IE], axis = 0) + print(data) + interactions = ['ddG','Distances','Electro_RR','Electro_MM','Electro_SM','Electro_SS','Disulfide_RR','Disulfide_MM','Disulfide_SM','Disulfide_SS','Hbonds_RR','Hbonds_MM','Hbonds_SM','Hbonds_SS','Partcov_RR','Partcov_MM','Partcov_SM','Partcov_SS','VdWClashes_RR','VdWClashes_MM','VdWClashes_SM','VdWClashes_SS','Volumetric_RR','Volumetric_MM','Volumetric_SM','Volumetric_SS','Interaction Energy'] + + mut_file = process_dir + '/individual_list_' + pdbname + '.txt' + with open(mut_file) as csvfile: + readCSV = csv.reader(csvfile) + mutlist = [] + for row in readCSV: + mut = row[0] + mutlist.append(mut) + print(mutlist) + print(len(mutlist)) + print(data) + results = pd.DataFrame(data, columns = mutlist, index = interactions) + results.append(ddG) + #print(results.head()) + + # my style formatted results + results2 = results.T # transpose df + results2.index.name = 'mutationinformation' # assign name to index + results2 = results2.reset_index() # turn it into a columns + + results2['mutationinformation'] = results2['mutationinformation'].replace({r'([A-Z]{1})[A-Z]{1}([0-9]+[A-Z]{1});' : r'\1 \2'}, regex = True) # capture mcsm style muts (i.e not the chain id) + results2['mutationinformation'] = results2['mutationinformation'].str.replace(' ', '') # remove empty space + + results2.rename(columns = {'Distances': 'Contacts'}, inplace = True) + + # lower case columns + results2.columns = results2.columns.str.lower() + + print('Writing file in the format below:\n' + , results2.head() + , '\nNo. of rows:', len(results2) + , '\nNo. of cols:', len(results2.columns)) + + outputfilename = outfile_foldx + #outputfilename = 'foldx_results_' + pdbname + '.csv' + #results.to_csv(outputfilename) + results2.to_csv(outputfilename, index = False) + print ('end') + +if __name__ == '__main__': + main() From 64018cce4cced45a789e03dc2eaa2349936bddd9 Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Tue, 9 Feb 2021 16:11:07 +0000 Subject: [PATCH 12/12] added dynamut dir --- dynamut/dynamut.py | 46 ++++++++++++++++++ dynamut/dynamut_test.py | 101 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 147 insertions(+) create mode 100755 dynamut/dynamut.py create mode 100755 dynamut/dynamut_test.py diff --git a/dynamut/dynamut.py b/dynamut/dynamut.py new file mode 100755 index 0000000..fca749b --- /dev/null +++ b/dynamut/dynamut.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Wed Aug 19 14:33:51 2020 + +@author: tanu +""" + + +#%% load packages +import os,sys +import subprocess +import argparse +import requests +import re +import time +from bs4 import BeautifulSoup +import pandas as pd +from pandas.api.types import is_string_dtype +from pandas.api.types import is_numeric_dtype +#%%============================================================================ + +#1) define muts batch +#take mcsm file +#split into 'n' batches +#write output file with suffix of batch number + + +#********** done this par **************** +#2) get results for a batch url +# read file +# store batch url +#extract number +#build single url +#build single results urls +#get results and store them in df +#update df +#dim of df = no. of muts in batch + +#3) format results +# store unit measurements separtely +# omit unit measurements from cols +# create extra columns '_outcome' suffix by splitting numerical output +# create separate col for mcsm as it doesn't have output text + +#%%============================================================================ diff --git a/dynamut/dynamut_test.py b/dynamut/dynamut_test.py new file mode 100755 index 0000000..3e8ed34 --- /dev/null +++ b/dynamut/dynamut_test.py @@ -0,0 +1,101 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Wed Aug 19 14:33:51 2020 + +@author: tanu +""" + + +#%% load packages +import os,sys +import subprocess +import argparse +import requests +import re +import time +from bs4 import BeautifulSoup +import pandas as pd +from pandas.api.types import is_string_dtype +from pandas.api.types import is_numeric_dtype +#%%============================================================================ + +batch_result_url = 'http://biosig.unimelb.edu.au/dynamut/results_prediction/15955901077' + +mut = 'S104R' +single_result_url = 'http://biosig.unimelb.edu.au/dynamut/single_results/15955901077' + '/' + mut + + + +#%%============================================================================ +param_dict = {} + +result_response = requests.get(single_result_url) +if result_response.status_code == 200: + print('Fetching results') + # extract results using the html parser + soup = BeautifulSoup(result_response.text, features = 'html.parser') + #web_result_raw = soup.find(id = 'predictions').get_text() + ddg_dynamut = soup.find(id = 'ddg_dynamut').get_text() + ddg_encom = soup.find(id = 'ddg_encom').get_text() + ddg_mcsm = soup.find(id = 'ddg_mcsm').get_text() + ddg_sdm = soup.find(id = 'ddg_sdm').get_text() + ddg_duet = soup.find(id = 'ddg_duet').get_text() + dds_encom = soup.find(id = 'dds_encom').get_text() + + param_dict = {"mutationinformation" : mut + , "ddg_dynamut" : ddg_dynamut + , "ddg_encom" : ddg_encom + , "ddg_mcsm" : ddg_mcsm + , "ddg_sdm" : ddg_sdm + , "ddg_duet" : ddg_duet + , "dds_encom" : dds_encom + + } + results_df = pd.DataFrame.from_dict(param_dict, orient = "index").T + +#%% for loop +#%% +host_dynamut = 'http://biosig.unimelb.edu.au/dynamut' +batch_url_number = re.search(r'([0-9]+)$', batch_result_url).group(0) +single_url = host_dynamut + '/single_results/' + batch_url_number + +muts = ["S104R", "G24R"] + +# initilialise empty df +dynamut_results_df = pd.DataFrame() + +for i, mut in enumerate(muts): + #param_dict = {} + print('Running mutation', i, ':', mut) + snp = mut + single_result_url = single_url + '/' + snp + print('Getting results from:', single_result_url) + + result_response = requests.get(single_result_url) + if result_response.status_code == 200: + print('Fetching results') + # extract results using the html parser + soup = BeautifulSoup(result_response.text, features = 'html.parser') + #web_result_raw = soup.find(id = 'predictions').get_text() + ddg_dynamut = soup.find(id = 'ddg_dynamut').get_text() + ddg_encom = soup.find(id = 'ddg_encom').get_text() + ddg_mcsm = soup.find(id = 'ddg_mcsm').get_text() + ddg_sdm = soup.find(id = 'ddg_sdm').get_text() + ddg_duet = soup.find(id = 'ddg_duet').get_text() + dds_encom = soup.find(id = 'dds_encom').get_text() + + param_dict = {"mutationinformation" : snp + , "ddg_dynamut" : ddg_dynamut + , "ddg_encom" : ddg_encom + , "ddg_mcsm" : ddg_mcsm + , "ddg_sdm" : ddg_sdm + , "ddg_duet" : ddg_duet + , "dds_encom" : dds_encom + } + results_df = pd.DataFrame.from_dict(param_dict, orient = "index").T + print(results_df) + dynamut_results_df = dynamut_results_df.append(results_df) + print(dynamut_results_df) + +