refactor foldx pipeline to include:
* command-line args * creating necessary dirs automagically * code cleanup, syntax errors, etc etc
This commit is contained in:
parent
e8a66a7a94
commit
973a1a33da
12 changed files with 700 additions and 56 deletions
69
foldx/mutrenamefiles.sh
Executable file
69
foldx/mutrenamefiles.sh
Executable file
|
@ -0,0 +1,69 @@
|
|||
#!/usr/bin/env bash
# Convert the FoldX network output (.fxout) of one mutant model into .txt files.
#
# Usage: mutrenamefiles.sh PDB n OUTDIR
#   PDB    - stem used in the FoldX file names
#   n      - mutant index used in the file names (<PDB>_Repair_<n>_PN)
#   OUTDIR - directory that holds the .fxout files; .txt files are written there
PDB=$1
n=$2
OUTDIR=$3

# Abort if the directory cannot be entered; otherwise every cp/sed below would
# run against whatever directory the caller happened to be in.
cd "${OUTDIR}" || exit 1
logger "Running mutrenamefiles with PDB: ${PDB} n: ${n} OUTDIR: ${OUTDIR}"

tag="${PDB}_Repair_${n}_PN"

# copy_without_header STEM COUNT
# Copy STEM.fxout to STEM.txt and delete the first COUNT lines of the copy.
copy_without_header() {
    cp "$1.fxout" "$1.txt"
    sed -i "1,${2}d" "$1.txt"
}

# Matrix files that are split into four fixed line ranges (RR, MM, SM, SS).
# NOTE(review): the ranges are hard-coded to 186 lines each; presumably this
# matches the residue count of the structure being analysed - confirm before
# using the script with a different protein.
for net in Hbonds Volumetric Electro Disulfide Partcov VdWClashes; do
    src="Matrix_${net}_${tag}.fxout"
    cp "${src}" "Matrix_${net}_${tag}.txt"
    sed -n '5,190p'   "${src}" > "Matrix_${net}_RR_${tag}.txt"
    sed -n '194,379p' "${src}" > "Matrix_${net}_MM_${tag}.txt"
    sed -n '383,568p' "${src}" > "Matrix_${net}_SM_${tag}.txt"
    sed -n '572,757p' "${src}" > "Matrix_${net}_SS_${tag}.txt"
done

# The distance matrix is kept whole; only its first four lines are dropped.
copy_without_header "Matrix_Distances_${tag}" 4

# AllAtoms files: drop the first two lines.
for net in Disulfide Electro Hbonds Partcov VdWClashes Volumetric; do
    copy_without_header "AllAtoms_${net}_${tag}" 2
done

# InteractingResidues files: drop the first five lines.
for net in VdWClashes Distances Electro Hbonds Partcov Volumetric Disulfide; do
    copy_without_header "InteractingResidues_${net}_${tag}" 5
done
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
68
foldx/mutrenamefiles_mac.sh
Executable file
68
foldx/mutrenamefiles_mac.sh
Executable file
|
@ -0,0 +1,68 @@
|
|||
#!/usr/bin/env bash
# macOS variant: convert the FoldX network output (.fxout) of one mutant model
# into .txt files. Works in the current directory (no cd is performed).
#
# Usage: mutrenamefiles_mac.sh PDB n
#   PDB - stem used in the FoldX file names
#   n   - mutant index used in the file names (<PDB>_Repair_<n>_PN)
PDB=$1
n=$2
logger "Running mutrenamefiles_mac"

tag="${PDB}_Repair_${n}_PN"

# copy_without_header STEM COUNT
# Copy STEM.fxout to STEM.txt and delete the first COUNT lines of the copy.
# Uses the BSD form of in-place sed, which leaves a STEM.txt.bak backup.
copy_without_header() {
    cp "$1.fxout" "$1.txt"
    sed -i .bak -e "1,${2}d" "$1.txt"
}

# Matrix files that are split into four fixed line ranges (RR, MM, SM, SS).
# NOTE(review): the ranges are hard-coded to 186 lines each; presumably this
# matches the residue count of the structure being analysed - confirm before
# using the script with a different protein.
for net in Hbonds Volumetric Electro Disulfide Partcov VdWClashes; do
    src="Matrix_${net}_${tag}.fxout"
    cp "${src}" "Matrix_${net}_${tag}.txt"
    sed -n '5,190p'   "${src}" > "Matrix_${net}_RR_${tag}.txt"
    sed -n '194,379p' "${src}" > "Matrix_${net}_MM_${tag}.txt"
    sed -n '383,568p' "${src}" > "Matrix_${net}_SM_${tag}.txt"
    sed -n '572,757p' "${src}" > "Matrix_${net}_SS_${tag}.txt"
done

# The distance matrix is kept whole; only its first four lines are dropped.
copy_without_header "Matrix_Distances_${tag}" 4

# AllAtoms files: drop the first two lines.
for net in Disulfide Electro Hbonds Partcov VdWClashes Volumetric; do
    copy_without_header "AllAtoms_${net}_${tag}" 2
done

# InteractingResidues files: drop the first five lines.
for net in VdWClashes Distances Electro Hbonds Partcov Volumetric Disulfide; do
    copy_without_header "InteractingResidues_${net}_${tag}" 5
done
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
10
foldx/mutruncomplex.sh
Executable file
10
foldx/mutruncomplex.sh
Executable file
|
@ -0,0 +1,10 @@
|
|||
#!/usr/bin/env bash
# Run FoldX AnalyseComplex on one repaired mutant model and copy the summary
# to a .txt file.
#
# Usage: mutruncomplex.sh PDB A B n OUTDIR
#   PDB    - stem of the model; the input file is <PDB>_Repair_<n>.pdb
#   A, B   - the two chain IDs passed to --analyseComplexChains
#   n      - mutant index used in the file names
#   OUTDIR - directory in which FoldX is run
PDB=$1
A=$2
B=$3
n=$4
OUTDIR=$5

# Abort if the directory cannot be entered instead of running FoldX elsewhere.
cd "${OUTDIR}" || exit 1
logger "Running mutruncomplex"
foldx --command=AnalyseComplex --pdb="${PDB}_Repair_${n}.pdb" --analyseComplexChains="${A},${B}" --water=PREDICT --vdwDesign=1

# We are already inside OUTDIR, so use bare file names: prefixing them with
# ${OUTDIR} again would point at OUTDIR/OUTDIR/... whenever OUTDIR is relative.
cp "Summary_${PDB}_Repair_${n}_AC.fxout" "Summary_${PDB}_Repair_${n}_AC.txt"
# Header stripping is currently disabled:
#sed -i .bak -e 1,8d Summary_${PDB}_Repair_${n}_AC.txt
|
70
foldx/renamefiles.sh
Executable file
70
foldx/renamefiles.sh
Executable file
|
@ -0,0 +1,70 @@
|
|||
#!/usr/bin/env bash
# Convert the FoldX output (.fxout) of the repaired structure into .txt files.
#
# Usage: renamefiles.sh PDB OUTDIR
#   PDB    - stem used in the FoldX file names (<PDB>_Repair...)
#   OUTDIR - directory that holds the .fxout files; .txt files are written there
PDB=$1
OUTDIR=$2

# Abort if the directory cannot be entered; otherwise every cp/sed below would
# run against whatever directory the caller happened to be in.
cd "${OUTDIR}" || exit 1
logger "Running renamefiles"

tag="${PDB}_Repair_PN"

# copy_without_header STEM COUNT
# Copy STEM.fxout to STEM.txt and delete the first COUNT lines of the copy.
copy_without_header() {
    cp "$1.fxout" "$1.txt"
    sed -i "1,${2}d" "$1.txt"
}

# Dif file: drop the first eight lines.
copy_without_header "Dif_${PDB}_Repair" 8

# Matrix files that are split into four fixed line ranges (RR, MM, SM, SS).
# NOTE(review): the ranges are hard-coded to 186 lines each; presumably this
# matches the residue count of the structure being analysed - confirm before
# using the script with a different protein.
for net in Hbonds Volumetric Electro Disulfide Partcov VdWClashes; do
    src="Matrix_${net}_${tag}.fxout"
    cp "${src}" "Matrix_${net}_${tag}.txt"
    sed -n '5,190p'   "${src}" > "Matrix_${net}_RR_${tag}.txt"
    sed -n '194,379p' "${src}" > "Matrix_${net}_MM_${tag}.txt"
    sed -n '383,568p' "${src}" > "Matrix_${net}_SM_${tag}.txt"
    sed -n '572,757p' "${src}" > "Matrix_${net}_SS_${tag}.txt"
done

# The distance matrix is kept whole; only its first four lines are dropped.
copy_without_header "Matrix_Distances_${tag}" 4

# AllAtoms files: drop the first two lines.
for net in Disulfide Electro Hbonds Partcov VdWClashes Volumetric; do
    copy_without_header "AllAtoms_${net}_${tag}" 2
done

# InteractingResidues files: drop the first five lines.
for net in VdWClashes Distances Electro Hbonds Partcov Volumetric Disulfide; do
    copy_without_header "InteractingResidues_${net}_${tag}" 5
done
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
68
foldx/renamefiles_mac.sh
Executable file
68
foldx/renamefiles_mac.sh
Executable file
|
@ -0,0 +1,68 @@
|
|||
#!/usr/bin/env bash
# macOS variant: convert the FoldX output (.fxout) of the repaired structure
# into .txt files. Works in the current directory (no cd is performed).
#
# Usage: renamefiles_mac.sh PDB
#   PDB - stem used in the FoldX file names (<PDB>_Repair...)
PDB=$1
logger "Running renamefiles_mac"

tag="${PDB}_Repair_PN"

# copy_without_header STEM COUNT
# Copy STEM.fxout to STEM.txt and delete the first COUNT lines of the copy.
# Uses the BSD form of in-place sed, which leaves a STEM.txt.bak backup.
copy_without_header() {
    cp "$1.fxout" "$1.txt"
    sed -i '.bak' -e "1,${2}d" "$1.txt"
}

# NOTE(review): the copy from Dif_<PDB>_Repair.fxout is disabled, so the sed
# below needs Dif_<PDB>_Repair.txt to exist already and strips eight more
# lines from it on every run - confirm this is intended.
#cp Dif_${PDB}_Repair.fxout Dif_${PDB}_Repair.txt
sed -i '.bak' -e 1,8d "Dif_${PDB}_Repair.txt"

# Matrix files that are split into four fixed line ranges (RR, MM, SM, SS).
# NOTE(review): the ranges are hard-coded to 186 lines each; presumably this
# matches the residue count of the structure being analysed - confirm before
# using the script with a different protein.
for net in Hbonds Volumetric Electro Disulfide Partcov VdWClashes; do
    src="Matrix_${net}_${tag}.fxout"
    cp "${src}" "Matrix_${net}_${tag}.txt"
    sed -n '5,190p'   "${src}" > "Matrix_${net}_RR_${tag}.txt"
    sed -n '194,379p' "${src}" > "Matrix_${net}_MM_${tag}.txt"
    sed -n '383,568p' "${src}" > "Matrix_${net}_SM_${tag}.txt"
    sed -n '572,757p' "${src}" > "Matrix_${net}_SS_${tag}.txt"
done

# The distance matrix is kept whole; only its first four lines are dropped.
copy_without_header "Matrix_Distances_${tag}" 4

# AllAtoms files: drop the first two lines.
for net in Disulfide Electro Hbonds Partcov VdWClashes Volumetric; do
    copy_without_header "AllAtoms_${net}_${tag}" 2
done

# InteractingResidues files: drop the first five lines.
for net in VdWClashes Distances Electro Hbonds Partcov Volumetric Disulfide; do
    copy_without_header "InteractingResidues_${net}_${tag}" 5
done
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
9
foldx/repairPDB.sh
Executable file
9
foldx/repairPDB.sh
Executable file
|
@ -0,0 +1,9 @@
|
|||
#!/usr/bin/env bash
# Run FoldX RepairPDB on one structure.
#
# Usage: repairPDB.sh INDIR PDB OUTDIR
#   INDIR  - directory passed to FoldX as --pdb-dir
#   PDB    - value passed to FoldX as --pdb
#   OUTDIR - directory passed to FoldX as --output-dir
INDIR=$1
PDB=$2
OUTDIR=$3

logger "Running repairPDB"

# Previous invocation (structure looked up in the current directory):
#foldx --command=RepairPDB --pdb="${PDB}.pdb" --ionStrength=0.05 --pH=7 --water=PREDICT --vdwDesign=1 outPDB=true --output-dir=${OUTDIR}

# Arguments are quoted so directories containing spaces survive word splitting.
# NOTE(review): "outPDB=true" has no leading "--" unlike every other option;
# check the FoldX documentation for the intended spelling.
foldx --command=RepairPDB --pdb-dir="${INDIR}" --pdb="${PDB}" --ionStrength=0.05 --pH=7 --water=PREDICT --vdwDesign=1 outPDB=true --output-dir="${OUTDIR}"
|
332
foldx/runFoldx.py
Executable file
332
foldx/runFoldx.py
Executable file
|
@ -0,0 +1,332 @@
|
|||
#!/usr/bin/env python3
|
||||
import subprocess
|
||||
import os
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from contextlib import suppress
|
||||
from pathlib import Path
|
||||
import re
|
||||
import csv
|
||||
import argparse
|
||||
#https://realpython.com/python-pathlib/
|
||||
|
||||
# FIXME
|
||||
#strong dependency of file and path names
|
||||
#cannot pass file with path. Need to pass them separately
|
||||
#assumptions made for dir struc as standard
|
||||
#datadir + drug + input
|
||||
|
||||
#=======================================================================
|
||||
#%% specify input and curr dir
|
||||
# Home directory of the invoking user; base of the default paths used below.
homedir = os.path.expanduser('~')

# set working dir
# (the bare os.getcwd() calls discard their result; they are kept as-is)
os.getcwd()
os.chdir(os.path.join(homedir, 'git', 'LSHTM_analysis', 'foldx', ''))
os.getcwd()
|
||||
|
||||
#=======================================================================
|
||||
#%% command line args
|
||||
# Command-line interface. Every option is optional; options left at None are
# replaced by defaults derived from the drug and gene names further down.
arg_parser = argparse.ArgumentParser()

arg_parser.add_argument('-d', '--drug', help = 'drug name', default = 'pyrazinamide')
arg_parser.add_argument('-g', '--gene', help = 'gene name', default = 'pncA') # case sensitive

arg_parser.add_argument('-i', '--input_dir', help = 'Input dir containing pdb files. By default, it assumes ~/git/Data/<drug>/input', default = None)
arg_parser.add_argument('-o', '--output_dir', help = 'Output dir for results. By default, it assumes ~/git/Data/<drug>/output', default = None)

arg_parser.add_argument('-f', '--pdb_file', help = 'PDB File to process. By default, it assumes a file called <gene>_complex.pdb (gene in lower case)', default = None)
arg_parser.add_argument('-m', '--mutation_file', help = 'Mutation list. By default, assumes a file called <gene>_test_snps.csv (gene in lower case) exists in the output dir', default = None)

arg_parser.add_argument('-c1', '--chain1', help = 'Chain1 ID', default = 'A') # case sensitive
arg_parser.add_argument('-c2', '--chain2', help = 'Chain2 ID', default = 'B') # case sensitive

args = arg_parser.parse_args()
|
||||
#=======================================================================
|
||||
#%% variable assignment: input and output
|
||||
#drug = 'pyrazinamide'
|
||||
#gene = 'pncA'
|
||||
#gene_match = gene + '_p.'
|
||||
#%%=====================================================================
|
||||
# Command Line Options
|
||||
# Copy the parsed options into the module-level names used by the rest of
# the script.
drug, gene = args.drug, args.gene
indir, outdir = args.input_dir, args.output_dir
pdb_filename = args.pdb_file
mut_filename = args.mutation_file
chainA, chainB = args.chain1, args.chain2
|
||||
|
||||
# os.path.splitext will fail interestingly with file.pdb.txt.zip
|
||||
#pdb_name = os.path.splitext(pdb_file)[0]
|
||||
# Just the filename, thanks
|
||||
#pdb_name = Path(in_filename_pdb).stem
|
||||
|
||||
#==========
|
||||
# dir
|
||||
#==========
|
||||
# Root of the data tree; the per-drug input/output/processing dirs live below it.
datadir = homedir + '/' + 'git/Data'

# Fall back to <datadir>/<drug>/input and <datadir>/<drug>/output when the
# directories were not given on the command line.
if not indir:
    indir = datadir + '/' + drug + '/' + 'input'

if not outdir:
    outdir = datadir + '/' + drug + '/' + 'output'

# FIXME: this is a temporary directory and should be correctly handled
process_dir = datadir + '/' + drug + '/' + 'processing'
# makedirs(exist_ok=True) instead of mkdir: mkdir raised FileExistsError on
# every run after the first and FileNotFoundError when <datadir>/<drug> was
# missing.
os.makedirs(process_dir, exist_ok=True)
|
||||
#=======
|
||||
# input
|
||||
#=======
|
||||
# FIXME
|
||||
|
||||
|
||||
# PDB file: default to <gene>_complex.pdb (gene in lower case) when none given.
if not pdb_filename:
    pdb_filename = gene.lower() + '_complex.pdb'
# File name without its last extension.
pdb_name = Path(pdb_filename).stem

infile_pdb = indir + '/' + pdb_filename
# Final path component of the PDB input (file name including extension).
actual_pdb_filename = Path(infile_pdb).name

# Mutation list: default to the test file <gene>_test_snps.csv
# (the real run would use <gene>_mcsm_snps.csv instead).
mutation_file = mut_filename if mut_filename else gene.lower() + '_test_snps.csv'

# The mutation list is looked up in the output dir, not the input dir.
infile_muts = outdir + '/' + mutation_file

#=======
# output
#=======
out_filename = gene.lower() + '_foldx_results.csv'
outfile_foldx = outdir + '/' + out_filename
|
||||
|
||||
# Echo the resolved settings so a run can be checked from its log.
# chain2 is included as well; it was previously the only option not shown.
print('Arguments being passed:'
      , '\nDrug:', args.drug
      , '\ngene:', args.gene
      , '\ninput dir:', indir
      , '\noutput dir:', outdir
      , '\npdb file:', infile_pdb
      , '\npdb name:', pdb_name
      , '\nactual pdb name:', actual_pdb_filename
      , '\nmutation file:', infile_muts
      , '\nchain1:', args.chain1
      , '\nchain2:', args.chain2
      , '\noutput file:', outfile_foldx
      , '\n=============================================================')
|
||||
#=======================================================================
|
||||
|
||||
def getInteractionEnergy(filename):
    """Return the 'Interaction Energy' value at row label 0 of a tab-separated file.

    filename: path of a tab-separated file whose header row contains an
    'Interaction Energy' column.
    """
    table = pd.read_csv(filename, sep = '\t')
    energies = table['Interaction Energy']
    return energies.loc[0]
|
||||
|
||||
def getInteractions(filename):
    """Count the cells equal to 1 in a tab-separated matrix file.

    The file is read with its first row as the header and its first column
    as the index; the count is the number of (row, column) positions that
    getIndexes reports for the value 1.
    """
    matrix = pd.read_csv(filename, sep = '\t', header = 0, index_col = 0)
    return len(getIndexes(matrix, 1))
|
||||
|
||||
def formatMuts(mut_file,pdbname):
    """Write the mutation list file for one structure and return the mutations.

    Reads the first column of the CSV file mut_file, inserts the module-level
    chain ID chainA after the first character of every entry (with chain 'A',
    'L4S' becomes 'LA4S') and writes each result followed by ';' on its own
    line to <process_dir>/individual_list_<pdbname>.txt.

    mut_file: path of the CSV file holding one mutation per row (first column).
    pdbname:  name used to build the output file name.

    Returns the list of chain-annotated mutations (without the ';').
    """
    with open(mut_file) as csvfile:
        # csv.reader yields an empty list for a blank line; skip those (and
        # rows whose first field is empty) instead of failing on row[0].
        muts = [row[0] for row in csv.reader(csvfile) if row and row[0].strip()]

    mut_list = []
    outfile = process_dir + '/individual_list_' + pdbname + '.txt'
    with open(outfile, 'w') as output:
        for m in muts:
            print(m)
            mut = m[:1] + chainA + m[1:]
            mut_list.append(mut)
            mut = mut + ';'
            print(mut)
            output.write(mut)
            output.write('\n')
    return mut_list
|
||||
|
||||
def getIndexes(data, value):
    """Return the (row label, column label) positions where data equals value.

    data:  pandas DataFrame to search.
    value: value to look for.

    Positions are listed column by column, in row order within each column.
    """
    mask = data.isin([value])
    mask.columns = data.columns.values

    # Only visit the columns that contain at least one match.
    col_has_hit = mask.any()
    positions = []
    for col in list(col_has_hit[col_has_hit].index):
        col_mask = mask[col]
        positions.extend((row, col) for row in list(col_mask[col_mask].index))

    return positions
|
||||
|
||||
def loadFiles(df):
    """Load a tab-separated text file of numbers into a float32 NumPy array.

    df: path of the text file; every line holds tab-separated numeric fields.

    Returns a numpy.ndarray with dtype float32 and one row per line.
    Raises ValueError if a field cannot be converted to a number.
    """
    resultList = []
    # 'with' closes the file even when a conversion below fails.
    with open(df, 'r') as f:
        for line in f:
            fields = line.rstrip('\n').split('\t')
            # The split result used to be stored under one name and read
            # under another, so the first line raised NameError.
            resultList.append([np.float32(v) for v in fields])
    return np.asarray(resultList, dtype=np.float32)
|
||||
|
||||
#=======================================================================
|
||||
def main():
    """Run the FoldX helper scripts for every mutation and write the results CSV.

    Steps, in order:
      1. formatMuts() writes the mutation list file and returns the mutations.
      2. repairPDB.sh and runfoldx.sh are run once; runPrintNetworks.sh and
         mutrenamefiles.sh once per mutant (best effort); renamefiles.sh once.
      3. The 'total energy' column of Dif_<pdb>_Repair.txt becomes the ddG row.
      4. For every interaction matrix, the mutant count of cells equal to 1
         is subtracted from the wild-type count.
      5. The table is transposed to one row per mutation, the mutation names
         are reformatted, and the result is written to outfile_foldx.

    Reads module-level settings defined elsewhere in this file: pdb_name,
    infile_muts, indir, actual_pdb_filename, process_dir, chainA, chainB and
    outfile_foldx.
    """
    pdbname = pdb_name
    comp = ''  # for complex only
    mut_filename = infile_muts  # pnca_test_snps.csv
    mutlist = formatMuts(mut_filename, pdbname)

    print(mutlist)
    nmuts = len(mutlist)
    print(nmuts)
    print(mutlist)
    print('start')
    subprocess.check_output(['bash', 'repairPDB.sh', indir, actual_pdb_filename, process_dir])

    print('end')
    subprocess.check_output(['bash', 'runfoldx.sh', pdbname, process_dir])

    # Per-mutant steps are best effort: a failure for one mutant is suppressed
    # so the remaining mutants are still processed.
    for n in range(1, nmuts + 1):
        print(n)
        with suppress(Exception):
            subprocess.check_output(['bash', 'runPrintNetworks.sh', pdbname, str(n), process_dir])

    for n in range(1, nmuts + 1):
        print(n)
        with suppress(Exception):
            subprocess.check_output(['bash', 'mutrenamefiles.sh', pdbname, str(n), process_dir])

    subprocess.check_output(['bash', 'renamefiles.sh', pdbname, process_dir])

    if comp == 'y':
        chain1 = chainA
        chain2 = chainB
        with suppress(Exception):
            subprocess.check_output(['bash', 'runcomplex.sh', pdbname, chain1, chain2, process_dir])
        # NOTE(review): the nesting of this loop relative to the 'with' above
        # could not be recovered with certainty -- confirm against the repo.
        for n in range(1, nmuts + 1):
            with suppress(Exception):
                subprocess.check_output(['bash', 'mutruncomplex.sh', pdbname, chain1, chain2, str(n), process_dir])

    interactions = ['Distances', 'Electro_RR', 'Electro_MM', 'Electro_SM', 'Electro_SS',
                    'Disulfide_RR', 'Disulfide_MM', 'Disulfide_SM', 'Disulfide_SS',
                    'Hbonds_RR', 'Hbonds_MM', 'Hbonds_SM', 'Hbonds_SS',
                    'Partcov_RR', 'Partcov_MM', 'Partcov_SM', 'Partcov_SS',
                    'VdWClashes_RR', 'VdWClashes_MM', 'VdWClashes_SM', 'VdWClashes_SS',
                    'Volumetric_RR', 'Volumetric_MM', 'Volumetric_SM', 'Volumetric_SS']

    dGdatafile = process_dir + '/Dif_' + pdbname + '_Repair.txt'
    dGdata = pd.read_csv(dGdatafile, sep='\t')

    print('ddG')
    print(len(dGdata))
    ddG = [dGdata['total energy'].loc[i] for i in range(len(dGdata))]

    # wild-type contact count for each interaction type
    nint = len(interactions)
    wt_int = []
    for name in interactions:
        filename = process_dir + '/Matrix_' + name + '_' + pdbname + '_Repair_PN.txt'
        wt_int.append(getInteractions(filename))
    print('wt')
    print(wt_int)

    # row 0 holds ddG, rows 1..nint hold one interaction type each
    ntotal = nint + 1
    print(ntotal)
    print(nmuts)
    data = np.empty((ntotal, nmuts))
    data[0] = ddG
    print(data)
    for i, name in enumerate(interactions):
        d = []
        for n in range(1, nmuts + 1):
            print(i)
            filename = process_dir + '/Matrix_' + name + '_' + pdbname + '_Repair_' + str(n) + '_PN.txt'
            mut = getInteractions(filename)
            diff = wt_int[i] - mut
            print(diff)
            print(wt_int[i])
            print(mut)
            d.append(diff)
        print(d)
        data[i + 1] = d

    # row labels of `data`: ddG first, then the interaction types
    interactions = ['ddG'] + interactions
    print(interactions)

    IE = []
    if comp == 'y':
        wtfilename = process_dir + '/Summary_' + pdbname + '_Repair_AC.txt'
        wtE = getInteractionEnergy(wtfilename)
        print(wtE)
        for n in range(1, nmuts + 1):
            print(n)
            filename = process_dir + '/Summary_' + pdbname + '_Repair_' + str(n) + '_AC.txt'
            mutE = getInteractionEnergy(filename)
            print(mutE)
            diff = wtE - mutE
            print(diff)
            IE.append(diff)
        print(IE)
        IEresults = pd.DataFrame(IE, columns=['Interaction Energy'], index=mutlist)
        IEfilename = 'foldx_complexresults_' + pdbname + '.csv'
        IEresults.to_csv(IEfilename)
        print(len(IE))
        data = np.append(data, [IE], axis=0)
        print(data)
        interactions = interactions + ['Interaction Energy']

    # Re-read the mutation names from the list file: these carry the trailing
    # ';' that the regex below expects.
    mut_file = process_dir + '/individual_list_' + pdbname + '.txt'
    with open(mut_file) as csvfile:
        readCSV = csv.reader(csvfile)
        mutlist = []
        for row in readCSV:
            mut = row[0]
            mutlist.append(mut)
    print(mutlist)
    print(len(mutlist))
    print(data)
    # ddG is already row 0 of `data`; the former `results.append(ddG)` threw
    # its result away and DataFrame.append no longer exists in pandas >= 2.0.
    results = pd.DataFrame(data, columns=mutlist, index=interactions)

    # my style formatted results
    results2 = results.T  # transpose df
    results2.index.name = 'mutationinformation'  # assign name to index
    results2 = results2.reset_index()  # turn it into a column

    # capture mcsm style muts (i.e not the chain id)
    results2['mutationinformation'] = results2['mutationinformation'].replace({r'([A-Z]{1})[A-Z]{1}([0-9]+[A-Z]{1});' : r'\1 \2'}, regex = True)
    results2['mutationinformation'] = results2['mutationinformation'].str.replace(' ', '')  # remove empty space

    results2.rename(columns={'Distances': 'Contacts'}, inplace=True)

    # lower case columns
    results2.columns = results2.columns.str.lower()

    print('Writing file in the format below:\n'
          , results2.head()
          , '\nNo. of rows:', len(results2)
          , '\nNo. of cols:', len(results2.columns))

    outputfilename = outfile_foldx
    results2.to_csv(outputfilename, index=False)
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
7
foldx/runPrintNetworks.sh
Executable file
7
foldx/runPrintNetworks.sh
Executable file
|
@ -0,0 +1,7 @@
|
|||
#!/usr/bin/env bash
# Run FoldX PrintNetworks on one mutant model.
# Usage: runPrintNetworks.sh <PDB> <n> <OUTDIR>
#   PDB    - pdb base name; the input model is ${PDB}_Repair_${n}.pdb
#   n      - mutant number
#   OUTDIR - directory to run in, also passed to foldx as --output-dir
PDB=$1
n=$2
OUTDIR=$3
logger "Running runPrintNetworks"
# Stop if the directory cannot be entered so foldx never runs elsewhere.
cd "${OUTDIR}" || exit 1

foldx --command=PrintNetworks --pdb="${PDB}_Repair_${n}.pdb" --water=PREDICT --vdwDesign=1 --output-dir="${OUTDIR}"
|
10
foldx/runcomplex.sh
Executable file
10
foldx/runcomplex.sh
Executable file
|
@ -0,0 +1,10 @@
|
|||
#!/usr/bin/env bash
# Run FoldX AnalyseComplex on the repaired structure and copy the summary
# output to a .txt file.
# Usage: runcomplex.sh <PDB> <A> <B> <OUTDIR>
#   PDB    - pdb base name; the input model is ${PDB}_Repair.pdb
#   A, B   - the two chains passed to --analyseComplexChains
#   OUTDIR - directory to run in, also passed to foldx as --output-dir
PDB=$1
A=$2
B=$3
OUTDIR=$4
# Stop if the directory cannot be entered so foldx never runs elsewhere.
cd "${OUTDIR}" || exit 1
logger "Running runcomplex"
foldx --command=AnalyseComplex --pdb="${PDB}_Repair.pdb" --analyseComplexChains="${A},${B}" --water=PREDICT --vdwDesign=1 --output-dir="${OUTDIR}"
cp "${OUTDIR}/Summary_${PDB}_Repair_AC.fxout" "${OUTDIR}/Summary_${PDB}_Repair_AC.txt"
#sed -i .bak -e 1,8d ${OUTDIR}/Summary_${PDB}_Repair_AC.txt
|
||||
|
9
foldx/runfoldx.sh
Executable file
9
foldx/runfoldx.sh
Executable file
|
@ -0,0 +1,9 @@
|
|||
#!/usr/bin/env bash
# Run the FoldX BuildModel, PrintNetworks and SequenceDetail commands on the
# repaired structure.
# Usage: runfoldx.sh <PDB> <OUTDIR>
#   PDB    - pdb base name; the input model is ${PDB}_Repair.pdb and the
#            mutation list is individual_list_${PDB}.txt
#   OUTDIR - directory to run in, also passed to foldx as --output-dir
PDB=$1
OUTDIR=$2
# Stop if the directory cannot be entered so foldx never runs elsewhere.
cd "${OUTDIR}" || exit 1
pwd
ls
logger "Running runfoldx"
foldx --command=BuildModel --pdb="${PDB}_Repair.pdb" --mutant-file="individual_list_${PDB}.txt" --ionStrength=0.05 --pH=7 --water=PREDICT --vdwDesign=1 --out-pdb=true --numberOfRuns=1 --output-dir="${OUTDIR}"
foldx --command=PrintNetworks --pdb="${PDB}_Repair.pdb" --water=PREDICT --vdwDesign=1 --output-dir="${OUTDIR}"
foldx --command=SequenceDetail --pdb="${PDB}_Repair.pdb" --water=PREDICT --vdwDesign=1 --output-dir="${OUTDIR}"
|
92
mcsm/mcsm.py
92
mcsm/mcsm.py
|
@ -14,8 +14,6 @@ import numpy as np
|
|||
from mcsm import *
|
||||
#==============================
|
||||
#%% global variables for defs
|
||||
|
||||
|
||||
#==============================
|
||||
#%%
|
||||
|
||||
|
@ -194,9 +192,9 @@ def format_mcsm_output(mcsm_outputcsv):
|
|||
#############
|
||||
# format colnames: all lowercase, remove spaces and use '_' to join
|
||||
print('Assigning meaningful colnames i.e without spaces and hyphen and reflecting units'
|
||||
, '\n===================================================================')
|
||||
, '\n=======================================================')
|
||||
my_colnames_dict = {'Predicted Affinity Change': 'PredAffLog' # relevant info from this col will be extracted and the column discarded
|
||||
, 'Mutation information': 'mutation_information' # {wild_type}<position>{mutant_type}
|
||||
, 'Mutation information': 'mutationinformation' # {wild_type}<position>{mutant_type}
|
||||
, 'Wild-type': 'wild_type' # one letter amino acid code
|
||||
, 'Position': 'position' # number
|
||||
, 'Mutant-type': 'mutant_type' # one letter amino acid code
|
||||
|
@ -206,41 +204,41 @@ def format_mcsm_output(mcsm_outputcsv):
|
|||
, 'DUET stability change': 'duet_stability_change'} # in kcal/mol
|
||||
|
||||
mcsm_data.rename(columns = my_colnames_dict, inplace = True)
|
||||
#%%===========================================================================
|
||||
#%%=====================================================================
|
||||
#################################
|
||||
# populate mutation_information
|
||||
# populate mutationinformation
|
||||
# col which is currently blank
|
||||
#################################
|
||||
# populate mutation_information column:mcsm style muts {WT}<POS>{MUT}
|
||||
print('Populating column : mutation_information which is currently empty\n', mcsm_data['mutation_information'])
|
||||
mcsm_data['mutation_information'] = mcsm_data['wild_type'] + mcsm_data['position'].astype(str) + mcsm_data['mutant_type']
|
||||
print('checking after populating:\n', mcsm_data['mutation_information']
|
||||
, '\n===================================================================')
|
||||
# populate mutationinformation column:mcsm style muts {WT}<POS>{MUT}
|
||||
print('Populating column : mutationinformation which is currently empty\n', mcsm_data['mutationinformation'])
|
||||
mcsm_data['mutationinformation'] = mcsm_data['wild_type'] + mcsm_data['position'].astype(str) + mcsm_data['mutant_type']
|
||||
print('checking after populating:\n', mcsm_data['mutationinformation']
|
||||
, '\n=======================================================')
|
||||
|
||||
# Remove spaces b/w pasted columns
|
||||
print('removing white space within column: \mutation_information')
|
||||
mcsm_data['mutation_information'] = mcsm_data['mutation_information'].str.replace(' ', '')
|
||||
print('Correctly formatted column: mutation_information\n', mcsm_data['mutation_information']
|
||||
, '\n===================================================================')
|
||||
#%%===========================================================================
|
||||
print('removing white space within column: \mutationinformation')
|
||||
mcsm_data['mutationinformation'] = mcsm_data['mutationinformation'].str.replace(' ', '')
|
||||
print('Correctly formatted column: mutationinformation\n', mcsm_data['mutationinformation']
|
||||
, '\n=======================================================')
|
||||
#%%=====================================================================
|
||||
#############
|
||||
# sanity check: drop duplicate muts
|
||||
#############
|
||||
# shouldn't exist as this should be eliminated at the time of running mcsm
|
||||
print('Sanity check:'
|
||||
, '\nChecking duplicate mutations')
|
||||
if mcsm_data['mutation_information'].duplicated().sum() == 0:
|
||||
if mcsm_data['mutationinformation'].duplicated().sum() == 0:
|
||||
print('PASS: No duplicate mutations detected (as expected)'
|
||||
, '\nDim of data:', mcsm_data.shape
|
||||
, '\n===============================================================')
|
||||
, '\n===================================================')
|
||||
else:
|
||||
print('WARNING: Duplicate mutations detected'
|
||||
, '\nDim of df with duplicates:', mcsm_data.shape
|
||||
, 'Removing duplicate entries')
|
||||
mcsm_data = mcsm_data.drop_duplicates(['mutation_information'])
|
||||
mcsm_data = mcsm_data.drop_duplicates(['mutationinformation'])
|
||||
print('Dim of data after removing duplicate muts:', mcsm_data.shape
|
||||
, '\n===============================================================')
|
||||
#%%===========================================================================
|
||||
, '\n===========================================================')
|
||||
#%%=====================================================================
|
||||
#############
|
||||
# Create col: duet_outcome
|
||||
#############
|
||||
|
@ -259,8 +257,8 @@ def format_mcsm_output(mcsm_outputcsv):
|
|||
# print('FAIL: DUET outcome assigned incorrectly'
|
||||
# , '\nExpected no. of stabilising mutations:', DUET_pos
|
||||
# , '\nGot no. of stabilising mutations', mcsm_data['duet_outcome'].value_counts()['Stabilising']
|
||||
# , '\n===============================================================')
|
||||
#%%===========================================================================
|
||||
# , '\n======================================================')
|
||||
#%%=====================================================================
|
||||
#############
|
||||
# Extract numeric
|
||||
# part of ligand_distance col
|
||||
|
@ -271,7 +269,7 @@ def format_mcsm_output(mcsm_outputcsv):
|
|||
print('extracting numeric part of col: ligand_distance')
|
||||
mcsm_data['ligand_distance'] = mcsm_data['ligand_distance'].str.extract('(\d+\.?\d*)')
|
||||
print('Ligand Distance:',mcsm_data['ligand_distance'])
|
||||
#%%===========================================================================
|
||||
#%%=====================================================================
|
||||
#############
|
||||
# Create 2 columns:
|
||||
# ligand_affinity_change and ligand_outcome
|
||||
|
@ -282,7 +280,7 @@ def format_mcsm_output(mcsm_outputcsv):
|
|||
# categorical part: '\b(\w+ing)\b'
|
||||
print('Extracting numerical and categorical parts from the col: PredAffLog')
|
||||
print('to create two columns: ligand_affinity_change and ligand_outcome'
|
||||
, '\n===================================================================')
|
||||
, '\n=======================================================')
|
||||
|
||||
# 1) Extracting the predicted affinity change (numerical part)
|
||||
mcsm_data['ligand_affinity_change'] = mcsm_data['PredAffLog'].str.extract('(-?\d+\.?\d*)', expand = True)
|
||||
|
@ -308,24 +306,24 @@ def format_mcsm_output(mcsm_outputcsv):
|
|||
if check.all():
|
||||
print('PASS: spelling change successfull'
|
||||
, '\nNo. of predicted affinity changes:\n', british_spl
|
||||
, '\n===============================================================')
|
||||
, '\n===================================================')
|
||||
else:
|
||||
sys.exit('FAIL: spelling change unsucessfull'
|
||||
, '\nExpected:\n', american_spl
|
||||
, '\nGot:\n', british_spl
|
||||
, '\n===============================================================')
|
||||
#%%===========================================================================
|
||||
, '\n===================================================')
|
||||
#%%=====================================================================
|
||||
#############
|
||||
# ensuring correct dtype for numeric columns
|
||||
#############
|
||||
# check dtype in cols
|
||||
print('Checking dtypes in all columns:\n', mcsm_data.dtypes
|
||||
, '\n===================================================================')
|
||||
, '\n=======================================================')
|
||||
print('Converting the following cols to numeric:'
|
||||
, '\nligand_distance'
|
||||
, '\nduet_stability_change'
|
||||
, '\nligand_affinity_change'
|
||||
, '\n===================================================================')
|
||||
, '\n=======================================================')
|
||||
|
||||
# using apply method to change stability and affinity values to numeric
|
||||
numeric_cols = ['duet_stability_change', 'ligand_affinity_change', 'ligand_distance']
|
||||
|
@ -336,12 +334,12 @@ def format_mcsm_output(mcsm_outputcsv):
|
|||
if cols_check.all():
|
||||
print('PASS: dtypes for selected cols:', numeric_cols
|
||||
, '\nchanged to numeric'
|
||||
, '\n===============================================================')
|
||||
, '\n===================================================')
|
||||
else:
|
||||
sys.exit('FAIL:dtype change to numeric for selected cols unsuccessful'
|
||||
, '\n===============================================================')
|
||||
, '\n===================================================')
|
||||
print(mcsm_data.dtypes)
|
||||
#%%===========================================================================
|
||||
#%%=====================================================================
|
||||
#############
|
||||
# scale duet values
|
||||
#############
|
||||
|
@ -357,7 +355,7 @@ def format_mcsm_output(mcsm_outputcsv):
|
|||
, '\n---------------------------------------------------------------'
|
||||
, '\nScaled duet scores:\n', mcsm_data['duet_scaled'])
|
||||
|
||||
#%%===========================================================================
|
||||
#%%=====================================================================
|
||||
#############
|
||||
# scale affinity values
|
||||
#############
|
||||
|
@ -373,7 +371,7 @@ def format_mcsm_output(mcsm_outputcsv):
|
|||
, '\n---------------------------------------------------------------'
|
||||
, '\nScaled affinity scores:\n', mcsm_data['affinity_scaled'])
|
||||
|
||||
#%%===========================================================================
|
||||
#%%=====================================================================
|
||||
#############
|
||||
# adding column: wild_position
|
||||
# useful for plots and db
|
||||
|
@ -385,33 +383,33 @@ def format_mcsm_output(mcsm_outputcsv):
|
|||
print('removing white space within column: wild_position')
|
||||
mcsm_data['wild_position'] = mcsm_data['wild_position'].str.replace(' ', '')
|
||||
print('Correctly formatted column: wild_position\n', mcsm_data['wild_position'].head()
|
||||
, '\n===================================================================')
|
||||
, '\n=========================================================')
|
||||
|
||||
#%%===========================================================================
|
||||
#%%=====================================================================
|
||||
|
||||
#############
|
||||
# ensuring correct dtype in non-numeric cols
|
||||
#############
|
||||
|
||||
#) char cols
|
||||
char_cols = ['PredAffLog', 'mutation_information', 'wild_type', 'mutant_type', 'chain', 'ligand_id', 'duet_outcome', 'ligand_outcome', 'wild_position']
|
||||
char_cols = ['PredAffLog', 'mutationinformation', 'wild_type', 'mutant_type', 'chain', 'ligand_id', 'duet_outcome', 'ligand_outcome', 'wild_position']
|
||||
|
||||
#mcsm_data[char_cols] = mcsm_data[char_cols].astype(str)
|
||||
cols_check_char = mcsm_data.select_dtypes(include = 'object').columns.isin(char_cols)
|
||||
|
||||
if cols_check_char.all():
|
||||
print('PASS: dtypes for char cols:', char_cols, 'are indeed string'
|
||||
, '\n===============================================================')
|
||||
, '\n===================================================')
|
||||
else:
|
||||
sys.exit('FAIL:dtype change to numeric for selected cols unsuccessful'
|
||||
, '\n===============================================================')
|
||||
, '\n===================================================')
|
||||
#mcsm_data['ligand_distance', 'ligand_affinity_change'].apply(is_numeric_dtype(mcsm_data['ligand_distance', 'ligand_affinity_change']))
|
||||
print(mcsm_data.dtypes)
|
||||
#%%=============================================================================
|
||||
#%%=====================================================================
|
||||
# Removing PredAff log column as it is not needed?
|
||||
print('Removing col: PredAffLog since relevant info has been extracted from it')
|
||||
mcsm_dataf = mcsm_data.drop(columns = ['PredAffLog'])
|
||||
#%%===========================================================================
|
||||
#%%=====================================================================
|
||||
#############
|
||||
# sanity check before writing file
|
||||
#############
|
||||
|
@ -421,14 +419,14 @@ def format_mcsm_output(mcsm_outputcsv):
|
|||
if len(mcsm_dataf.columns) == expected_cols:
|
||||
print('PASS: formatting successful'
|
||||
, '\nformatted df has expected no. of cols:', expected_cols
|
||||
, '\n---------------------------------------------------------------'
|
||||
, '\n---------------------------------------------------'
|
||||
, '\ncolnames:', mcsm_dataf.columns
|
||||
, '\n----------------------------------------------------------------'
|
||||
, '\n---------------------------------------------------'
|
||||
, '\ndtypes in cols:', mcsm_dataf.dtypes
|
||||
, '\n----------------------------------------------------------------'
|
||||
, '\n---------------------------------------------------'
|
||||
, '\norig data shape:', dforig_shape
|
||||
, '\nformatted df shape:', mcsm_dataf.shape
|
||||
, '\n===============================================================')
|
||||
, '\n===================================================')
|
||||
else:
|
||||
sys.exit('FAIL: something went wrong in formatting df'
|
||||
, '\nLen of orig df:', dforig_len
|
||||
|
@ -438,7 +436,7 @@ def format_mcsm_output(mcsm_outputcsv):
|
|||
, '\nCheck formatting:'
|
||||
, '\ncheck hardcoded value:', expected_ncols_toadd
|
||||
, '\nis', expected_ncols_toadd, 'the no. of expected cols to add?'
|
||||
, '\n===============================================================')
|
||||
, '\n===================================================')
|
||||
|
||||
|
||||
return mcsm_dataf
|
||||
|
|
|
@ -71,7 +71,7 @@ infile2 = outdir + '/' + in_filename_afor_kin
|
|||
print('Input file0:', infile0
|
||||
, '\nInput file1:', infile1
|
||||
, '\nInput file2:', infile2
|
||||
, '\n===================================================================')
|
||||
, '\n=============================================================')
|
||||
|
||||
#=======
|
||||
# output
|
||||
|
@ -79,7 +79,7 @@ print('Input file0:', infile0
|
|||
out_filename = gene.lower() + '_metadata_afs_ors.csv'
|
||||
outfile = outdir + '/' + out_filename
|
||||
print('Output file:', outfile
|
||||
, '\n===================================================================')
|
||||
, '\n=============================================================')
|
||||
|
||||
|
||||
del(in_filename_afor, in_filename_afor_kin, datadir, indir, outdir)
|
||||
|
@ -217,13 +217,7 @@ if fail:
|
|||
del(left_df, right_df, common_cols, merging_cols, nmerging_cols, my_join, ndiff1, ndiff2, missing_mutinfo
|
||||
, expected_rows, expected_cols, fail)
|
||||
del(afor_df, snpinfo_df)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#=======================================================================
|
||||
|
||||
#%% Second merge: combined_df1 and afor_kin_df
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue