#!/usr/bin/env bash
# foldx/mutrenamefiles.sh
#
# Post-process the FoldX .fxout files of one mutant model: copy each file to
# .txt and either strip its leading lines or split it into line ranges.
#
# Usage: mutrenamefiles.sh PDB n OUTDIR
#   PDB     structure base name (file names contain ${PDB}_Repair_${n}_PN)
#   n       mutant model number
#   OUTDIR  directory holding the .fxout files; outputs are written there too
#
# NOTE(review): the sed line ranges are hard-coded (four 186-line ranges);
# presumably they depend on the size of the structure -- confirm before
# reusing this script with another protein.

PDB=$1
n=$2
OUTDIR=$3

# Stop here rather than copy/truncate files in the caller's directory.
cd "${OUTDIR}" || exit 1
logger "Running mutrenamefiles with PDB: ${PDB} n: ${n} OUTDIR: ${OUTDIR}"

tag="${PDB}_Repair_${n}_PN"

# Matrix files that are split: keep a full .txt copy and write the four line
# ranges to the RR, MM, SM and SS files.
for kind in Hbonds Volumetric Electro Disulfide Partcov VdWClashes; do
    src="Matrix_${kind}_${tag}.fxout"
    cp "${src}" "Matrix_${kind}_${tag}.txt"
    sed -n '5,190p'   "${src}" > "Matrix_${kind}_RR_${tag}.txt"
    sed -n '194,379p' "${src}" > "Matrix_${kind}_MM_${tag}.txt"
    sed -n '383,568p' "${src}" > "Matrix_${kind}_SM_${tag}.txt"
    sed -n '572,757p' "${src}" > "Matrix_${kind}_SS_${tag}.txt"
done

# The distance matrix is not split: only its first 4 lines are removed.
cp "Matrix_Distances_${tag}.fxout" "Matrix_Distances_${tag}.txt"
sed -i '1,4d' "Matrix_Distances_${tag}.txt"

# AllAtoms files: drop the first 2 lines.
for kind in Disulfide Electro Hbonds Partcov VdWClashes Volumetric; do
    cp "AllAtoms_${kind}_${tag}.fxout" "AllAtoms_${kind}_${tag}.txt"
    sed -i '1,2d' "AllAtoms_${kind}_${tag}.txt"
done

# InteractingResidues files: drop the first 5 lines.
# Disulfide stays last so the script's exit status is unchanged.
for kind in VdWClashes Distances Electro Hbonds Partcov Volumetric Disulfide; do
    cp "InteractingResidues_${kind}_${tag}.fxout" "InteractingResidues_${kind}_${tag}.txt"
    sed -i '1,5d' "InteractingResidues_${kind}_${tag}.txt"
done
#!/usr/bin/env bash
# foldx/mutrenamefiles_mac.sh
#
# macOS (BSD sed) variant of mutrenamefiles.sh: copy the FoldX .fxout files of
# one mutant model to .txt and strip/split them. In-place edits use
# `sed -i .bak`, so a .bak copy is left next to every edited file.
#
# Usage: mutrenamefiles_mac.sh PDB n
#   PDB  structure base name (file names contain ${PDB}_Repair_${n}_PN)
#   n    mutant model number
#
# This script does not change directory: it works on the files in the
# directory it is started from.
#
# NOTE(review): the sed line ranges are hard-coded (four 186-line ranges);
# presumably they depend on the size of the structure -- confirm before
# reusing this script with another protein.

PDB=$1
n=$2
logger "Running mutrenamefiles_mac"

tag="${PDB}_Repair_${n}_PN"

# Matrix files that are split: keep a full .txt copy and write the four line
# ranges to the RR, MM, SM and SS files.
for kind in Hbonds Volumetric Electro Disulfide Partcov VdWClashes; do
    src="Matrix_${kind}_${tag}.fxout"
    cp "${src}" "Matrix_${kind}_${tag}.txt"
    sed -n '5,190p'   "${src}" > "Matrix_${kind}_RR_${tag}.txt"
    sed -n '194,379p' "${src}" > "Matrix_${kind}_MM_${tag}.txt"
    sed -n '383,568p' "${src}" > "Matrix_${kind}_SM_${tag}.txt"
    sed -n '572,757p' "${src}" > "Matrix_${kind}_SS_${tag}.txt"
done

# The distance matrix is not split: only its first 4 lines are removed.
cp "Matrix_Distances_${tag}.fxout" "Matrix_Distances_${tag}.txt"
sed -i .bak -e '1,4d' "Matrix_Distances_${tag}.txt"

# AllAtoms files: drop the first 2 lines.
for kind in Disulfide Electro Hbonds Partcov VdWClashes Volumetric; do
    cp "AllAtoms_${kind}_${tag}.fxout" "AllAtoms_${kind}_${tag}.txt"
    sed -i .bak -e '1,2d' "AllAtoms_${kind}_${tag}.txt"
done

# InteractingResidues files: drop the first 5 lines.
# Disulfide stays last so the script's exit status is unchanged.
for kind in VdWClashes Distances Electro Hbonds Partcov Volumetric Disulfide; do
    cp "InteractingResidues_${kind}_${tag}.fxout" "InteractingResidues_${kind}_${tag}.txt"
    sed -i .bak -e '1,5d' "InteractingResidues_${kind}_${tag}.txt"
done
#!/usr/bin/env bash
# foldx/mutruncomplex.sh
#
# Run FoldX AnalyseComplex on one mutant model and copy the resulting summary
# file to .txt.
#
# Usage: mutruncomplex.sh PDB A B n OUTDIR
#   PDB     structure base name (input is ${PDB}_Repair_${n}.pdb)
#   A, B    the two chain IDs passed to --analyseComplexChains
#   n       mutant model number
#   OUTDIR  directory that holds the model; the script runs inside it

PDB=$1
A=$2
B=$3
n=$4
OUTDIR=$5

# Stop here rather than run FoldX in the caller's directory.
cd "${OUTDIR}" || exit 1
logger "Running mutruncomplex"

foldx --command=AnalyseComplex --pdb="${PDB}_Repair_${n}.pdb" --analyseComplexChains="${A},${B}" --water=PREDICT --vdwDesign=1

# We are already inside OUTDIR, so address the summary relative to it; the
# former ${OUTDIR}/ prefix pointed at the wrong place for a relative OUTDIR.
cp "Summary_${PDB}_Repair_${n}_AC.fxout" "Summary_${PDB}_Repair_${n}_AC.txt"
#!/usr/bin/env bash
# foldx/renamefiles.sh
#
# Post-process the FoldX .fxout files of the repaired (wild-type) structure:
# copy each file to .txt and either strip its leading lines or split it into
# line ranges.
#
# Usage: renamefiles.sh PDB OUTDIR
#   PDB     structure base name (file names contain ${PDB}_Repair)
#   OUTDIR  directory holding the .fxout files; outputs are written there too
#
# NOTE(review): the sed line ranges are hard-coded (four 186-line ranges);
# presumably they depend on the size of the structure -- confirm before
# reusing this script with another protein.

PDB=$1
OUTDIR=$2

# Stop here rather than copy/truncate files in the caller's directory.
cd "${OUTDIR}" || exit 1
logger "Running renamefiles"

tag="${PDB}_Repair_PN"

# Dif file: drop the first 8 lines.
cp "Dif_${PDB}_Repair.fxout" "Dif_${PDB}_Repair.txt"
sed -i '1,8d' "Dif_${PDB}_Repair.txt"

# Matrix files that are split: keep a full .txt copy and write the four line
# ranges to the RR, MM, SM and SS files.
for kind in Hbonds Volumetric Electro Disulfide Partcov VdWClashes; do
    src="Matrix_${kind}_${tag}.fxout"
    cp "${src}" "Matrix_${kind}_${tag}.txt"
    sed -n '5,190p'   "${src}" > "Matrix_${kind}_RR_${tag}.txt"
    sed -n '194,379p' "${src}" > "Matrix_${kind}_MM_${tag}.txt"
    sed -n '383,568p' "${src}" > "Matrix_${kind}_SM_${tag}.txt"
    sed -n '572,757p' "${src}" > "Matrix_${kind}_SS_${tag}.txt"
done

# The distance matrix is not split: only its first 4 lines are removed.
cp "Matrix_Distances_${tag}.fxout" "Matrix_Distances_${tag}.txt"
sed -i '1,4d' "Matrix_Distances_${tag}.txt"

# AllAtoms files: drop the first 2 lines.
for kind in Disulfide Electro Hbonds Partcov VdWClashes Volumetric; do
    cp "AllAtoms_${kind}_${tag}.fxout" "AllAtoms_${kind}_${tag}.txt"
    sed -i '1,2d' "AllAtoms_${kind}_${tag}.txt"
done

# InteractingResidues files: drop the first 5 lines.
# Disulfide stays last so the script's exit status is unchanged.
for kind in VdWClashes Distances Electro Hbonds Partcov Volumetric Disulfide; do
    cp "InteractingResidues_${kind}_${tag}.fxout" "InteractingResidues_${kind}_${tag}.txt"
    sed -i '1,5d' "InteractingResidues_${kind}_${tag}.txt"
done
#!/usr/bin/env bash
# foldx/renamefiles_mac.sh
#
# macOS (BSD sed) variant of renamefiles.sh: copy the FoldX .fxout files of
# the repaired (wild-type) structure to .txt and strip/split them. In-place
# edits use `sed -i '.bak'`, so a .bak copy is left next to every edited file.
#
# Usage: renamefiles_mac.sh PDB
#   PDB  structure base name (file names contain ${PDB}_Repair)
#
# This script does not change directory: it works on the files in the
# directory it is started from.
#
# NOTE(review): the sed line ranges are hard-coded (four 186-line ranges);
# presumably they depend on the size of the structure -- confirm before
# reusing this script with another protein.

PDB=$1
logger "Running renamefiles_mac"

tag="${PDB}_Repair_PN"

# Dif file: drop the first 8 lines. The copy was commented out, which left
# sed editing a .txt file this script never created (and stripping 8 more
# lines from a stale one on every re-run); restored to match renamefiles.sh.
cp "Dif_${PDB}_Repair.fxout" "Dif_${PDB}_Repair.txt"
sed -i '.bak' -e '1,8d' "Dif_${PDB}_Repair.txt"

# Matrix files that are split: keep a full .txt copy and write the four line
# ranges to the RR, MM, SM and SS files.
for kind in Hbonds Volumetric Electro Disulfide Partcov VdWClashes; do
    src="Matrix_${kind}_${tag}.fxout"
    cp "${src}" "Matrix_${kind}_${tag}.txt"
    sed -n '5,190p'   "${src}" > "Matrix_${kind}_RR_${tag}.txt"
    sed -n '194,379p' "${src}" > "Matrix_${kind}_MM_${tag}.txt"
    sed -n '383,568p' "${src}" > "Matrix_${kind}_SM_${tag}.txt"
    sed -n '572,757p' "${src}" > "Matrix_${kind}_SS_${tag}.txt"
done

# The distance matrix is not split: only its first 4 lines are removed.
cp "Matrix_Distances_${tag}.fxout" "Matrix_Distances_${tag}.txt"
sed -i '.bak' -e '1,4d' "Matrix_Distances_${tag}.txt"

# AllAtoms files: drop the first 2 lines.
for kind in Disulfide Electro Hbonds Partcov VdWClashes Volumetric; do
    cp "AllAtoms_${kind}_${tag}.fxout" "AllAtoms_${kind}_${tag}.txt"
    sed -i '.bak' -e '1,2d' "AllAtoms_${kind}_${tag}.txt"
done

# InteractingResidues files: drop the first 5 lines.
# Disulfide stays last so the script's exit status is unchanged.
for kind in VdWClashes Distances Electro Hbonds Partcov Volumetric Disulfide; do
    cp "InteractingResidues_${kind}_${tag}.fxout" "InteractingResidues_${kind}_${tag}.txt"
    sed -i '.bak' -e '1,5d' "InteractingResidues_${kind}_${tag}.txt"
done
#!/usr/bin/env bash
# foldx/repairPDB.sh
#
# Run FoldX RepairPDB on one structure.
#
# Usage: repairPDB.sh INDIR PDB OUTDIR
#   INDIR   directory containing the input file (passed as --pdb-dir)
#   PDB     input file name (passed as --pdb)
#   OUTDIR  directory FoldX writes to (passed as --output-dir)

INDIR=$1
PDB=$2
OUTDIR=$3

logger "Running repairPDB"

# `outPDB=true` used to be passed without a leading `--`, i.e. as a stray
# token; spelled here the way runfoldx.sh passes the same option.
# NOTE(review): confirm the option name against the installed FoldX version.
foldx --command=RepairPDB --pdb-dir="${INDIR}" --pdb="${PDB}" --ionStrength=0.05 --pH=7 --water=PREDICT --vdwDesign=1 --out-pdb=true --output-dir="${OUTDIR}"
#!/usr/bin/env python3
# foldx/runFoldx.py
import subprocess
import os
import numpy as np
import pandas as pd
from contextlib import suppress
from pathlib import Path
import re
import csv
import argparse
#https://realpython.com/python-pathlib/

# FIXME
# strong dependency of file and path names
# cannot pass file with path. Need to pass them separately
# assumptions made for dir struc as standard
# datadir + drug + input

#=======================================================================
#%% specify input and curr dir
homedir = os.path.expanduser('~')

# The helper shell scripts are invoked by bare file name further down, so the
# working directory has to be the directory that contains them.
os.chdir(homedir + '/git/LSHTM_analysis/foldx/')

#=======================================================================
#%% command line args
arg_parser = argparse.ArgumentParser()

arg_parser.add_argument('-d', '--drug', help = 'drug name', default = 'pyrazinamide')
arg_parser.add_argument('-g', '--gene', help = 'gene name', default = 'pncA') # case sensitive

arg_parser.add_argument('-i', '--input_dir', help = 'Input dir containing pdb files. By default, it assumes homedir + drug + input', default = None)
arg_parser.add_argument('-o', '--output_dir', help = 'Output dir for results. By default, it assumes homedir + drug + output', default = None)

arg_parser.add_argument('-f', '--pdb_file', help = 'PDB File to process. By default, it assumes a file called gene_complex.pdb', default = None)
arg_parser.add_argument('-m', '--mutation_file', help = 'Mutation list. By default, assumes a file called gene_test_snps.csv exists', default = None)

arg_parser.add_argument('-c1', '--chain1', help = 'Chain1 ID', default = 'A') # case sensitive
arg_parser.add_argument('-c2', '--chain2', help = 'Chain2 ID', default = 'B') # case sensitive

args = arg_parser.parse_args()

#=======================================================================
#%% variable assignment: input and output
# Command Line Options
drug = args.drug
gene = args.gene
indir = args.input_dir
outdir = args.output_dir
mut_filename = args.mutation_file
chainA = args.chain1
chainB = args.chain2
pdb_filename = args.pdb_file

#==========
# dir
#==========
datadir = homedir + '/' + 'git/Data'

if not indir:
    indir = datadir + '/' + drug + '/' + 'input'

if not outdir:
    outdir = datadir + '/' + drug + '/' + 'output'

# FIXME: this is a temporary directory and should be correctly handled
process_dir = datadir + '/' + drug + '/' + 'processing'
# os.mkdir() raised FileExistsError on every run after the first one.
os.makedirs(process_dir, exist_ok = True)

#=======
# input
#=======
if not pdb_filename:
    pdb_filename = gene.lower() + '_complex.pdb'
# Path(...).stem drops only the last suffix: file.pdb -> file
pdb_name = Path(pdb_filename).stem

infile_pdb = indir + '/' + pdb_filename
actual_pdb_filename = Path(infile_pdb).name

if mut_filename:
    mutation_file = mut_filename
else:
    #mutation_file = gene.lower() + '_mcsm_snps.csv' #real
    mutation_file = gene.lower() + '_test_snps.csv' #test

# the mutation list is read from the output dir, not the input dir
infile_muts = outdir + '/' + mutation_file

#=======
# output
#=======
out_filename = gene.lower() + '_foldx_results.csv'
outfile_foldx = outdir + '/' + out_filename

print('Arguments being passed:'
, '\nDrug:', args.drug
, '\ngene:', args.gene
, '\ninput dir:', indir
, '\noutput dir:', outdir
, '\npdb file:', infile_pdb
, '\npdb name:', pdb_name
, '\nactual pdb name:', actual_pdb_filename
, '\nmutation file:', infile_muts
, '\nchain1:', args.chain1
, '\noutput file:', outfile_foldx
, '\n=============================================================')
#=======================================================================
def getInteractionEnergy(filename):
    """Return the first 'Interaction Energy' value of a tab-separated file.

    filename: path of a tab-separated table whose header row contains an
    'Interaction Energy' column.
    """
    data = pd.read_csv(filename, sep = '\t')
    return data['Interaction Energy'].loc[0]


def getInteractions(filename):
    """Return how many cells of a tab-separated matrix file are equal to 1.

    The first column of the file is used as the row index and the first row
    as the column header.
    """
    data = pd.read_csv(filename, index_col = 0, header = 0, sep = '\t')
    return len(getIndexes(data, 1))


def formatMuts(mut_file, pdbname):
    """Write the individual-list file for `pdbname` and return its mutations.

    The first field of every non-empty row of the CSV file `mut_file` is read
    as a mutation. The module-level `chainA` is inserted after the first
    character of each mutation, and each result is written, followed by ';',
    on its own line of `process_dir`/individual_list_<pdbname>.txt.

    Returns the list of mutations with the chain inserted (without the ';').
    """
    with open(mut_file, newline = '') as csvfile:
        # csv.reader yields [] for a blank line; indexing it used to raise
        # IndexError.
        muts = [row[0] for row in csv.reader(csvfile) if row]

    mut_list = []
    outfile = process_dir + '/individual_list_' + pdbname + '.txt'
    with open(outfile, 'w') as output:
        for m in muts:
            print(m)
            mut = m[:1] + chainA + m[1:]
            mut_list.append(mut)
            mut = mut + ';'
            print(mut)
            output.write(mut)
            output.write('\n')
    return mut_list


def getIndexes(data, value):
    """Return the (row label, column label) pairs of cells equal to `value`.

    data: pandas DataFrame to search.
    The pairs are ordered column by column, and top to bottom within a column.
    """
    mask = data.isin([value])
    listOfPos = []
    for col in mask.columns:
        matching_rows = mask.index[mask[col].to_numpy()]
        listOfPos.extend((row, col) for row in matching_rows)
    return listOfPos


def loadFiles(df):
    """Load a tab-separated text file of numbers into a float32 numpy array.

    df: path of the file (the name is kept for backward compatibility; it is
    not a DataFrame). Blank lines are skipped.
    """
    resultList = []
    with open(df, 'r') as f:
        for line in f:
            line = line.rstrip('\n')
            if not line.strip():
                continue
            # The undefined name `sVals` used to be converted here, so every
            # call ended in a NameError.
            aVals = line.split('\t')
            resultList.append(list(map(np.float32, aVals)))
    return np.asarray(resultList, dtype=np.float32)

#=======================================================================
def _run_script(script, *script_args, required = True):
    """Run one of the helper shell scripts through bash and return its stdout.

    A failing required step re-raises the error. A failing optional step is
    reported on stdout and None is returned, so the run carries on with the
    remaining mutants instead of hiding the failure completely.
    """
    try:
        return subprocess.check_output(['bash', script, *script_args])
    except (subprocess.CalledProcessError, OSError) as err:
        if required:
            raise
        print('WARNING:', script, 'failed for', script_args, ':', err)
        return None


def main():
    """Run the FoldX pipeline and write one row of results per mutation.

    Steps: write the mutation list, run the repair/model/network shell
    scripts, count the matrix cells equal to 1 for the wild type and for every
    mutant, and write the ddG values plus the wild-type-minus-mutant count
    differences to the module-level `outfile_foldx` as CSV.

    Reads the module-level settings pdb_name, infile_muts, indir,
    actual_pdb_filename, process_dir, chainA, chainB and outfile_foldx.
    """
    pdbname = pdb_name
    comp = '' # 'y' switches on the complex-only steps below
    mutlist = formatMuts(infile_muts, pdbname)

    print(mutlist)
    nmuts = len(mutlist)
    print(nmuts)
    print(mutlist)
    print('start')
    _run_script('repairPDB.sh', indir, actual_pdb_filename, process_dir)

    print('end')
    _run_script('runfoldx.sh', pdbname, process_dir)

    # Per-mutant steps are best-effort: one failing mutant must not stop the
    # others, but the failure is now printed rather than swallowed.
    for n in range(1, nmuts + 1):
        print(n)
        _run_script('runPrintNetworks.sh', pdbname, str(n), process_dir, required = False)

    for n in range(1, nmuts + 1):
        print(n)
        _run_script('mutrenamefiles.sh', pdbname, str(n), process_dir, required = False)

    _run_script('renamefiles.sh', pdbname, process_dir)

    if comp == 'y':
        _run_script('runcomplex.sh', pdbname, chainA, chainB, process_dir, required = False)
        for n in range(1, nmuts + 1):
            _run_script('mutruncomplex.sh', pdbname, chainA, chainB, str(n), process_dir, required = False)

    interactions = ['Distances','Electro_RR','Electro_MM','Electro_SM','Electro_SS','Disulfide_RR','Disulfide_MM','Disulfide_SM','Disulfide_SS',
                    'Hbonds_RR','Hbonds_MM','Hbonds_SM','Hbonds_SS','Partcov_RR','Partcov_MM','Partcov_SM','Partcov_SS','VdWClashes_RR','VdWClashes_MM',
                    'VdWClashes_SM','VdWClashes_SS','Volumetric_RR','Volumetric_MM','Volumetric_SM','Volumetric_SS']

    dGdatafile = process_dir + '/Dif_' + pdbname + '_Repair.txt'
    dGdata = pd.read_csv(dGdatafile, sep = '\t')

    print('ddG')
    print(len(dGdata))
    ddG = dGdata['total energy'].tolist()

    # wild-type count for every interaction type
    wt_int = []
    for kind in interactions:
        filename = process_dir + '/Matrix_' + kind + '_' + pdbname + '_Repair_PN.txt'
        wt_int.append(getInteractions(filename))
    print('wt')
    print(wt_int)

    # row 0 holds ddG, rows 1.. hold one interaction type each; one column
    # per mutation
    ntotal = len(interactions) + 1
    print(ntotal)
    print(nmuts)
    data = np.empty((ntotal, nmuts))
    data[0] = ddG
    print(data)
    for i, kind in enumerate(interactions):
        d = []
        for n in range(1, nmuts + 1):
            print(i)
            filename = process_dir + '/Matrix_' + kind + '_' + pdbname + '_Repair_' + str(n) + '_PN.txt'
            mut = getInteractions(filename)
            diff = wt_int[i] - mut
            print(diff)
            print(wt_int[i])
            print(mut)
            d.append(diff)
        print(d)
        data[i + 1] = d

    # row labels of the result table, in the same order as the rows of `data`
    row_labels = ['ddG'] + interactions
    print(row_labels)

    IE = []
    if comp == 'y':
        wtfilename = process_dir + '/Summary_' + pdbname + '_Repair_AC.txt'
        wtE = getInteractionEnergy(wtfilename)
        print(wtE)
        for n in range(1, nmuts + 1):
            print(n)
            filename = process_dir + '/Summary_' + pdbname + '_Repair_' + str(n) + '_AC.txt'
            mutE = getInteractionEnergy(filename)
            print(mutE)
            diff = wtE - mutE
            print(diff)
            IE.append(diff)
        print(IE)
        IEresults = pd.DataFrame(IE, columns = ['Interaction Energy'], index = mutlist)
        IEfilename = 'foldx_complexresults_' + pdbname + '.csv'
        IEresults.to_csv(IEfilename)
        print(len(IE))
        data = np.append(data, [IE], axis = 0)
        print(data)
        row_labels = row_labels + ['Interaction Energy']

    # column labels: the mutations exactly as written to the individual list
    mut_file = process_dir + '/individual_list_' + pdbname + '.txt'
    with open(mut_file, newline = '') as csvfile:
        mutlist = [row[0] for row in csv.reader(csvfile) if row]
    print(mutlist)
    print(len(mutlist))
    print(data)
    # The former `results.append(ddG)` call was dropped: its result was
    # discarded, and DataFrame.append no longer exists in pandas >= 2.0.
    results = pd.DataFrame(data, columns = mutlist, index = row_labels)

    # my style formatted results
    results2 = results.T # transpose df
    results2.index.name = 'mutationinformation' # assign name to index
    results2 = results2.reset_index() # turn it into a column

    results2['mutationinformation'] = results2['mutationinformation'].replace({r'([A-Z]{1})[A-Z]{1}([0-9]+[A-Z]{1});' : r'\1 \2'}, regex = True) # capture mcsm style muts (i.e not the chain id)
    results2['mutationinformation'] = results2['mutationinformation'].str.replace(' ', '') # remove empty space

    results2.rename(columns = {'Distances': 'Contacts'}, inplace = True)

    # lower case columns
    results2.columns = results2.columns.str.lower()

    print('Writing file in the format below:\n'
          , results2.head()
          , '\nNo. of rows:', len(results2)
          , '\nNo. of cols:', len(results2.columns))

    outputfilename = outfile_foldx
    results2.to_csv(outputfilename, index = False)

if __name__ == '__main__':
    main()
of cols:', len(results2.columns)) + + outputfilename = outfile_foldx + #outputfilename = 'foldx_results_' + pdbname + '.csv' + #results.to_csv(outputfilename) + results2.to_csv(outputfilename, index = False) + +if __name__ == '__main__': + main() diff --git a/foldx/runPrintNetworks.sh b/foldx/runPrintNetworks.sh new file mode 100755 index 0000000..c2bebab --- /dev/null +++ b/foldx/runPrintNetworks.sh @@ -0,0 +1,7 @@ +PDB=$1 +n=$2 +OUTDIR=$3 +logger "Running runPrintNetworks" +cd ${OUTDIR} + +foldx --command=PrintNetworks --pdb="${PDB}_Repair_${n}.pdb" --water=PREDICT --vdwDesign=1 --output-dir=${OUTDIR} diff --git a/foldx/runcomplex.sh b/foldx/runcomplex.sh new file mode 100755 index 0000000..9cfd32a --- /dev/null +++ b/foldx/runcomplex.sh @@ -0,0 +1,10 @@ +PDB=$1 +A=$2 +B=$3 +OUTDIR=$4 +cd ${OUTDIR} +logger "Running runcomplex" +foldx --command=AnalyseComplex --pdb="${PDB}_Repair.pdb" --analyseComplexChains=${A},${B} --water=PREDICT --vdwDesign=1 --output-dir=${OUTDIR} +cp ${OUTDIR}/Summary_${PDB}_Repair_AC.fxout ${OUTDIR}/Summary_${PDB}_Repair_AC.txt +#sed -i .bak -e 1,8d ${OUTDIR}/Summary_${PDB}_Repair_AC.txt + diff --git a/foldx/runfoldx.sh b/foldx/runfoldx.sh new file mode 100755 index 0000000..5a929ce --- /dev/null +++ b/foldx/runfoldx.sh @@ -0,0 +1,9 @@ +PDB=$1 +OUTDIR=$2 +cd ${OUTDIR} +pwd +ls +logger "Running runfoldx" +foldx --command=BuildModel --pdb="${PDB}_Repair.pdb" --mutant-file="individual_list_${PDB}.txt" --ionStrength=0.05 --pH=7 --water=PREDICT --vdwDesign=1 --out-pdb=true --numberOfRuns=1 --output-dir=${OUTDIR} +foldx --command=PrintNetworks --pdb="${PDB}_Repair.pdb" --water=PREDICT --vdwDesign=1 --output-dir=${OUTDIR} +foldx --command=SequenceDetail --pdb="${PDB}_Repair.pdb" --water=PREDICT --vdwDesign=1 --output-dir=${OUTDIR} diff --git a/mcsm/mcsm.py b/mcsm/mcsm.py index c9a84eb..9eb0e56 100644 --- a/mcsm/mcsm.py +++ b/mcsm/mcsm.py @@ -14,8 +14,6 @@ import numpy as np from mcsm import * #============================== #%% global variables 
for defs - - #============================== #%% @@ -194,9 +192,9 @@ def format_mcsm_output(mcsm_outputcsv): ############# # format colnames: all lowercase, remove spaces and use '_' to join print('Assigning meaningful colnames i.e without spaces and hyphen and reflecting units' - , '\n===================================================================') + , '\n=======================================================') my_colnames_dict = {'Predicted Affinity Change': 'PredAffLog' # relevant info from this col will be extracted and the column discarded - , 'Mutation information': 'mutation_information' # {wild_type}{mutant_type} + , 'Mutation information': 'mutationinformation' # {wild_type}{mutant_type} , 'Wild-type': 'wild_type' # one letter amino acid code , 'Position': 'position' # number , 'Mutant-type': 'mutant_type' # one letter amino acid code @@ -206,41 +204,41 @@ def format_mcsm_output(mcsm_outputcsv): , 'DUET stability change': 'duet_stability_change'} # in kcal/mol mcsm_data.rename(columns = my_colnames_dict, inplace = True) -#%%=========================================================================== +#%%===================================================================== ################################# - # populate mutation_information + # populate mutationinformation # col which is currently blank ################################# - # populate mutation_information column:mcsm style muts {WT}{MUT} - print('Populating column : mutation_information which is currently empty\n', mcsm_data['mutation_information']) - mcsm_data['mutation_information'] = mcsm_data['wild_type'] + mcsm_data['position'].astype(str) + mcsm_data['mutant_type'] - print('checking after populating:\n', mcsm_data['mutation_information'] - , '\n===================================================================') + # populate mutationinformation column:mcsm style muts {WT}{MUT} + print('Populating column : mutationinformation which is currently empty\n', 
mcsm_data['mutationinformation']) + mcsm_data['mutationinformation'] = mcsm_data['wild_type'] + mcsm_data['position'].astype(str) + mcsm_data['mutant_type'] + print('checking after populating:\n', mcsm_data['mutationinformation'] + , '\n=======================================================') # Remove spaces b/w pasted columns - print('removing white space within column: \mutation_information') - mcsm_data['mutation_information'] = mcsm_data['mutation_information'].str.replace(' ', '') - print('Correctly formatted column: mutation_information\n', mcsm_data['mutation_information'] - , '\n===================================================================') -#%%=========================================================================== + print('removing white space within column: \mutationinformation') + mcsm_data['mutationinformation'] = mcsm_data['mutationinformation'].str.replace(' ', '') + print('Correctly formatted column: mutationinformation\n', mcsm_data['mutationinformation'] + , '\n=======================================================') +#%%===================================================================== ############# # sanity check: drop dupliate muts ############# # shouldn't exist as this should be eliminated at the time of running mcsm print('Sanity check:' , '\nChecking duplicate mutations') - if mcsm_data['mutation_information'].duplicated().sum() == 0: + if mcsm_data['mutationinformation'].duplicated().sum() == 0: print('PASS: No duplicate mutations detected (as expected)' , '\nDim of data:', mcsm_data.shape - , '\n===============================================================') + , '\n===================================================') else: print('WARNING: Duplicate mutations detected' , '\nDim of df with duplicates:', mcsm_data.shape , 'Removing duplicate entries') - mcsm_data = mcsm_data.drop_duplicates(['mutation_information']) + mcsm_data = mcsm_data.drop_duplicates(['mutationinformation']) print('Dim of data after removing duplicate 
muts:', mcsm_data.shape - , '\n===============================================================') -#%%=========================================================================== + , '\n===========================================================') +#%%===================================================================== ############# # Create col: duet_outcome ############# @@ -259,8 +257,8 @@ def format_mcsm_output(mcsm_outputcsv): # print('FAIL: DUET outcome assigned incorrectly' # , '\nExpected no. of stabilising mutations:', DUET_pos # , '\nGot no. of stabilising mutations', mcsm_data['duet_outcome'].value_counts()['Stabilising'] - # , '\n===============================================================') -#%%=========================================================================== + # , '\n======================================================') +#%%===================================================================== ############# # Extract numeric # part of ligand_distance col @@ -271,7 +269,7 @@ def format_mcsm_output(mcsm_outputcsv): print('extracting numeric part of col: ligand_distance') mcsm_data['ligand_distance'] = mcsm_data['ligand_distance'].str.extract('(\d+\.?\d*)') print('Ligand Distance:',mcsm_data['ligand_distance']) -#%%=========================================================================== +#%%===================================================================== ############# # Create 2 columns: # ligand_affinity_change and ligand_outcome @@ -282,7 +280,7 @@ def format_mcsm_output(mcsm_outputcsv): # categorocal part: '\b(\w+ing)\b' print('Extracting numerical and categorical parts from the col: PredAffLog') print('to create two columns: ligand_affinity_change and ligand_outcome' - , '\n===================================================================') + , '\n=======================================================') # 1) Extracting the predicted affinity change (numerical part) mcsm_data['ligand_affinity_change'] = 
mcsm_data['PredAffLog'].str.extract('(-?\d+\.?\d*)', expand = True) @@ -308,24 +306,24 @@ def format_mcsm_output(mcsm_outputcsv): if check.all(): print('PASS: spelling change successfull' , '\nNo. of predicted affinity changes:\n', british_spl - , '\n===============================================================') + , '\n===================================================') else: sys.exit('FAIL: spelling change unsucessfull' , '\nExpected:\n', american_spl , '\nGot:\n', british_spl - , '\n===============================================================') -#%%=========================================================================== + , '\n===================================================') +#%%===================================================================== ############# # ensuring corrrect dtype for numeric columns ############# # check dtype in cols print('Checking dtypes in all columns:\n', mcsm_data.dtypes - , '\n===================================================================') + , '\n=======================================================') print('Converting the following cols to numeric:' , '\nligand_distance' , '\nduet_stability_change' , '\nligand_affinity_change' - , '\n===================================================================') + , '\n=======================================================') # using apply method to change stabilty and affinity values to numeric numeric_cols = ['duet_stability_change', 'ligand_affinity_change', 'ligand_distance'] @@ -336,12 +334,12 @@ def format_mcsm_output(mcsm_outputcsv): if cols_check.all(): print('PASS: dtypes for selected cols:', numeric_cols , '\nchanged to numeric' - , '\n===============================================================') + , '\n===================================================') else: sys.exit('FAIL:dtype change to numeric for selected cols unsuccessful' - , '\n===============================================================') + , 
'\n===================================================') print(mcsm_data.dtypes) -#%%=========================================================================== +#%%===================================================================== ############# # scale duet values ############# @@ -357,7 +355,7 @@ def format_mcsm_output(mcsm_outputcsv): , '\n---------------------------------------------------------------' , '\nScaled duet scores:\n', mcsm_data['duet_scaled']) -#%%=========================================================================== +#%%===================================================================== ############# # scale affinity values ############# @@ -373,7 +371,7 @@ def format_mcsm_output(mcsm_outputcsv): , '\n---------------------------------------------------------------' , '\nScaled affinity scores:\n', mcsm_data['affinity_scaled']) -#%%=========================================================================== +#%%===================================================================== ############# # adding column: wild_position # useful for plots and db @@ -385,33 +383,33 @@ def format_mcsm_output(mcsm_outputcsv): print('removing white space within column: wild_position') mcsm_data['wild_position'] = mcsm_data['wild_position'].str.replace(' ', '') print('Correctly formatted column: wild_position\n', mcsm_data['wild_position'].head() - , '\n===================================================================') + , '\n=========================================================') -#%%=========================================================================== +#%%===================================================================== ############# # ensuring corrrect dtype in non-numeric cols ############# #) char cols - char_cols = ['PredAffLog', 'mutation_information', 'wild_type', 'mutant_type', 'chain', 'ligand_id', 'duet_outcome', 'ligand_outcome', 'wild_position'] + char_cols = ['PredAffLog', 'mutationinformation', 'wild_type', 'mutant_type', 
'chain', 'ligand_id', 'duet_outcome', 'ligand_outcome', 'wild_position'] #mcsm_data[char_cols] = mcsm_data[char_cols].astype(str) cols_check_char = mcsm_data.select_dtypes(include = 'object').columns.isin(char_cols) if cols_check_char.all(): print('PASS: dtypes for char cols:', char_cols, 'are indeed string' - , '\n===============================================================') + , '\n===================================================') else: sys.exit('FAIL:dtype change to numeric for selected cols unsuccessful' - , '\n===============================================================') + , '\n===================================================') #mcsm_data['ligand_distance', 'ligand_affinity_change'].apply(is_numeric_dtype(mcsm_data['ligand_distance', 'ligand_affinity_change'])) print(mcsm_data.dtypes) -#%%============================================================================= +#%%===================================================================== # Removing PredAff log column as it is not needed? print('Removing col: PredAffLog since relevant info has been extracted from it') mcsm_dataf = mcsm_data.drop(columns = ['PredAffLog']) -#%%=========================================================================== +#%%===================================================================== ############# # sanity check before writing file ############# @@ -421,14 +419,14 @@ def format_mcsm_output(mcsm_outputcsv): if len(mcsm_dataf.columns) == expected_cols: print('PASS: formatting successful' , '\nformatted df has expected no. 
of cols:', expected_cols - , '\n---------------------------------------------------------------' + , '\n---------------------------------------------------' , '\ncolnames:', mcsm_dataf.columns - , '\n----------------------------------------------------------------' + , '\n---------------------------------------------------' , '\ndtypes in cols:', mcsm_dataf.dtypes - , '\n----------------------------------------------------------------' + , '\n---------------------------------------------------' , '\norig data shape:', dforig_shape , '\nformatted df shape:', mcsm_dataf.shape - , '\n===============================================================') + , '\n===================================================') else: sys.exit('FAIL: something went wrong in formatting df' , '\nLen of orig df:', dforig_len @@ -438,7 +436,7 @@ def format_mcsm_output(mcsm_outputcsv): , '\nCheck formatting:' , '\ncheck hardcoded value:', expected_ncols_toadd , '\nis', expected_ncols_toadd, 'the no. of expected cols to add?' 
- , '\n===============================================================') + , '\n===================================================') return mcsm_dataf diff --git a/scripts/combine_afs_ors.py b/scripts/combine_afs_ors.py index fb21980..2fb0f8f 100755 --- a/scripts/combine_afs_ors.py +++ b/scripts/combine_afs_ors.py @@ -71,7 +71,7 @@ infile2 = outdir + '/' + in_filename_afor_kin print('Input file0:', infile0 , '\nInput file1:', infile1 , '\nInput file2:', infile2 - , '\n===================================================================') + , '\n=============================================================') #======= # output @@ -79,7 +79,7 @@ print('Input file0:', infile0 out_filename = gene.lower() + '_metadata_afs_ors.csv' outfile = outdir + '/' + out_filename print('Output file:', outfile - , '\n===================================================================') + , '\n=============================================================') del(in_filename_afor, in_filename_afor_kin, datadir, indir, outdir) @@ -217,13 +217,7 @@ if fail: del(left_df, right_df, common_cols, merging_cols, nmerging_cols, my_join, ndiff1, ndiff2, missing_mutinfo , expected_rows, expected_cols, fail) del(afor_df, snpinfo_df) - - - - - - - +#======================================================================= #%% Second merge: combined_df1 and afor_kin_df