test2 bugfixes
This commit is contained in:
parent
7a9b16255a
commit
1f8cfc2403
13 changed files with 851 additions and 69 deletions
|
@ -35,11 +35,12 @@ arg_parser.add_argument('-g', '--gene', help = 'gene name (case sensitive)',
|
||||||
arg_parser.add_argument('--datadir', help = 'Data Directory. By default, it assmumes homedir + git/Data')
|
arg_parser.add_argument('--datadir', help = 'Data Directory. By default, it assmumes homedir + git/Data')
|
||||||
arg_parser.add_argument('-i', '--input_dir', help = 'Input dir containing pdb files. By default, it assmumes homedir + <drug> + input')
|
arg_parser.add_argument('-i', '--input_dir', help = 'Input dir containing pdb files. By default, it assmumes homedir + <drug> + input')
|
||||||
arg_parser.add_argument('-o', '--output_dir', help = 'Output dir for results. By default, it assmes homedir + <drug> + output')
|
arg_parser.add_argument('-o', '--output_dir', help = 'Output dir for results. By default, it assmes homedir + <drug> + output')
|
||||||
arg_parser.add_argument('-p', '--process_dir', help = 'Temp processing dir for running foldX. By default, it assmes homedir + <drug> + processing. Make sure it is somewhere with LOTS of storage as it writes all output!')
|
arg_parser.add_argument('-p', '--process_dir', help = 'Temp processing dir for running foldX. By default, it assmes homedir + <drug> + processing. Make sure it is somewhere with LOTS of storage as it writes all output!') #FIXME
|
||||||
|
|
||||||
arg_parser.add_argument('-pdb', '--pdb_file', help = 'PDB File to process. By default, it assmumes a file called <gene>_complex.pdb in input_dir')
|
arg_parser.add_argument('-pdb', '--pdb_file', help = 'PDB File to process. By default, it assmumes a file called <gene>_complex.pdb in input_dir')
|
||||||
arg_parser.add_argument('-m', '--mutation_file', help = 'Mutation list. By default, assumes a file called <gene>_mcsm_snps.csv exists')
|
arg_parser.add_argument('-m', '--mutation_file', help = 'Mutation list. By default, assumes a file called <gene>_mcsm_snps.csv exists')
|
||||||
|
|
||||||
|
# FIXME: Doesn't work with 2 chains yet!
|
||||||
arg_parser.add_argument('-c1', '--chain1', help = 'Chain1 ID', default = 'A') # case sensitive
|
arg_parser.add_argument('-c1', '--chain1', help = 'Chain1 ID', default = 'A') # case sensitive
|
||||||
arg_parser.add_argument('-c2', '--chain2', help = 'Chain2 ID', default = 'B') # case sensitive
|
arg_parser.add_argument('-c2', '--chain2', help = 'Chain2 ID', default = 'B') # case sensitive
|
||||||
|
|
||||||
|
@ -101,7 +102,7 @@ actual_pdb_filename = Path(infile_pdb).name
|
||||||
if mut_filename:
|
if mut_filename:
|
||||||
mutation_file = mut_filename
|
mutation_file = mut_filename
|
||||||
else:
|
else:
|
||||||
mutation_file = gene.lower() + '_mcsm_snps.csv'
|
mutation_file = gene.lower() + '_mcsm_formatted_snps.csv'
|
||||||
|
|
||||||
infile_muts = outdir + '/' + mutation_file
|
infile_muts = outdir + '/' + mutation_file
|
||||||
|
|
||||||
|
|
|
@ -7,4 +7,3 @@ logger "Running runcomplex"
|
||||||
foldx --command=AnalyseComplex --pdb="${PDB}_Repair.pdb" --analyseComplexChains=${A},${B} --water=PREDICT --vdwDesign=1 --output-dir=${OUTDIR}
|
foldx --command=AnalyseComplex --pdb="${PDB}_Repair.pdb" --analyseComplexChains=${A},${B} --water=PREDICT --vdwDesign=1 --output-dir=${OUTDIR}
|
||||||
cp ${OUTDIR}/Summary_${PDB}_Repair_AC.fxout ${OUTDIR}/Summary_${PDB}_Repair_AC.txt
|
cp ${OUTDIR}/Summary_${PDB}_Repair_AC.fxout ${OUTDIR}/Summary_${PDB}_Repair_AC.txt
|
||||||
#sed -i .bak -e 1,8d ${OUTDIR}/Summary_${PDB}_Repair_AC.txt
|
#sed -i .bak -e 1,8d ${OUTDIR}/Summary_${PDB}_Repair_AC.txt
|
||||||
|
|
||||||
|
|
61
foldx/test2/mutrenamefiles.sh
Executable file
61
foldx/test2/mutrenamefiles.sh
Executable file
|
@ -0,0 +1,61 @@
|
||||||
|
# Post-process the FoldX network output files (*.fxout) of one mutant model.
#
# Usage: bash mutrenamefiles.sh <PDB> <n> [OUTDIR]
#   PDB     base name of the structure (files are named *_${PDB}_Repair_${n}_PN.*)
#   n       mutant number
#   OUTDIR  directory holding the .fxout files; optional, defaults to the
#           current directory. runFoldx.py passes its processing directory here.
#
# For every .fxout file a .txt copy is produced; header lines are stripped or,
# for the Matrix_* files, fixed line ranges are extracted into RR/MM/SM/SS files.
PDB=$1
n=$2
OUTDIR=${3:-.}

# Work where the FoldX output actually lives; stop if that directory is missing
cd "${OUTDIR}" || exit 1

tag="${PDB}_Repair_${n}_PN"

# Matrix files split into four sections by fixed line ranges.
# NOTE(review): the ranges 5-190 / 194-379 / 383-568 / 572-757 are hard-coded;
# presumably they match one specific protein length - confirm before reuse.
for kind in Hbonds Volumetric Electro Disulfide Partcov VdWClashes; do
    src="Matrix_${kind}_${tag}.fxout"
    cp "${src}" "Matrix_${kind}_${tag}.txt"
    sed -n '5,190p'   "${src}" > "Matrix_${kind}_RR_${tag}.txt"
    sed -n '194,379p' "${src}" > "Matrix_${kind}_MM_${tag}.txt"
    sed -n '383,568p' "${src}" > "Matrix_${kind}_SM_${tag}.txt"
    sed -n '572,757p' "${src}" > "Matrix_${kind}_SS_${tag}.txt"
done

# Distances matrix: single table, drop the first 4 lines
cp "Matrix_Distances_${tag}.fxout" "Matrix_Distances_${tag}.txt"
sed -i '1,4d' "Matrix_Distances_${tag}.txt"

# AllAtoms files: drop the first 2 lines
for kind in Disulfide Electro Hbonds Partcov VdWClashes Volumetric; do
    cp "AllAtoms_${kind}_${tag}.fxout" "AllAtoms_${kind}_${tag}.txt"
    sed -i '1,2d' "AllAtoms_${kind}_${tag}.txt"
done

# InteractingResidues files: drop the first 5 lines
for kind in VdWClashes Distances Electro Hbonds Partcov Volumetric Disulfide; do
    cp "InteractingResidues_${kind}_${tag}.fxout" "InteractingResidues_${kind}_${tag}.txt"
    sed -i '1,5d' "InteractingResidues_${kind}_${tag}.txt"
done
|
10
foldx/test2/mutruncomplex.sh
Executable file
10
foldx/test2/mutruncomplex.sh
Executable file
|
@ -0,0 +1,10 @@
|
||||||
|
# Run FoldX AnalyseComplex on one mutant model and copy its summary to .txt.
#
# Usage: bash mutruncomplex.sh <PDB> <chainA> <chainB> <n> <OUTDIR>
PDB=$1
A=$2
B=$3
n=$4
OUTDIR=$5

# FoldX is run from inside OUTDIR (no --output-dir is given), so abort if the
# directory cannot be entered instead of running in the wrong place.
cd "${OUTDIR}" || exit 1
logger "Running mutruncomplex"
foldx --command=AnalyseComplex --pdb="${PDB}_Repair_${n}.pdb" --analyseComplexChains="${A},${B}" --water=PREDICT --vdwDesign=1
# Already inside OUTDIR: use bare file names so a relative OUTDIR also works
cp "Summary_${PDB}_Repair_${n}_AC.fxout" "Summary_${PDB}_Repair_${n}_AC.txt"
#sed -i .bak -e 1,8d ${OUTDIR}/Summary_${PDB}_Repair_${n}_AC.txt
|
62
foldx/test2/renamefiles.sh
Executable file
62
foldx/test2/renamefiles.sh
Executable file
|
@ -0,0 +1,62 @@
|
||||||
|
# Post-process the FoldX output files (*.fxout) of the repaired wild-type model.
#
# Usage: bash renamefiles.sh <PDB> [OUTDIR]
#   PDB     base name of the structure (files are named *_${PDB}_Repair*.*)
#   OUTDIR  directory holding the .fxout files; optional, defaults to the
#           current directory. runFoldx.py passes its processing directory here.
#
# For every .fxout file a .txt copy is produced; header lines are stripped or,
# for the Matrix_* files, fixed line ranges are extracted into RR/MM/SM/SS files.
PDB=$1
OUTDIR=${2:-.}

# Work where the FoldX output actually lives; stop if that directory is missing
cd "${OUTDIR}" || exit 1

# Energy-difference table: drop the first 8 lines
cp "Dif_${PDB}_Repair.fxout" "Dif_${PDB}_Repair.txt"
sed -i '1,8d' "Dif_${PDB}_Repair.txt"

tag="${PDB}_Repair_PN"

# Matrix files split into four sections by fixed line ranges.
# NOTE(review): the ranges 5-190 / 194-379 / 383-568 / 572-757 are hard-coded;
# presumably they match one specific protein length - confirm before reuse.
for kind in Hbonds Volumetric Electro Disulfide Partcov VdWClashes; do
    src="Matrix_${kind}_${tag}.fxout"
    cp "${src}" "Matrix_${kind}_${tag}.txt"
    sed -n '5,190p'   "${src}" > "Matrix_${kind}_RR_${tag}.txt"
    sed -n '194,379p' "${src}" > "Matrix_${kind}_MM_${tag}.txt"
    sed -n '383,568p' "${src}" > "Matrix_${kind}_SM_${tag}.txt"
    sed -n '572,757p' "${src}" > "Matrix_${kind}_SS_${tag}.txt"
done

# Distances matrix: single table, drop the first 4 lines
cp "Matrix_Distances_${tag}.fxout" "Matrix_Distances_${tag}.txt"
sed -i '1,4d' "Matrix_Distances_${tag}.txt"

# AllAtoms files: drop the first 2 lines
for kind in Disulfide Electro Hbonds Partcov VdWClashes Volumetric; do
    cp "AllAtoms_${kind}_${tag}.fxout" "AllAtoms_${kind}_${tag}.txt"
    sed -i '1,2d' "AllAtoms_${kind}_${tag}.txt"
done

# InteractingResidues files: drop the first 5 lines
for kind in VdWClashes Distances Electro Hbonds Partcov Volumetric Disulfide; do
    cp "InteractingResidues_${kind}_${tag}.fxout" "InteractingResidues_${kind}_${tag}.txt"
    sed -i '1,5d' "InteractingResidues_${kind}_${tag}.txt"
done
|
9
foldx/test2/repairPDB.sh
Executable file
9
foldx/test2/repairPDB.sh
Executable file
|
@ -0,0 +1,9 @@
|
||||||
|
# Run FoldX RepairPDB on one structure.
#
# Usage: bash repairPDB.sh <INDIR> <PDB> <OUTDIR>
#   INDIR   directory containing the PDB file
#   PDB     PDB file name (as passed to --pdb)
#   OUTDIR  directory FoldX writes its output to
INDIR=$1
PDB=$2
OUTDIR=$3

logger "Running repairPDB"

#foldx --command=RepairPDB --pdb="${PDB}.pdb" --ionStrength=0.05 --pH=7 --water=PREDICT --vdwDesign=1 outPDB=true --output-dir=${OUTDIR}

# Paths are quoted so directories containing spaces are passed as one argument.
# NOTE(review): 'outPDB=true' has no leading '--' unlike the other options;
# left unchanged - confirm against the FoldX command-line documentation.
foldx --command=RepairPDB --pdb-dir="${INDIR}" --pdb="${PDB}" --ionStrength=0.05 --pH=7 --water=PREDICT --vdwDesign=1 outPDB=true --output-dir="${OUTDIR}"
|
344
foldx/test2/runFoldx.py
Executable file
344
foldx/test2/runFoldx.py
Executable file
|
@ -0,0 +1,344 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
import subprocess
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
from contextlib import suppress
|
||||||
|
from pathlib import Path
|
||||||
|
import re
|
||||||
|
import csv
|
||||||
|
import argparse
|
||||||
|
#https://realpython.com/python-pathlib/
|
||||||
|
|
||||||
|
# FIXME
|
||||||
|
#strong dependency of file and path names
|
||||||
|
#cannot pass file with path. Need to pass them separately
|
||||||
|
#assumptions made for dir struc as standard
|
||||||
|
#datadir + drug + input
|
||||||
|
|
||||||
|
#=======================================================================
|
||||||
|
#%% specify input and curr dir
|
||||||
|
homedir = os.path.expanduser('~')
|
||||||
|
|
||||||
|
# set working dir
|
||||||
|
os.getcwd()
|
||||||
|
#os.chdir(homedir + '/git/LSHTM_analysis/foldx/')
|
||||||
|
#os.getcwd()
|
||||||
|
|
||||||
|
#=======================================================================
|
||||||
|
#%% command line args
|
||||||
|
# Command-line interface. Directory and file options are optional; when they
# are omitted the script derives them from --datadir, --drug and --gene.
arg_parser = argparse.ArgumentParser()

arg_parser.add_argument('-d', '--drug', help = 'drug name', default = None)
# The gene name is always needed to build the output file name, so let
# argparse report a missing value instead of failing later on None.lower().
arg_parser.add_argument('-g', '--gene', help = 'gene name (case sensitive)', default = None, required = True)

arg_parser.add_argument('--datadir', help = 'Data Directory. By default, it assumes homedir + git/Data')
arg_parser.add_argument('-i', '--input_dir', help = 'Input dir containing pdb files. By default, it assumes <datadir>/<drug>/input')
arg_parser.add_argument('-o', '--output_dir', help = 'Output dir for results. By default, it assumes <datadir>/<drug>/output')
arg_parser.add_argument('-p', '--process_dir', help = 'Temp processing dir for running foldX. By default, it assumes <datadir>/<drug>/processing. Make sure it is somewhere with LOTS of storage as it writes all output!') #FIXME

arg_parser.add_argument('-pdb', '--pdb_file', help = 'PDB File to process. By default, it assumes a file called <gene>_complex.pdb in input_dir')
arg_parser.add_argument('-m', '--mutation_file', help = 'Mutation list. By default, assumes a file called <gene>_mcsm_formatted_snps.csv exists in output_dir')

# FIXME: Doesn't work with 2 chains yet!
arg_parser.add_argument('-c1', '--chain1', help = 'Chain1 ID', default = 'A') # case sensitive
arg_parser.add_argument('-c2', '--chain2', help = 'Chain2 ID', default = 'B') # case sensitive

args = arg_parser.parse_args()
|
||||||
|
#=======================================================================
|
||||||
|
#%% variable assignment: input and output
|
||||||
|
#drug = 'pyrazinamide'
|
||||||
|
#gene = 'pncA'
|
||||||
|
#gene_match = gene + '_p.'
|
||||||
|
#%%=====================================================================
|
||||||
|
# Command line options
|
||||||
|
drug = args.drug
|
||||||
|
gene = args.gene
|
||||||
|
|
||||||
|
datadir = args.datadir
|
||||||
|
indir = args.input_dir
|
||||||
|
outdir = args.output_dir
|
||||||
|
process_dir = args.process_dir
|
||||||
|
|
||||||
|
mut_filename = args.mutation_file
|
||||||
|
chainA = args.chain1
|
||||||
|
chainB = args.chain2
|
||||||
|
pdb_filename = args.pdb_file
|
||||||
|
|
||||||
|
# os.path.splitext will fail interestingly with file.pdb.txt.zip
|
||||||
|
#pdb_name = os.path.splitext(pdb_file)[0]
|
||||||
|
# Just the filename, thanks
|
||||||
|
#pdb_name = Path(in_filename_pdb).stem
|
||||||
|
|
||||||
|
#==============
|
||||||
|
# directories
|
||||||
|
#==============
|
||||||
|
# Fill in every directory the user did not supply on the command line.
if not datadir:
    datadir = homedir + '/' + 'git/Data'

if not indir:
    indir = datadir + '/' + drug + '/input'

if not outdir:
    outdir = datadir + '/' + drug + '/output'

#TODO: perhaps better handled by refactoring code to prevent generating lots of output files!
# A default is required here: os.path.abspath(None) below raises TypeError,
# and the --process_dir help text documents this location as the default.
if not process_dir:
    process_dir = datadir + '/' + drug + '/processing'

# Make all paths absolute in case the user forgot
indir = os.path.abspath(indir)
process_dir = os.path.abspath(process_dir)
outdir = os.path.abspath(outdir)
datadir = os.path.abspath(datadir)
|
||||||
|
|
||||||
|
#=======
|
||||||
|
# input
|
||||||
|
#=======
|
||||||
|
# FIXME
|
||||||
|
if pdb_filename:
|
||||||
|
pdb_name = Path(pdb_filename).stem
|
||||||
|
else:
|
||||||
|
pdb_filename = gene.lower() + '_complex.pdb'
|
||||||
|
pdb_name = Path(pdb_filename).stem
|
||||||
|
|
||||||
|
infile_pdb = indir + '/' + pdb_filename
|
||||||
|
actual_pdb_filename = Path(infile_pdb).name
|
||||||
|
|
||||||
|
if mut_filename:
|
||||||
|
mutation_file = os.path.abspath(mut_filename)
|
||||||
|
infile_muts = mutation_file
|
||||||
|
print('User-provided mutation file in use:', infile_muts)
|
||||||
|
else:
|
||||||
|
mutation_file = gene.lower() + '_mcsm_formatted_snps.csv'
|
||||||
|
infile_muts = outdir + '/' + mutation_file
|
||||||
|
print('WARNING: Assuming default mutation file:', infile_muts)
|
||||||
|
|
||||||
|
#=======
|
||||||
|
# output
|
||||||
|
#=======
|
||||||
|
out_filename = gene.lower() + '_foldx.csv'
|
||||||
|
outfile_foldx = outdir + '/' + out_filename
|
||||||
|
|
||||||
|
print('Arguments being passed:'
|
||||||
|
, '\nDrug:', args.drug
|
||||||
|
, '\ngene:', args.gene
|
||||||
|
, '\ninput dir:', indir
|
||||||
|
, '\nprocess dir:', process_dir
|
||||||
|
, '\noutput dir:', outdir
|
||||||
|
, '\npdb file:', infile_pdb
|
||||||
|
, '\npdb name:', pdb_name
|
||||||
|
, '\nactual pdb name:', actual_pdb_filename
|
||||||
|
, '\nmutation file:', infile_muts
|
||||||
|
, '\nchain1:', args.chain1
|
||||||
|
, '\noutput file:', outfile_foldx
|
||||||
|
, '\n=============================================================')
|
||||||
|
#=======================================================================
|
||||||
|
|
||||||
|
def getInteractionEnergy(filename):
    """Return the 'Interaction Energy' value from the row labelled 0.

    filename: path of a tab-separated file whose header row contains an
    'Interaction Energy' column.
    """
    summary = pd.read_csv(filename, sep = '\t')
    energy_column = summary['Interaction Energy']
    return energy_column.loc[0]
|
||||||
|
|
||||||
|
def getInteractions(filename):
    """Count the cells equal to 1 in a tab-separated matrix file.

    The first line of the file is used as the header and the first column as
    the row index; the number of (row, column) positions holding the value 1
    is returned.
    """
    matrix = pd.read_csv(filename, sep = '\t', header = 0, index_col = 0)
    return len(getIndexes(matrix, 1))
|
||||||
|
|
||||||
|
def formatMuts(mut_file,pdbname):
    """Write the mutation list file for this run and return the mutations.

    The first field of every non-empty row of the CSV file *mut_file* is read
    as a mutation. The module-level chain ID ``chainA`` is inserted after the
    first character of each mutation, and the result, followed by ';', is
    written one per line to ``<process_dir>/individual_list_<pdbname>.txt``.

    Returns the list of chain-annotated mutations (without the trailing ';').
    """
    # newline='' is what the csv module expects for files it reads; blank
    # rows come back as [] and are skipped instead of failing on row[0].
    with open(mut_file, newline = '') as csvfile:
        muts = [row[0] for row in csv.reader(csvfile) if row]

    mut_list = []
    outfile = process_dir + '/individual_list_' + pdbname + '.txt'
    with open(outfile, 'w') as output:
        for m in muts:
            print(m)
            mut = m[:1] + chainA + m[1:]
            mut_list.append(mut)
            mut = mut + ';'
            print(mut)
            output.write(mut)
            output.write('\n')
    return mut_list
|
||||||
|
|
||||||
|
def getIndexes(data, value):
    """Return the (row label, column label) positions where *data* equals *value*.

    Positions are listed column by column, in column order, and within each
    column in row order. An empty list is returned when nothing matches.
    """
    mask = data.isin([value])
    mask.columns = data.columns.values

    positions = []
    # Only visit the columns that contain at least one match
    for col in mask.columns[mask.any().values]:
        hits = mask[col]
        positions.extend((row, col) for row in hits[hits == True].index)
    return positions
|
||||||
|
|
||||||
|
def loadFiles(df):
    """Load a tab-separated text file of numbers into a float32 numpy array.

    df: path of the file; every non-blank line is one row of tab-separated
    values. Blank lines are skipped.

    Returns a numpy array with dtype float32, one row per line.
    Raises ValueError when a field cannot be converted to a number.
    """
    resultList = []
    # The context manager closes the file even if a conversion fails
    with open(df, 'r') as f:
        for line in f:
            line = line.rstrip('\n')
            if not line.strip():
                continue
            aVals = line.split('\t')
            # The original mapped over the undefined name 'sVals' (NameError)
            fVals = list(map(np.float32, aVals))
            resultList.append(fVals)
    return np.asarray(resultList, dtype=np.float32)
|
||||||
|
|
||||||
|
#=======================================================================
|
||||||
|
def _run_best_effort(cmd):
    """Run *cmd*, printing a warning instead of raising; return True on success."""
    try:
        subprocess.check_output(cmd)
    except Exception as err:
        # Deliberately best-effort (the original suppressed every exception),
        # but the failure is now reported instead of being hidden.
        print('WARNING: command failed:', ' '.join(cmd), '-', err)
        return False
    return True


def main():
    """Run the FoldX helper scripts and write the per-mutation results CSV.

    Steps, all driven by the module-level settings parsed from the command line:
    1. write the mutation list (formatMuts) and run repairPDB.sh / runfoldx.sh;
    2. per mutation, run runPrintNetworks.sh and mutrenamefiles.sh (best
       effort), then renamefiles.sh for the wild type;
    3. read 'total energy' from Dif_<pdb>_Repair.txt as ddG;
    4. for every interaction matrix, store (wild-type count - mutant count) of
       cells equal to 1;
    5. write the transposed table to ``outfile_foldx``.

    The AnalyseComplex steps only run when the local flag ``comp`` is 'y'.
    """
    pdbname = pdb_name
    comp = '' # for complex only
    mutlist = formatMuts(infile_muts, pdbname)

    print(mutlist)
    nmuts = len(mutlist)
    print(nmuts)
    print(mutlist)
    print('start')
    subprocess.check_output(['bash','repairPDB.sh', indir, actual_pdb_filename, process_dir])

    print('end')
    subprocess.check_output(['bash', 'runfoldx.sh', pdbname, process_dir])

    for n in range(1, nmuts+1):
        print(n)
        _run_best_effort(['bash', 'runPrintNetworks.sh', pdbname, str(n), process_dir])

    for n in range(1, nmuts+1):
        print(n)
        _run_best_effort(['bash', 'mutrenamefiles.sh', pdbname, str(n), process_dir])

    subprocess.check_output(['bash','renamefiles.sh', pdbname, process_dir])

    if comp == 'y':
        chain1 = chainA
        chain2 = chainB
        # Mutant complexes are only analysed when the wild-type run succeeded
        if _run_best_effort(['bash','runcomplex.sh', pdbname, chain1, chain2, process_dir]):
            for n in range(1, nmuts+1):
                _run_best_effort(['bash','mutruncomplex.sh', pdbname, chain1, chain2, str(n), process_dir])

    interactions = ['Distances','Electro_RR','Electro_MM','Electro_SM','Electro_SS','Disulfide_RR','Disulfide_MM','Disulfide_SM','Disulfide_SS',
                    'Hbonds_RR','Hbonds_MM','Hbonds_SM','Hbonds_SS','Partcov_RR','Partcov_MM','Partcov_SM','Partcov_SS','VdWClashes_RR','VdWClashes_MM',
                    'VdWClashes_SM','VdWClashes_SS','Volumetric_RR','Volumetric_MM','Volumetric_SM','Volumetric_SS']

    dGdatafile = process_dir + '/Dif_' + pdbname + '_Repair.txt'
    dGdata = pd.read_csv(dGdatafile, sep = '\t')

    print('ddG')
    print(len(dGdata))
    ddG = [dGdata['total energy'].loc[i] for i in range(len(dGdata))]

    # Wild-type count of matrix cells equal to 1, one entry per interaction
    wt_int = []
    for name in interactions:
        filename = process_dir + '/Matrix_' + name + '_'+ pdbname + '_Repair_PN.txt'
        wt_int.append(getInteractions(filename))
    print('wt')
    print(wt_int)

    # Row 0 holds ddG, rows 1.. hold one interaction each; one column per mutation
    ntotal = len(interactions) + 1
    print(ntotal)
    print(nmuts)
    data = np.empty((ntotal, nmuts))
    data[0] = ddG
    print(data)
    for i, name in enumerate(interactions):
        d = []
        for n in range(1, nmuts+1):
            print(i)
            filename = process_dir + '/Matrix_' + name + '_' + pdbname + '_Repair_' + str(n) + '_PN.txt'
            mut = getInteractions(filename)
            diff = wt_int[i] - mut
            print(diff)
            print(wt_int[i])
            print(mut)
            d.append(diff)
        print(d)
        data[i+1] = d

    # Row labels of the result table, in the same order as the rows of `data`
    interactions = ['ddG'] + interactions
    print(interactions)

    IE = []
    if comp == 'y':
        wtfilename = process_dir + '/Summary_' + pdbname + '_Repair_AC.txt'
        wtE = getInteractionEnergy(wtfilename)
        print(wtE)
        for n in range(1, nmuts+1):
            print(n)
            filename = process_dir + '/Summary_' + pdbname + '_Repair_' + str(n) + '_AC.txt'
            mutE = getInteractionEnergy(filename)
            print(mutE)
            diff = wtE - mutE
            print(diff)
            IE.append(diff)
        print(IE)
        IEresults = pd.DataFrame(IE,columns = ['Interaction Energy'], index = mutlist)
        IEfilename = 'foldx_complexresults_'+pdbname+'.csv'
        IEresults.to_csv(IEfilename)
        print(len(IE))
        data = np.append(data,[IE], axis = 0)
        print(data)
        interactions = interactions + ['Interaction Energy']

    # Column labels come from the list file written by formatMuts, so they
    # carry the trailing ';' that is stripped again further down.
    mut_file = process_dir + '/individual_list_' + pdbname + '.txt'
    with open(mut_file) as csvfile:
        readCSV = csv.reader(csvfile)
        mutlist = []
        for row in readCSV:
            mut = row[0]
            mutlist.append(mut)
    print(mutlist)
    print(len(mutlist))
    print(data)
    results = pd.DataFrame(data, columns = mutlist, index = interactions)
    # The former `results.append(ddG)` was dropped: its return value was
    # discarded, and DataFrame.append no longer exists in pandas >= 2.0.
    #print(results.head())

    # my style formatted results
    results2 = results.T # transpose df
    results2.index.name = 'mutationinformation' # assign name to index
    results2 = results2.reset_index() # turn it into a columns

    results2['mutationinformation'] = results2['mutationinformation'].replace({r'([A-Z]{1})[A-Z]{1}([0-9]+[A-Z]{1});' : r'\1 \2'}, regex = True) # capture mcsm style muts (i.e not the chain id)
    results2['mutationinformation'] = results2['mutationinformation'].str.replace(' ', '') # remove empty space

    results2.rename(columns = {'Distances': 'Contacts'}, inplace = True)

    # lower case columns
    results2.columns = results2.columns.str.lower()

    print('Writing file in the format below:\n'
          , results2.head()
          , '\nNo. of rows:', len(results2)
          , '\nNo. of cols:', len(results2.columns))

    outputfilename = outfile_foldx
    #outputfilename = 'foldx_results_' + pdbname + '.csv'
    #results.to_csv(outputfilename)
    results2.to_csv(outputfilename, index = False)

if __name__ == '__main__':
    main()
|
250
foldx/test2/runFoldx_test.py
Executable file
250
foldx/test2/runFoldx_test.py
Executable file
|
@ -0,0 +1,250 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
import subprocess
|
||||||
|
import os
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
from contextlib import suppress
|
||||||
|
import re
|
||||||
|
import csv
|
||||||
|
|
||||||
|
def getInteractions(filename):
    """Count the cells equal to 1 in a tab-separated matrix file.

    The file is read with its first row as the header and its first column
    as the index. The (row, column) positions holding the value 1 are
    collected with getIndexes(), printed, and counted.

    NOTE(review): callers pass FoldX PrintNetworks "Matrix_*_PN.txt" files,
    where a 1 presumably marks an interacting residue pair -- confirm.

    :param filename: path of the tab-separated matrix file
    :return: number of (row, column) positions whose value is 1
    """
    matrix = pd.read_csv(filename, sep="\t", header=0, index_col=0)
    contactList = getIndexes(matrix, 1)
    print(contactList)
    return len(contactList)
|
||||||
|
|
||||||
|
def formatMuts(mut_file, pdbname, chain='A',
               outdir="/home/tanu/git/LSHTM_analysis/foldx/test2"):
    """Convert a mutation list into a FoldX 'individual_list' file.

    The first field of every non-blank CSV row is taken as a mutation
    (e.g. 'L4S'). The chain identifier is inserted after the first
    character ('L4S' -> 'LA4S') and each result is written to
    <outdir>/individual_list_<pdbname>.txt as one ';'-terminated line.

    :param mut_file: path of the CSV file; only the first column is used
    :param pdbname: name used to build the output file name
    :param chain: chain identifier inserted into each mutation
    :param outdir: directory receiving the individual_list file
    :return: list of chain-qualified mutations, without the trailing ';'
    """
    # newline='' is what the csv module expects for files handed to reader().
    with open(mut_file, newline='') as csvfile:
        # Blank lines come back as empty rows; skipping them avoids an
        # IndexError on row[0] and keeps bare chain letters out of the list.
        muts = [row[0] for row in csv.reader(csvfile) if row and row[0].strip()]

    mut_list = []
    outfile = os.path.join(outdir, "individual_list_" + pdbname + ".txt")
    with open(outfile, "w") as output:
        for m in muts:
            print(m)
            mut = m[:1] + chain + m[1:]
            mut_list.append(mut)
            print(mut + ";")
            output.write(mut + ";\n")
    return mut_list
|
||||||
|
|
||||||
|
def getIndexes(data, value):
    """Locate every cell of a DataFrame that equals a given value.

    Columns are visited in their DataFrame order and, within a column,
    rows in index order.

    :param data: DataFrame to search
    :param value: value to look for
    :return: list of (row label, column label) tuples, one per matching cell
    """
    matches = data.isin([value])
    matches.columns = data.columns.values

    positions = []
    for col, has_hit in matches.any().items():
        if not has_hit:
            continue
        column_hits = matches[col]
        positions.extend((row, col) for row in column_hits[column_hits].index)
    return positions
|
||||||
|
|
||||||
|
def loadFiles(df):
    """Load a tab-separated text file of numbers into a float32 array.

    Each non-blank line becomes one row; its tab-separated fields are
    converted to numpy.float32.

    :param df: path of the text file (despite the name, not a DataFrame)
    :return: numpy array of dtype float32 with one row per non-blank line
    :raises ValueError: if a field cannot be parsed as a number
    """
    resultList = []
    # The context manager closes the file even if a conversion fails.
    with open(df, 'r') as f:
        for line in f:
            line = line.rstrip('\n')
            if not line.strip():
                # A trailing or stray blank line carries no values.
                continue
            aVals = line.split("\t")
            resultList.append([np.float32(v) for v in aVals])
    return np.asarray(resultList, dtype=np.float32)
|
||||||
|
|
||||||
|
#=======================================================================
|
||||||
|
# Rows of the results table after ddG: (row label, PrintNetworks matrix name).
_NETWORKS = [
    ("contacts", "Distances"),
    ("electro", "Electro_RR"),
    ("disulfide", "Disulfide_RR"),
    ("hbonds", "Hbonds_RR"),
    ("partcov", "Partcov_RR"),
    ("VdWClashes", "VdWClashes_RR"),
    ("volumetric", "Volumetric_RR"),
]


def _matrix_file(workdir, network, pdbname, n=None):
    """Return the matrix file path for the wild type (n is None) or mutant n."""
    tag = "" if n is None else "_" + str(n)
    return workdir + "/Matrix_" + network + "_" + pdbname + "_Repair" + tag + "_PN.txt"


def _run_best_effort(cmd):
    """Run a helper script, reporting (instead of hiding) a failure and continuing."""
    try:
        subprocess.check_output(cmd)
    except (subprocess.CalledProcessError, OSError) as err:
        print("WARNING: command failed:", " ".join(cmd), "->", err)


def main():
    """Run the FoldX test pipeline for one structure and write a results CSV.

    Steps:
      1. write the FoldX mutation list with formatMuts();
      2. run runfoldx.sh, then runPrintNetworks.sh and mutrenamefiles.sh
         once per mutant, then renamefiles.sh;
      3. read the 'total energy' column of Dif_<pdb>_Repair.txt as ddG;
      4. for each network in _NETWORKS, record the wild-type count minus
         the mutant count, both from getInteractions();
      5. write foldx_results_<pdb>.csv to the current directory, one row
         per mutation.

    The helper scripts are invoked by relative name, so the script has to be
    started from the directory that contains them.
    """
    workdir = "/home/tanu/git/LSHTM_analysis/foldx/test2"
    pdbname = '3pl1'
    mut_filename = "pnca_muts_sample.csv"

    mutlist = formatMuts(mut_filename, pdbname)
    print(mutlist)
    # Mutant numbers in the generated file names start at 1.
    mut_numbers = range(1, len(mutlist) + 1)

    print("start")
    # runfoldx.sh and runPrintNetworks.sh take the working directory as their
    # last argument and cd into it before calling foldx.
    subprocess.check_output(['bash', 'runfoldx.sh', pdbname, workdir])
    print("end")

    for n in mut_numbers:
        print(n)
        _run_best_effort(['bash', 'runPrintNetworks.sh', pdbname, str(n), workdir])

    for n in mut_numbers:
        print(n)
        _run_best_effort(['bash', 'mutrenamefiles.sh', pdbname, str(n)])

    subprocess.check_output(['bash', 'renamefiles.sh', pdbname])

    dGdata = pd.read_csv(workdir + "/Dif_" + pdbname + "_Repair.txt", sep="\t")
    print(dGdata)
    ddG = dGdata['total energy'].tolist()
    print(ddG)

    labels = ["ddG"]
    rows = [ddG]
    for label, network in _NETWORKS:
        wt_count = getInteractions(_matrix_file(workdir, network, pdbname))
        diffs = [
            wt_count - getInteractions(_matrix_file(workdir, network, pdbname, n))
            for n in mut_numbers
        ]
        print(diffs)
        labels.append(label)
        rows.append(diffs)

    # ddG is already the first row of the table, so nothing is appended later.
    results = pd.DataFrame(rows, columns=mutlist, index=labels)
    print(results)

    # One row per mutation, one column per measure.
    results2 = results.T
    outputfilename = "foldx_results_" + pdbname + ".csv"
    results2.to_csv(outputfilename)


if __name__ == "__main__":
    main()
|
7
foldx/test2/runPrintNetworks.sh
Executable file
7
foldx/test2/runPrintNetworks.sh
Executable file
|
@ -0,0 +1,7 @@
|
||||||
|
# Run FoldX PrintNetworks on one mutant model.
# Usage: bash runPrintNetworks.sh <pdb_name> <mutant_number> [work_dir]
# work_dir defaults to the current directory.
PDB=$1
n=$2
OUTDIR=${3:-.}
logger "Running runPrintNetworks"
# Stop rather than run foldx in the wrong directory if work_dir is unusable.
cd "${OUTDIR}" || exit 1
# Use the absolute path from here on so a relative work_dir is not applied twice.
OUTDIR=$PWD

foldx --command=PrintNetworks --pdb="${PDB}_Repair_${n}.pdb" --water=PREDICT --vdwDesign=1 --output-dir="${OUTDIR}"
|
9
foldx/test2/runcomplex.sh
Executable file
9
foldx/test2/runcomplex.sh
Executable file
|
@ -0,0 +1,9 @@
|
||||||
|
# Run FoldX AnalyseComplex on the repaired structure for two chains and
# copy the summary output to a .txt file.
# Usage: bash runcomplex.sh <pdb_name> <chain_A> <chain_B> [work_dir]
# work_dir defaults to the current directory.
PDB=$1
A=$2
B=$3
OUTDIR=${4:-.}
# Stop rather than run foldx in the wrong directory if work_dir is unusable.
cd "${OUTDIR}" || exit 1
# Use the absolute path from here on so a relative work_dir is not applied twice.
OUTDIR=$PWD
logger "Running runcomplex"
foldx --command=AnalyseComplex --pdb="${PDB}_Repair.pdb" --analyseComplexChains="${A},${B}" --water=PREDICT --vdwDesign=1 --output-dir="${OUTDIR}"
cp "${OUTDIR}/Summary_${PDB}_Repair_AC.fxout" "${OUTDIR}/Summary_${PDB}_Repair_AC.txt"
#sed -i .bak -e 1,8d ${OUTDIR}/Summary_${PDB}_Repair_AC.txt
|
9
foldx/test2/runfoldx.sh
Executable file
9
foldx/test2/runfoldx.sh
Executable file
|
@ -0,0 +1,9 @@
|
||||||
|
# Run FoldX BuildModel, PrintNetworks and SequenceDetail on the repaired
# structure, writing all output into the working directory.
# Usage: bash runfoldx.sh <pdb_name> [work_dir]
# work_dir defaults to the current directory; it must contain
# <pdb_name>_Repair.pdb and individual_list_<pdb_name>.txt.
PDB=$1
OUTDIR=${2:-.}
# Stop rather than run foldx in the wrong directory if work_dir is unusable.
cd "${OUTDIR}" || exit 1
pwd
ls -l
logger "Running runfoldx"
foldx --command=BuildModel --pdb="${PDB}_Repair.pdb" --mutant-file="individual_list_${PDB}.txt" --ionStrength=0.05 --pH=7 --water=PREDICT --vdwDesign=1 --out-pdb=true --numberOfRuns=1 --output-dir=.
foldx --command=PrintNetworks --pdb="${PDB}_Repair.pdb" --water=PREDICT --vdwDesign=1 --output-dir=.
foldx --command=SequenceDetail --pdb="${PDB}_Repair.pdb" --water=PREDICT --vdwDesign=1 --output-dir=.
|
|
@ -26,7 +26,7 @@ Created on Tue Aug 6 12:56:03 2019
|
||||||
# 1) <gene>_gwas.csv
|
# 1) <gene>_gwas.csv
|
||||||
# 2) <gene>_common_ids.csv
|
# 2) <gene>_common_ids.csv
|
||||||
# 3) <gene>_ambiguous_muts.csv
|
# 3) <gene>_ambiguous_muts.csv
|
||||||
# 4) <gene>_mcsm_snps.csv
|
# 4) <gene>_mcsm_formatted_snps.csv
|
||||||
# 5) <gene>_metadata_poscounts.csv
|
# 5) <gene>_metadata_poscounts.csv
|
||||||
# 6) <gene>_metadata.csv
|
# 6) <gene>_metadata.csv
|
||||||
# 7) <gene>_all_muts_msa.csv
|
# 7) <gene>_all_muts_msa.csv
|
||||||
|
@ -81,8 +81,8 @@ indir = args.input_dir
|
||||||
outdir = args.output_dir
|
outdir = args.output_dir
|
||||||
make_dirs = args.make_dirs
|
make_dirs = args.make_dirs
|
||||||
|
|
||||||
#drug = 'ethambutol'
|
#drug = 'streptomycin'
|
||||||
#gene = 'embB'
|
#gene = 'gid'
|
||||||
|
|
||||||
#%% input and output dirs and files
|
#%% input and output dirs and files
|
||||||
#=======
|
#=======
|
||||||
|
@ -122,15 +122,15 @@ if make_dirs:
|
||||||
# handle missing dirs here
|
# handle missing dirs here
|
||||||
if not os.path.isdir(datadir):
|
if not os.path.isdir(datadir):
|
||||||
print('ERROR: Data directory does not exist:', datadir
|
print('ERROR: Data directory does not exist:', datadir
|
||||||
, '\nPlease create and ensure gwas data is present and then rerun\nelse specify cmd option ---make_dirs')
|
, '\nPlease create and ensure gwas data is present and then rerun\nelse specify cmd option --make_dirs')
|
||||||
sys.exit()
|
sys.exit()
|
||||||
if not os.path.isdir(indir):
|
if not os.path.isdir(indir):
|
||||||
print('ERROR: Input directory does not exist:', indir
|
print('ERROR: Input directory does not exist:', indir
|
||||||
, '\nPlease either create or specify indir and rerun\nelse specify cmd option ---make_dirs')
|
, '\nPlease either create or specify indir and rerun\nelse specify cmd option --make_dirs')
|
||||||
sys.exit()
|
sys.exit()
|
||||||
if not os.path.isdir(outdir):
|
if not os.path.isdir(outdir):
|
||||||
print('ERROR: Output directory does not exist:', outdir
|
print('ERROR: Output directory does not exist:', outdir
|
||||||
, '\nPlease create or specify outdir and rerun\nelse specify cmd option ---make_dirs')
|
, '\nPlease create or specify outdir and rerun\nelse specify cmd option --make_dirs')
|
||||||
sys.exit()
|
sys.exit()
|
||||||
|
|
||||||
# Requires
|
# Requires
|
||||||
|
@ -317,7 +317,7 @@ for i, id in enumerate(clean_df.id):
|
||||||
print('RESULTS:')
|
print('RESULTS:')
|
||||||
print('Total WT in dr_muts_col:', wt)
|
print('Total WT in dr_muts_col:', wt)
|
||||||
print('Total matches of', gene, 'SNP matches in', dr_muts_col, ':', dr_gene_count)
|
print('Total matches of', gene, 'SNP matches in', dr_muts_col, ':', dr_gene_count)
|
||||||
print('Total samples with > 1', gene, 'muts in dr_muts_col:', len(id2_dr) )
|
print('Total samples with > 1', gene, 'nsSNPs in dr_muts_col:', len(id2_dr) )
|
||||||
print('=================================================================')
|
print('=================================================================')
|
||||||
|
|
||||||
del(clean_df, na_count, i, id, wt, id2_dr, count_gene_dr, count_wt)
|
del(clean_df, na_count, i, id, wt, id2_dr, count_gene_dr, count_wt)
|
||||||
|
@ -361,7 +361,7 @@ for i, id in enumerate(clean_df.id):
|
||||||
print('RESULTS:')
|
print('RESULTS:')
|
||||||
print('Total WT in other_muts_col:', wt_other)
|
print('Total WT in other_muts_col:', wt_other)
|
||||||
print('Total matches of', gene, 'SNP matches in', other_muts_col, ':', other_gene_count)
|
print('Total matches of', gene, 'SNP matches in', other_muts_col, ':', other_gene_count)
|
||||||
print('Total samples with > 1', gene, 'muts in other_muts_col:', len(id2_other) )
|
print('Total samples with > 1', gene, 'nsSNPs in other_muts_col:', len(id2_other) )
|
||||||
print('=================================================================')
|
print('=================================================================')
|
||||||
|
|
||||||
print('Predicting total no. of rows in the curated df:', dr_gene_count + other_gene_count
|
print('Predicting total no. of rows in the curated df:', dr_gene_count + other_gene_count
|
||||||
|
@ -851,7 +851,7 @@ else:
|
||||||
, '\nMuts are unique to dr_ and other_ mutation class'
|
, '\nMuts are unique to dr_ and other_ mutation class'
|
||||||
, '\n=========================================================')
|
, '\n=========================================================')
|
||||||
|
|
||||||
# inspect dr_muts and other muts
|
# inspect dr_muts and other muts: Fixed in case no ambiguous muts detected!
|
||||||
if dr_muts.isin(other_muts).sum() & other_muts.isin(dr_muts).sum() > 0:
|
if dr_muts.isin(other_muts).sum() & other_muts.isin(dr_muts).sum() > 0:
|
||||||
print('Finding ambiguous muts...'
|
print('Finding ambiguous muts...'
|
||||||
, '\n========================================================='
|
, '\n========================================================='
|
||||||
|
@ -861,23 +861,36 @@ if dr_muts.isin(other_muts).sum() & other_muts.isin(dr_muts).sum() > 0:
|
||||||
, '\nTotal no. of samples in other_muts present in dr_muts:', other_muts.isin(dr_muts).sum()
|
, '\nTotal no. of samples in other_muts present in dr_muts:', other_muts.isin(dr_muts).sum()
|
||||||
, '\nThese are:\n', other_muts[other_muts.isin(dr_muts)]
|
, '\nThese are:\n', other_muts[other_muts.isin(dr_muts)]
|
||||||
, '\n=========================================================')
|
, '\n=========================================================')
|
||||||
else:
|
|
||||||
sys.exit('Error: ambiguous muts present, but extraction failed. Debug!')
|
|
||||||
|
|
||||||
print('Counting no. of ambiguous muts...')
|
print('Counting no. of ambiguous muts...'
|
||||||
|
, '\nNo. of ambiguous muts in dr:'
|
||||||
|
, len(dr_muts[dr_muts.isin(other_muts)].value_counts().keys().tolist())
|
||||||
|
, '\nNo. of ambiguous muts in other:'
|
||||||
|
, len(other_muts[other_muts.isin(dr_muts)].value_counts().keys().tolist())
|
||||||
|
, '\n=========================================================')
|
||||||
|
|
||||||
if dr_muts[dr_muts.isin(other_muts)].nunique() == other_muts[other_muts.isin(dr_muts)].nunique():
|
if dr_muts[dr_muts.isin(other_muts)].nunique() == other_muts[other_muts.isin(dr_muts)].nunique():
|
||||||
common_muts = dr_muts[dr_muts.isin(other_muts)].value_counts().keys().tolist()
|
common_muts = dr_muts[dr_muts.isin(other_muts)].value_counts().keys().tolist()
|
||||||
print('Distinct no. of ambigiuous muts detected:'+ str(len(common_muts))
|
print('Distinct no. of ambigiuous muts detected:'+ str(len(common_muts))
|
||||||
, '\nlist of ambiguous mutations (see below):', *common_muts, sep = '\n')
|
, '\nlist of ambiguous mutations (see below):', *common_muts, sep = '\n')
|
||||||
print('\n===========================================================')
|
print('\n===========================================================')
|
||||||
else:
|
else:
|
||||||
print('Error: ambiguous muts detected, but extraction failed. Debug!'
|
#sys.exit('Error: ambiguous muts present, but extraction failed. Debug!')
|
||||||
, '\nNo. of ambiguous muts in dr:'
|
print('No: ambiguous muts present')
|
||||||
, len(dr_muts[dr_muts.isin(other_muts)].value_counts().keys().tolist())
|
|
||||||
, '\nNo. of ambiguous muts in other:'
|
#print('Counting no. of ambiguous muts...')
|
||||||
, len(other_muts[other_muts.isin(dr_muts)].value_counts().keys().tolist())
|
#if dr_muts[dr_muts.isin(other_muts)].nunique() == other_muts[other_muts.isin(dr_muts)].nunique():
|
||||||
, '\n=========================================================')
|
# common_muts = dr_muts[dr_muts.isin(other_muts)].value_counts().keys().tolist()
|
||||||
|
# print('Distinct no. of ambigiuous muts detected:'+ str(len(common_muts))
|
||||||
|
# , '\nlist of ambiguous mutations (see below):', *common_muts, sep = '\n')
|
||||||
|
# print('\n===========================================================')
|
||||||
|
#else:
|
||||||
|
# print('Error: ambiguous muts detected, but extraction failed. Debug!'
|
||||||
|
# , '\nNo. of ambiguous muts in dr:'
|
||||||
|
# , len(dr_muts[dr_muts.isin(other_muts)].value_counts().keys().tolist())
|
||||||
|
# , '\nNo. of ambiguous muts in other:'
|
||||||
|
# , len(other_muts[other_muts.isin(dr_muts)].value_counts().keys().tolist())
|
||||||
|
# , '\n=========================================================')
|
||||||
|
|
||||||
#%% clear variables
|
#%% clear variables
|
||||||
del(id_dr, id_other
|
del(id_dr, id_other
|
||||||
|
@ -893,25 +906,24 @@ del(c1, c2, col_to_split1, col_to_split2, comp_gene_samples, dr_WF0, dr_df, dr_m
|
||||||
#print(outdir)
|
#print(outdir)
|
||||||
#dr_muts.to_csv('dr_muts.csv', header = True)
|
#dr_muts.to_csv('dr_muts.csv', header = True)
|
||||||
#other_muts.to_csv('other_muts.csv', header = True)
|
#other_muts.to_csv('other_muts.csv', header = True)
|
||||||
|
if dr_muts.isin(other_muts).sum() & other_muts.isin(dr_muts).sum() > 0:
|
||||||
|
out_filename_ambig_muts = gene.lower() + '_ambiguous_muts.csv'
|
||||||
|
outfile_ambig_muts = outdir + '/' + out_filename_ambig_muts
|
||||||
|
print('\n----------------------------------'
|
||||||
|
, '\nWriting file: ambiguous muts'
|
||||||
|
, '\n----------------------------------'
|
||||||
|
, '\nFilename:', outfile_ambig_muts)
|
||||||
|
inspect = gene_LF1[gene_LF1['mutation'].isin(common_muts)]
|
||||||
|
inspect.to_csv(outfile_ambig_muts, index = False)
|
||||||
|
|
||||||
out_filename_ambig_muts = gene.lower() + '_ambiguous_muts.csv'
|
print('Finished writing:', out_filename_ambig_muts
|
||||||
outfile_ambig_muts = outdir + '/' + out_filename_ambig_muts
|
, '\nNo. of rows:', len(inspect)
|
||||||
print('\n----------------------------------'
|
, '\nNo. of cols:', len(inspect.columns)
|
||||||
, '\nWriting file: ambiguous muts'
|
, '\nNo. of rows = no. of samples with the ambiguous muts present:'
|
||||||
, '\n----------------------------------'
|
, dr_muts.isin(other_muts).sum() + other_muts.isin(dr_muts).sum()
|
||||||
, '\nFilename:', outfile_ambig_muts)
|
, '\n=============================================================')
|
||||||
|
del(out_filename_ambig_muts)
|
||||||
|
|
||||||
inspect = gene_LF1[gene_LF1['mutation'].isin(common_muts)]
|
|
||||||
inspect.to_csv(outfile_ambig_muts, index = False)
|
|
||||||
|
|
||||||
print('Finished writing:', out_filename_ambig_muts
|
|
||||||
, '\nNo. of rows:', len(inspect)
|
|
||||||
, '\nNo. of cols:', len(inspect.columns)
|
|
||||||
, '\nNo. of rows = no. of samples with the ambiguous muts present:'
|
|
||||||
, dr_muts.isin(other_muts).sum() + other_muts.isin(dr_muts).sum()
|
|
||||||
, '\n=============================================================')
|
|
||||||
|
|
||||||
del(out_filename_ambig_muts)
|
|
||||||
#%% end of data extraction and some files writing. Below are some more files writing.
|
#%% end of data extraction and some files writing. Below are some more files writing.
|
||||||
#=============================================================================
|
#=============================================================================
|
||||||
#%% Formatting df: read aa dict and pull relevant info
|
#%% Formatting df: read aa dict and pull relevant info
|
||||||
|
@ -1181,7 +1193,7 @@ if snps_only.mutationinformation.isna().sum() == 0:
|
||||||
else:
|
else:
|
||||||
sys.exit('FAIL: SNP has NA, Possible mapping issues from dict?')
|
sys.exit('FAIL: SNP has NA, Possible mapping issues from dict?')
|
||||||
|
|
||||||
out_filename_mcsmsnps = gene.lower() + '_mcsm_snps.csv'
|
out_filename_mcsmsnps = gene.lower() + '_mcsm_formatted_snps.csv'
|
||||||
outfile_mcsmsnps = outdir + '/' + out_filename_mcsmsnps
|
outfile_mcsmsnps = outdir + '/' + out_filename_mcsmsnps
|
||||||
|
|
||||||
print('\n----------------------------------'
|
print('\n----------------------------------'
|
||||||
|
@ -1215,7 +1227,7 @@ metadata_pos.sort_values(by = ['meta_pos_count'], ascending = False, inplace = T
|
||||||
out_filename_metadata_poscounts = gene.lower() + '_metadata_poscounts.csv'
|
out_filename_metadata_poscounts = gene.lower() + '_metadata_poscounts.csv'
|
||||||
outfile_metadata_poscounts = outdir + '/' + out_filename_metadata_poscounts
|
outfile_metadata_poscounts = outdir + '/' + out_filename_metadata_poscounts
|
||||||
print('\n----------------------------------'
|
print('\n----------------------------------'
|
||||||
, 'Writing file: Metadata poscounts'
|
, '\nWriting file: Metadata poscounts'
|
||||||
, '\n----------------------------------'
|
, '\n----------------------------------'
|
||||||
, '\nFile:', outfile_metadata_poscounts
|
, '\nFile:', outfile_metadata_poscounts
|
||||||
, '\n============================================================')
|
, '\n============================================================')
|
||||||
|
@ -1309,7 +1321,7 @@ out_filename_pos = gene.lower() + '_mutational_positons.csv'
|
||||||
outfile_pos = outdir + '/' + out_filename_pos
|
outfile_pos = outdir + '/' + out_filename_pos
|
||||||
|
|
||||||
print('\n----------------------------------'
|
print('\n----------------------------------'
|
||||||
, 'Writing file: mutational positions'
|
, '\nWriting file: mutational positions'
|
||||||
, '\n----------------------------------'
|
, '\n----------------------------------'
|
||||||
, '\nFile:', outfile_pos
|
, '\nFile:', outfile_pos
|
||||||
, '\nNo. of distinct positions:', len(pos_only_sorted)
|
, '\nNo. of distinct positions:', len(pos_only_sorted)
|
||||||
|
@ -1349,15 +1361,14 @@ print('============================================'
|
||||||
, '\nTotal no. of samples with missense muts:', len(gene_LF1)
|
, '\nTotal no. of samples with missense muts:', len(gene_LF1)
|
||||||
, '\nTotal no. of unique samples with missense muts:', gene_LF1['id'].nunique()
|
, '\nTotal no. of unique samples with missense muts:', gene_LF1['id'].nunique()
|
||||||
, '\n'
|
, '\n'
|
||||||
, '\nTotal no.of samples with common_ids:', nu_common_ids['id']
|
, '\nTotal no.of samples with common_ids:', nu_common_ids['id'])
|
||||||
, '\nTotal no.of samples with ambiguous muts:', len(inspect)
|
|
||||||
#, '\nTotal no.of unique ambiguous muts:', len(common_muts)
|
|
||||||
, '\nTotal no.of unique ambiguous muts:', inspect['mutation'].nunique()
|
|
||||||
, '\n============================================================='
|
|
||||||
, '\n\n\n')
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if dr_muts.isin(other_muts).sum() & other_muts.isin(dr_muts).sum() > 0:
|
||||||
|
print('\nTotal no.of samples with ambiguous muts:', len(inspect)
|
||||||
|
#, '\nTotal no.of unique ambiguous muts:', len(common_muts)
|
||||||
|
, '\nTotal no.of unique ambiguous muts:', inspect['mutation'].nunique()
|
||||||
|
, '\n============================================================='
|
||||||
|
, '\n\n\n')
|
||||||
#=======================================================================
|
#=======================================================================
|
||||||
print(u'\u2698' * 50,
|
print(u'\u2698' * 50,
|
||||||
'\nEnd of script: Data extraction and writing files'
|
'\nEnd of script: Data extraction and writing files'
|
||||||
|
|
|
@ -1,32 +1,42 @@
|
||||||
#========
|
#========
|
||||||
# data extraction: Must be run first to extract mutations for each drug-gene combination
|
# data extraction: Must be run first to extract mutations for each drug-gene combination
|
||||||
#========
|
#========
|
||||||
./data_extraction.py -d pyrazinamide -g pncA
|
./data_extraction.py -d <drug> -g <gene> --make_dirs
|
||||||
|
|
||||||
|
#========
|
||||||
|
# add chains to a PDB file: use this script for modeller models, which lack chain IDs
|
||||||
|
#========
|
||||||
|
add_chains_pdb.py <N> MY_PDB.pdb
|
||||||
|
|
||||||
|
#========
|
||||||
|
# pdb data extraction: finds chain discontinuities and removes invalid muts so that foldx and mcsm run properly!
|
||||||
|
#========
|
||||||
|
In progress...
|
||||||
|
|
||||||
#========
|
#========
|
||||||
# foldx: specify chain if default is NOT 'A'
|
# foldx: specify chain if default is NOT 'A'
|
||||||
#========
|
#========
|
||||||
./runFoldx.py -d pyrazinamide -g pncA
|
./runFoldx.py -d <drug> -g <gene> -c1 A -p /media/tanu/eb1d072a-3f73-427f-aeb8-f6852b5c5216/Data/processing
|
||||||
|
|
||||||
#========
|
#========
|
||||||
# mcsm: specify chain if default is NOT 'A'
|
# mcsm: specify chain if default is NOT 'A'
|
||||||
#========
|
#========
|
||||||
./run_mcsm.py -d pyrazinamide -g pncA -s submit -l PZA --debug
|
./run_mcsm.py -d <drug> -g <gene> -s submit -l PZA --debug
|
||||||
./run_mcsm.py -d pyrazinamide -g pncA -s get
|
./run_mcsm.py -d <drug> -g <gene> -s get
|
||||||
./run_mcsm.py -d pyrazinamide -g pncA -s format
|
./run_mcsm.py -d <drug> -g <gene> -s format
|
||||||
|
|
||||||
#====================
|
#====================
|
||||||
# other struct params
|
# other struct params
|
||||||
#====================
|
#====================
|
||||||
./dssp_df.py -d pyrazinamide -g pncA
|
./dssp_df.py -d <drug> -g <gene>
|
||||||
# Errors on matplot.lib warn=, so just comment it out for the timebeing!: MONKEY PATCH
|
# Errors on matplot.lib warn=, so just comment it out for the timebeing!: MONKEY PATCH
|
||||||
./kd_df.py -d pyrazinamide -g pncA -fasta # fixme: NO of cols says 2, but is actually 3
|
./kd_df.py -d <drug> -g <gene> -fasta # fixme: NO of cols says 2, but is actually 3
|
||||||
./rd_df.py -d pyrazinamide -g pncA # fixme: input tsv file is sourced manually from website!
|
./rd_df.py -d <drug> -g <gene> # fixme: input tsv file is sourced manually from website!
|
||||||
|
|
||||||
#==============================
|
#==============================
|
||||||
# af_or calcs: different types
|
# af_or calcs: different types
|
||||||
#==============================
|
#==============================
|
||||||
./af_or_calcs.R --d pyrazinamide --gene pncA # fixme: No conditional dir structure
|
./af_or_calcs.R -d <drug> -g <gene> # fixme: No conditional dir structure
|
||||||
|
|
||||||
#==============================
|
#==============================
|
||||||
# af_or calcs: kinship
|
# af_or calcs: kinship
|
||||||
|
@ -40,18 +50,18 @@ USE THE BELOW from within the or_kinship_link.py script or something?! as part o
|
||||||
# for now use the file already created using some manual wrestling to link
|
# for now use the file already created using some manual wrestling to link
|
||||||
# the OR for kinship with mutations
|
# the OR for kinship with mutations
|
||||||
|
|
||||||
./or_kinship_link.py -d pyrazinamide -g pncA -sc 2288681 -ec 2289241
|
./or_kinship_link.py -d <drug> -g <gene> -sc <start_coord> -ec <end_coord>
|
||||||
|
|
||||||
#==============================
|
#==============================
|
||||||
# formatting: ns<gene>_snp_info.txt
|
# formatting: ns<gene>_snp_info.txt
|
||||||
#==============================
|
#==============================
|
||||||
# This adds mcsm style muts
|
# This adds mcsm style muts
|
||||||
./snpinfo_format.py -d pyrazinamide -g pncA
|
./snpinfo_format.py -d <drug> -g <gene>
|
||||||
|
|
||||||
#==============================
|
#==============================
|
||||||
# combining dfs: combining_dfs.py
|
# combining dfs: combining_dfs.py
|
||||||
#==============================
|
#==============================
|
||||||
# FIXME: combining_FIXME.py
|
# FIXME: combining_FIXME.py
|
||||||
./combining_dfs.py -d pyrazinamide -g pncA
|
./combining_dfs.py -d <drug> -g <gene>
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue