This commit is contained in:
Tanushree Tunstall 2021-02-09 16:12:34 +00:00
commit bcf4467c44
17 changed files with 1538 additions and 109 deletions

46
dynamut/dynamut.py Executable file
View file

@ -0,0 +1,46 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Aug 19 14:33:51 2020
@author: tanu
"""
#%% load packages
import os,sys
import subprocess
import argparse
import requests
import re
import time
from bs4 import BeautifulSoup
import pandas as pd
from pandas.api.types import is_string_dtype
from pandas.api.types import is_numeric_dtype
#%%============================================================================
#1) define muts batch
#take mcsm file
#split into 'n' batches
#write output file with suffix of batch number
#********** done this par ****************
#2) get results for a batch url
# read file
# store batch url
#extract number
#build single url
#build single results urls
#get results and store them in df
#update df
#dim of df = no. of muts in batch
#3) format results
# store unit measurements separtely
# omit unit measurements from cols
# create extra columns '_outcome' suffix by splitting numerical output
# create separate col for mcsm as it doesn't have output text
#%%============================================================================

101
dynamut/dynamut_test.py Executable file
View file

@ -0,0 +1,101 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Aug 19 14:33:51 2020
@author: tanu
"""
#%% load packages
import os,sys
import subprocess
import argparse
import requests
import re
import time
from bs4 import BeautifulSoup
import pandas as pd
from pandas.api.types import is_string_dtype
from pandas.api.types import is_numeric_dtype
#%%============================================================================
batch_result_url = 'http://biosig.unimelb.edu.au/dynamut/results_prediction/15955901077'
mut = 'S104R'
single_result_url = 'http://biosig.unimelb.edu.au/dynamut/single_results/15955901077' + '/' + mut
#%%============================================================================
param_dict = {}
result_response = requests.get(single_result_url)
if result_response.status_code == 200:
print('Fetching results')
# extract results using the html parser
soup = BeautifulSoup(result_response.text, features = 'html.parser')
#web_result_raw = soup.find(id = 'predictions').get_text()
ddg_dynamut = soup.find(id = 'ddg_dynamut').get_text()
ddg_encom = soup.find(id = 'ddg_encom').get_text()
ddg_mcsm = soup.find(id = 'ddg_mcsm').get_text()
ddg_sdm = soup.find(id = 'ddg_sdm').get_text()
ddg_duet = soup.find(id = 'ddg_duet').get_text()
dds_encom = soup.find(id = 'dds_encom').get_text()
param_dict = {"mutationinformation" : mut
, "ddg_dynamut" : ddg_dynamut
, "ddg_encom" : ddg_encom
, "ddg_mcsm" : ddg_mcsm
, "ddg_sdm" : ddg_sdm
, "ddg_duet" : ddg_duet
, "dds_encom" : dds_encom
}
results_df = pd.DataFrame.from_dict(param_dict, orient = "index").T
#%% for loop
#%%
host_dynamut = 'http://biosig.unimelb.edu.au/dynamut'
batch_url_number = re.search(r'([0-9]+)$', batch_result_url).group(0)
single_url = host_dynamut + '/single_results/' + batch_url_number
muts = ["S104R", "G24R"]
# initilialise empty df
dynamut_results_df = pd.DataFrame()
for i, mut in enumerate(muts):
#param_dict = {}
print('Running mutation', i, ':', mut)
snp = mut
single_result_url = single_url + '/' + snp
print('Getting results from:', single_result_url)
result_response = requests.get(single_result_url)
if result_response.status_code == 200:
print('Fetching results')
# extract results using the html parser
soup = BeautifulSoup(result_response.text, features = 'html.parser')
#web_result_raw = soup.find(id = 'predictions').get_text()
ddg_dynamut = soup.find(id = 'ddg_dynamut').get_text()
ddg_encom = soup.find(id = 'ddg_encom').get_text()
ddg_mcsm = soup.find(id = 'ddg_mcsm').get_text()
ddg_sdm = soup.find(id = 'ddg_sdm').get_text()
ddg_duet = soup.find(id = 'ddg_duet').get_text()
dds_encom = soup.find(id = 'dds_encom').get_text()
param_dict = {"mutationinformation" : snp
, "ddg_dynamut" : ddg_dynamut
, "ddg_encom" : ddg_encom
, "ddg_mcsm" : ddg_mcsm
, "ddg_sdm" : ddg_sdm
, "ddg_duet" : ddg_duet
, "dds_encom" : dds_encom
}
results_df = pd.DataFrame.from_dict(param_dict, orient = "index").T
print(results_df)
dynamut_results_df = dynamut_results_df.append(results_df)
print(dynamut_results_df)

View file

@ -2,7 +2,7 @@ PDB=$1
n=$2
OUTDIR=$3
cd ${OUTDIR}
logger "Running mutrenamefiles with PDB: ${PDB} n: ${n} OUTDIR: ${OUTDIR}"
cp Matrix_Hbonds_${PDB}_Repair_${n}_PN.fxout Matrix_Hbonds_${PDB}_Repair_${n}_PN.txt
sed -n '5,190p' Matrix_Hbonds_${PDB}_Repair_${n}_PN.fxout > Matrix_Hbonds_RR_${PDB}_Repair_${n}_PN.txt
sed -n '194,379p' Matrix_Hbonds_${PDB}_Repair_${n}_PN.fxout > Matrix_Hbonds_MM_${PDB}_Repair_${n}_PN.txt
@ -61,9 +61,3 @@ cp InteractingResidues_Volumetric_${PDB}_Repair_${n}_PN.fxout InteractingResidue
sed -i '1,5d' InteractingResidues_Volumetric_${PDB}_Repair_${n}_PN.txt
cp InteractingResidues_Disulfide_${PDB}_Repair_${n}_PN.fxout InteractingResidues_Disulfide_${PDB}_Repair_${n}_PN.txt
sed -i '1,5d' InteractingResidues_Disulfide_${PDB}_Repair_${n}_PN.txt

View file

@ -1,7 +1,7 @@
PDB=$1
OUTDIR=$2
cd ${OUTDIR}
logger "Running renamefiles"
cp Dif_${PDB}_Repair.fxout Dif_${PDB}_Repair.txt
sed -i '1,8d' Dif_${PDB}_Repair.txt
cp Matrix_Hbonds_${PDB}_Repair_PN.fxout Matrix_Hbonds_${PDB}_Repair_PN.txt
@ -62,9 +62,3 @@ cp InteractingResidues_Volumetric_${PDB}_Repair_PN.fxout InteractingResidues_Vol
sed -i '1,5d' InteractingResidues_Volumetric_${PDB}_Repair_PN.txt
cp InteractingResidues_Disulfide_${PDB}_Repair_PN.fxout InteractingResidues_Disulfide_${PDB}_Repair_PN.txt
sed -i '1,5d' InteractingResidues_Disulfide_${PDB}_Repair_PN.txt

View file

@ -1,6 +1,7 @@
#!/usr/bin/env python3
import subprocess
import os
import sys
import numpy as np
import pandas as pd
from contextlib import suppress
@ -8,6 +9,8 @@ from pathlib import Path
import re
import csv
import argparse
import shutil
import time
#https://realpython.com/python-pathlib/
# FIXME
@ -22,8 +25,8 @@ homedir = os.path.expanduser('~')
# set working dir
os.getcwd()
os.chdir(homedir + '/git/LSHTM_analysis/foldx/')
os.getcwd()
#os.chdir(homedir + '/git/LSHTM_analysis/foldx/')
#os.getcwd()
#=======================================================================
#%% command line args
@ -35,11 +38,12 @@ arg_parser.add_argument('-g', '--gene', help = 'gene name (case sensitive)',
arg_parser.add_argument('--datadir', help = 'Data Directory. By default, it assmumes homedir + git/Data')
arg_parser.add_argument('-i', '--input_dir', help = 'Input dir containing pdb files. By default, it assmumes homedir + <drug> + input')
arg_parser.add_argument('-o', '--output_dir', help = 'Output dir for results. By default, it assmes homedir + <drug> + output')
arg_parser.add_argument('-p', '--process_dir', help = 'Temp processing dir for running foldX. By default, it assmes homedir + <drug> + processing. Make sure it is somewhere with LOTS of storage as it writes all output!')
arg_parser.add_argument('-p', '--process_dir', help = 'Temp processing dir for running foldX. By default, it assmes homedir + <drug> + processing. Make sure it is somewhere with LOTS of storage as it writes all output!') #FIXME
arg_parser.add_argument('-pdb', '--pdb_file', help = 'PDB File to process. By default, it assmumes a file called <gene>_complex.pdb in input_dir')
arg_parser.add_argument('-P', '--pdb_file', help = 'PDB File to process. By default, it assmumes a file called <gene>_complex.pdb in input_dir')
arg_parser.add_argument('-m', '--mutation_file', help = 'Mutation list. By default, assumes a file called <gene>_mcsm_snps.csv exists')
# FIXME: Doesn't work with 2 chains yet!
arg_parser.add_argument('-c1', '--chain1', help = 'Chain1 ID', default = 'A') # case sensitive
arg_parser.add_argument('-c2', '--chain2', help = 'Chain2 ID', default = 'B') # case sensitive
@ -64,11 +68,20 @@ chainA = args.chain1
chainB = args.chain2
pdb_filename = args.pdb_file
# os.path.splitext will fail interestingly with file.pdb.txt.zip
#pdb_name = os.path.splitext(pdb_file)[0]
# Just the filename, thanks
#pdb_name = Path(in_filename_pdb).stem
# Handle the case where neither 'drug'
# nor (indir,outdir,process_dir) are defined
if not drug:
if not indir or not outdir or not process_dir:
print('ERROR: if "drug" is not specified, you must specify Input, Output, and Process directories')
sys.exit()
#==============
# directories
#==============
@ -83,27 +96,37 @@ if not outdir:
#TODO: perhaps better handled by refactoring code to prevent generating lots of output files!
if not process_dir:
process_dir = datadir + '/' + drug +'/' + 'processing'
process_dir = datadir + '/' + drug + '/processing'
# Make all paths absolute in case the user forgot
indir = os.path.abspath(indir)
process_dir = os.path.abspath(process_dir)
outdir = os.path.abspath(outdir)
datadir = os.path.abspath(datadir)
#=======
# input
#=======
# FIXME
if pdb_filename:
pdb_filename = os.path.abspath(pdb_filename)
pdb_name = Path(pdb_filename).stem
infile_pdb = pdb_filename
else:
pdb_filename = gene.lower() + '_complex.pdb'
pdb_name = Path(pdb_filename).stem
infile_pdb = indir + '/' + pdb_filename
actual_pdb_filename = Path(infile_pdb).name
if mut_filename:
mutation_file = mut_filename
mutation_file = os.path.abspath(mut_filename)
infile_muts = mutation_file
print('User-provided mutation file in use:', infile_muts)
else:
mutation_file = gene.lower() + '_mcsm_snps.csv'
mutation_file = gene.lower() + '_mcsm_formatted_snps.csv'
infile_muts = outdir + '/' + mutation_file
print('WARNING: Assuming default mutation file:', infile_muts)
#=======
# output
@ -115,6 +138,7 @@ print('Arguments being passed:'
, '\nDrug:', args.drug
, '\ngene:', args.gene
, '\ninput dir:', indir
, '\nprocess dir:', process_dir
, '\noutput dir:', outdir
, '\npdb file:', infile_pdb
, '\npdb name:', pdb_name
@ -123,6 +147,10 @@ print('Arguments being passed:'
, '\nchain1:', args.chain1
, '\noutput file:', outfile_foldx
, '\n=============================================================')
#### Delay for 10 seconds to check the params ####
print('Sleeping for 10 seconds to give you time to cancel')
time.sleep(10)
#=======================================================================
def getInteractionEnergy(filename):
@ -183,6 +211,19 @@ def loadFiles(df):
f.close()
return np.asarray(resultList, dtype=np.float32)
# TODO: put the subprocess call in a 'def'
#def repairPDB():
# subprocess.call(['foldx'
# , '--command=RepairPDB'
# , '--pdb-dir=' + indir
# , '--pdb=' + actual_pdb_filename
# , '--ionStrength=0.05'#
# , '--pH=7'
# , '--water=PREDICT'
# , '--vdwDesign=1'
# , 'outPDB=true'
# , '--output-dir=' + process_dir])
#=======================================================================
def main():
pdbname = pdb_name
@ -194,37 +235,128 @@ def main():
nmuts = len(mutlist)
print(nmuts)
print(mutlist)
print('start')
#subprocess.check_output(['bash','repairPDB.sh', pdbname, process_dir])
subprocess.check_output(['bash','repairPDB.sh', indir, actual_pdb_filename, process_dir])
# some common parameters for foldX
foldx_common=' --ionStrength=0.05 --pH=7 --water=PREDICT --vdwDesign=1 '
print('end')
output = subprocess.check_output(['bash', 'runfoldx.sh', pdbname, process_dir])
print('\033[95mSTAGE: repair PDB (foldx subprocess) \033[0m')
print('Running foldx RepairPDB for WT')
subprocess.call(['foldx'
, '--command=RepairPDB'
, foldx_common
, '--pdb-dir=' + os.path.dirname(pdb_filename)
, '--pdb=' + actual_pdb_filename
, 'outPDB=true'
, '--output-dir=' + process_dir])
print('\033[95mCOMPLETED STAGE: repair PDB\033[0m')
print('\n==========================================================')
print('\033[95mSTAGE: Foldx commands BM, PN and SD (foldx subprocess) for WT\033[0m')
print('Running foldx BuildModel for WT')
subprocess.call(['foldx'
, '--command=BuildModel'
, foldx_common
, '--pdb-dir=' + process_dir
, '--pdb=' + pdbname + '_Repair.pdb'
, '--mutant-file="individual_list_' + pdbname +'.txt"'
, 'outPDB=true'
, '--numberOfRuns=1'
, '--output-dir=' + process_dir], cwd=process_dir)
print('Running foldx PrintNetworks for WT')
subprocess.call(['foldx'
, '--command=PrintNetworks'
, '--pdb-dir=' + process_dir
, '--pdb=' + pdbname + '_Repair.pdb'
, '--water=PREDICT'
, '--vdwDesign=1'
, '--output-dir=' + process_dir], cwd=process_dir)
print('Running foldx SequenceDetail for WT')
subprocess.call(['foldx'
, '--command=SequenceDetail'
, '--pdb-dir=' + process_dir
, '--pdb=' + pdbname + '_Repair.pdb'
, '--water=PREDICT'
, '--vdwDesign=1'
, '--output-dir=' + process_dir], cwd=process_dir)
print('\033[95mCOMPLETED STAGE: Foldx commands BM, PN and SD\033[0m')
print('\n==========================================================')
print('\033[95mSTAGE: Print Networks (foldx subprocess) for MT\033[0m')
for n in range(1,nmuts+1):
print(n)
with suppress(Exception):
subprocess.check_output(['bash', 'runPrintNetworks.sh', pdbname, str(n), process_dir])
print('\033[95mNETWORK:\033[0m', n)
print('Running foldx PrintNetworks for mutation', n)
subprocess.call(['foldx'
, '--command=PrintNetworks'
, '--pdb-dir=' + process_dir
, '--pdb=' + pdbname + '_Repair_' + str(n) + '.pdb'
, '--water=PREDICT'
, '--vdwDesign=1'
, '--output-dir=' + process_dir], cwd=process_dir)
print('\033[95mCOMPLETED STAGE: Print Networks (foldx subprocess) for MT\033[0m')
print('\n==========================================================')
print('\033[95mSTAGE: Rename Mutation Files (shell)\033[0m')
for n in range(1,nmuts+1):
print(n)
print('\033[95mMUTATION:\033[0m', n)
print('\033[96mCommand:\033[0m mutrenamefiles.sh %s %s %s' % (pdbname, str(n), process_dir ))
#FIXME: bad design and needs to be done in a pythonic way
with suppress(Exception):
subprocess.check_output(['bash', 'mutrenamefiles.sh', pdbname, str(n), process_dir])
print('\033[95mCOMPLETED STAGE: Rename Mutation Files (shell)\033[0m')
print('\n==========================================================')
print('\033[95mSTAGE: Rename Files (shell) for WT\033[0m')
# FIXME: this is bad design and needs to be done in a pythonic way
out = subprocess.check_output(['bash','renamefiles.sh', pdbname, process_dir])
print('\033[95mCOMPLETED STAGE: Rename Files (shell) for WT\033[0m')
print('\n==========================================================')
if comp=='y':
print('\033[95mSTAGE: Running foldx AnalyseComplex (foldx subprocess) for WT\033[0m')
chain1=chainA
chain2=chainB
with suppress(Exception):
subprocess.check_output(['bash','runcomplex.sh', pdbname, chain1, chain2, process_dir])
for n in range(1,nmuts+1):
with suppress(Exception):
subprocess.check_output(['bash','mutruncomplex.sh', pdbname, chain1, chain2, str(n), process_dir])
subprocess.call(['foldx'
, '--command=AnalyseComplex'
, '--pdb-dir=' + process_dir
, '--pdb=' + pdbname + '_Repair.pdb'
, '--analyseComplexChains=' + chain1 + ',' + chain2
, '--water=PREDICT'
, '--vdwDesign=1'
, '--output-dir=' + process_dir], cwd=process_dir)
interactions = ['Distances','Electro_RR','Electro_MM','Electro_SM','Electro_SS','Disulfide_RR','Disulfide_MM','Disulfide_SM','Disulfide_SS',
'Hbonds_RR','Hbonds_MM','Hbonds_SM','Hbonds_SS','Partcov_RR','Partcov_MM','Partcov_SM','Partcov_SS','VdWClashes_RR','VdWClashes_MM',
'VdWClashes_SM','VdWClashes_SS','Volumetric_RR','Volumetric_MM','Volumetric_SM','Volumetric_SS']
# FIXME why would we ever need to do this?!? Cargo-culted from runcomplex.sh
ac_source = process_dir + '/Summary_' + pdbname + '_Repair_AC.fxout'
ac_dest = process_dir + '/Summary_' + pdbname + '_Repair_AC.txt'
shutil.copyfile(ac_source, ac_dest)
print('\033[95mCOMPLETED STAGE: foldx AnalyseComplex (subprocess) for WT:\033[0m', n)
for n in range(1,nmuts+1):
print('\033[95mSTAGE: Running foldx AnalyseComplex (foldx subprocess) for mutation:\033[0m', n)
subprocess.call(['foldx'
, '--command=AnalyseComplex'
, '--pdb-dir=' + process_dir
, '--pdb=' + pdbname + '_Repair_' + str(n) + '.pdb'
, '--analyseComplexChains=' + chain1 + ',' + chain2
, '--water=PREDICT'
, '--vdwDesign=1'
, '--output-dir=' + process_dir], cwd=process_dir)
# FIXME why would we ever need to do this?!? Cargo-culted from runcomplex.sh
ac_mut_source = process_dir + '/Summary_' + pdbname + '_Repair_' + str(n) +'_AC.fxout'
ac_mut_dest = process_dir + '/Summary_' + pdbname + '_Repair)' + str(n) +'_AC.txt'
shutil.copyfile(ac_mut_source, ac_mut_dest)
print('\033[95mCOMPLETED STAGE: foldx AnalyseComplex (subprocess) for mutation:\033[0m', n)
print('\n==========================================================')
interactions = ['Distances','Electro_RR','Electro_MM','Electro_SM','Electro_SS','Disulfide_RR','Disulfide_MM','Disulfide_SM','Disulfide_SS', 'Hbonds_RR','Hbonds_MM','Hbonds_SM','Hbonds_SS','Partcov_RR','Partcov_MM','Partcov_SM','Partcov_SS','VdWClashes_RR','VdWClashes_MM','VdWClashes_SM','VdWClashes_SS','Volumetric_RR','Volumetric_MM','Volumetric_SM','Volumetric_SS']
dGdatafile = process_dir + '/Dif_' + pdbname + '_Repair.txt'
dGdata = pd.read_csv(dGdatafile, sep = '\t')
@ -266,8 +398,7 @@ def main():
print(d)
data[i+1] = d
interactions = ['ddG', 'Distances','Electro_RR','Electro_MM','Electro_SM','Electro_SS','Disulfide_RR','Disulfide_MM','Disulfide_SM','Disulfide_SS', 'Hbonds_RR','Hbonds_MM','Hbonds_SM','Hbonds_SS','Partcov_RR','Partcov_MM','Partcov_SM','Partcov_SS','VdWClashes_RR','VdWClashes_MM',
'VdWClashes_SM','VdWClashes_SS','Volumetric_RR','Volumetric_MM','Volumetric_SM','Volumetric_SS']
interactions = ['ddG', 'Distances','Electro_RR','Electro_MM','Electro_SM','Electro_SS','Disulfide_RR','Disulfide_MM','Disulfide_SM','Disulfide_SS', 'Hbonds_RR','Hbonds_MM','Hbonds_SM','Hbonds_SS','Partcov_RR','Partcov_MM','Partcov_SM','Partcov_SS','VdWClashes_RR','VdWClashes_MM','VdWClashes_SM','VdWClashes_SS','Volumetric_RR','Volumetric_MM','Volumetric_SM','Volumetric_SS']
print(interactions)
@ -291,8 +422,7 @@ def main():
print(len(IE))
data = np.append(data,[IE], axis = 0)
print(data)
interactions = ['ddG','Distances','Electro_RR','Electro_MM','Electro_SM','Electro_SS','Disulfide_RR','Disulfide_MM','Disulfide_SM','Disulfide_SS', 'Hbonds_RR','Hbonds_MM','Hbonds_SM','Hbonds_SS','Partcov_RR','Partcov_MM','Partcov_SM','Partcov_SS','VdWClashes_RR','VdWClashes_MM',
'VdWClashes_SM','VdWClashes_SS','Volumetric_RR','Volumetric_MM','Volumetric_SM','Volumetric_SS','Interaction Energy']
interactions = ['ddG','Distances','Electro_RR','Electro_MM','Electro_SM','Electro_SS','Disulfide_RR','Disulfide_MM','Disulfide_SM','Disulfide_SS','Hbonds_RR','Hbonds_MM','Hbonds_SM','Hbonds_SS','Partcov_RR','Partcov_MM','Partcov_SM','Partcov_SS','VdWClashes_RR','VdWClashes_MM','VdWClashes_SM','VdWClashes_SS','Volumetric_RR','Volumetric_MM','Volumetric_SM','Volumetric_SS','Interaction Energy']
mut_file = process_dir + '/individual_list_' + pdbname + '.txt'
with open(mut_file) as csvfile:
@ -330,6 +460,7 @@ def main():
#outputfilename = 'foldx_results_' + pdbname + '.csv'
#results.to_csv(outputfilename)
results2.to_csv(outputfilename, index = False)
print ('end')
if __name__ == '__main__':
main()

466
foldx/runFoldx5.py Executable file
View file

@ -0,0 +1,466 @@
#!/usr/bin/env python3
import subprocess
import os
import sys
import numpy as np
import pandas as pd
from contextlib import suppress
from pathlib import Path
import re
import csv
import argparse
import shutil
import time
#https://realpython.com/python-pathlib/
# FIXME
#strong dependency of file and path names
#cannot pass file with path. Need to pass them separately
#assumptions made for dir struc as standard
#datadir + drug + input
#=======================================================================
#%% specify input and curr dir
homedir = os.path.expanduser('~')
# set working dir
os.getcwd()
#os.chdir(homedir + '/git/LSHTM_analysis/foldx/')
#os.getcwd()
#=======================================================================
#%% command line args
arg_parser = argparse.ArgumentParser()
arg_parser.add_argument('-d', '--drug', help = 'drug name', default = None)
arg_parser.add_argument('-g', '--gene', help = 'gene name (case sensitive)', default = None)
arg_parser.add_argument('--datadir', help = 'Data Directory. By default, it assmumes homedir + git/Data')
arg_parser.add_argument('-i', '--input_dir', help = 'Input dir containing pdb files. By default, it assmumes homedir + <drug> + input')
arg_parser.add_argument('-o', '--output_dir', help = 'Output dir for results. By default, it assmes homedir + <drug> + output')
arg_parser.add_argument('-p', '--process_dir', help = 'Temp processing dir for running foldX. By default, it assmes homedir + <drug> + processing. Make sure it is somewhere with LOTS of storage as it writes all output!') #FIXME
arg_parser.add_argument('-P', '--pdb_file', help = 'PDB File to process. By default, it assmumes a file called <gene>_complex.pdb in input_dir')
arg_parser.add_argument('-m', '--mutation_file', help = 'Mutation list. By default, assumes a file called <gene>_mcsm_snps.csv exists')
# FIXME: Doesn't work with 2 chains yet!
arg_parser.add_argument('-c1', '--chain1', help = 'Chain1 ID', default = 'A') # case sensitive
arg_parser.add_argument('-c2', '--chain2', help = 'Chain2 ID', default = 'B') # case sensitive
args = arg_parser.parse_args()
#=======================================================================
#%% variable assignment: input and output
#drug = 'pyrazinamide'
#gene = 'pncA'
#gene_match = gene + '_p.'
#%%=====================================================================
# Command line options
drug = args.drug
gene = args.gene
datadir = args.datadir
indir = args.input_dir
outdir = args.output_dir
process_dir = args.process_dir
mut_filename = args.mutation_file
chainA = args.chain1
chainB = args.chain2
pdb_filename = args.pdb_file
# os.path.splitext will fail interestingly with file.pdb.txt.zip
#pdb_name = os.path.splitext(pdb_file)[0]
# Just the filename, thanks
#pdb_name = Path(in_filename_pdb).stem
# Handle the case where neither 'drug'
# nor (indir,outdir,process_dir) are defined
if not drug:
if not indir or not outdir or not process_dir:
print('ERROR: if "drug" is not specified, you must specify Input, Output, and Process directories')
sys.exit()
#==============
# directories
#==============
if not datadir:
datadir = homedir + '/' + 'git/Data'
if not indir:
indir = datadir + '/' + drug + '/input'
if not outdir:
outdir = datadir + '/' + drug + '/output'
#TODO: perhaps better handled by refactoring code to prevent generating lots of output files!
if not process_dir:
process_dir = datadir + '/' + drug + '/processing'
# Make all paths absolute in case the user forgot
indir = os.path.abspath(indir)
process_dir = os.path.abspath(process_dir)
outdir = os.path.abspath(outdir)
datadir = os.path.abspath(datadir)
#=======
# input
#=======
# FIXME
if pdb_filename:
pdb_filename = os.path.abspath(pdb_filename)
pdb_name = Path(pdb_filename).stem
infile_pdb = pdb_filename
else:
pdb_filename = gene.lower() + '_complex.pdb'
pdb_name = Path(pdb_filename).stem
infile_pdb = indir + '/' + pdb_filename
actual_pdb_filename = Path(infile_pdb).name
if mut_filename:
mutation_file = os.path.abspath(mut_filename)
infile_muts = mutation_file
print('User-provided mutation file in use:', infile_muts)
else:
mutation_file = gene.lower() + '_mcsm_formatted_snps.csv'
infile_muts = outdir + '/' + mutation_file
print('WARNING: Assuming default mutation file:', infile_muts)
#=======
# output
#=======
out_filename = gene.lower() + '_foldx.csv'
outfile_foldx = outdir + '/' + out_filename
print('Arguments being passed:'
, '\nDrug:', args.drug
, '\ngene:', args.gene
, '\ninput dir:', indir
, '\nprocess dir:', process_dir
, '\noutput dir:', outdir
, '\npdb file:', infile_pdb
, '\npdb name:', pdb_name
, '\nactual pdb name:', actual_pdb_filename
, '\nmutation file:', infile_muts
, '\nchain1:', args.chain1
, '\noutput file:', outfile_foldx
, '\n=============================================================')
#### Delay for 10 seconds to check the params ####
print('Sleeping for 10 seconds to give you time to cancel')
time.sleep(10)
#=======================================================================
def getInteractionEnergy(filename):
data = pd.read_csv(filename,sep = '\t')
return data['Interaction Energy'].loc[0]
def getInteractions(filename):
data = pd.read_csv(filename, index_col = 0, header = 0, sep = '\t')
contactList = getIndexes(data,1)
number = len(contactList)
return number
def formatMuts(mut_file,pdbname):
with open(mut_file) as csvfile:
readCSV = csv.reader(csvfile)
muts = []
for row in readCSV:
mut = row[0]
muts.append(mut)
mut_list = []
outfile = process_dir + '/individual_list_' + pdbname + '.txt'
with open(outfile, 'w') as output:
for m in muts:
print(m)
mut = m[:1] + chainA+ m[1:]
mut_list.append(mut)
mut = mut + ';'
print(mut)
output.write(mut)
output.write('\n')
return mut_list
def getIndexes(data, value):
colnames = data.columns.values
listOfPos = list()
result = data.isin([value])
result.columns = colnames
seriesdata = result.any()
columnNames = list(seriesdata[seriesdata==True].index)
for col in columnNames:
rows = list(result[col][result[col]==True].index)
for row in rows:
listOfPos.append((row,col))
return listOfPos
def loadFiles(df):
# load a text file in to np matrix
resultList = []
f = open(df,'r')
for line in f:
line = line.rstrip('\n')
aVals = line.split('\t')
fVals = list(map(np.float32, sVals))
resultList.append(fVals)
f.close()
return np.asarray(resultList, dtype=np.float32)
# TODO: put the subprocess call in a 'def'
#def repairPDB():
# subprocess.call(['foldx'
# , '--command=RepairPDB'
# , '--pdb-dir=' + indir
# , '--pdb=' + actual_pdb_filename
# , '--ionStrength=0.05'#
# , '--pH=7'
# , '--water=PREDICT'
# , '--vdwDesign=1'
# , 'outPDB=true'
# , '--output-dir=' + process_dir])
#=======================================================================
def main():
pdbname = pdb_name
comp = '' # for complex only
mut_filename = infile_muts #pnca_mcsm_snps.csv
mutlist = formatMuts(mut_filename, pdbname)
print(mutlist)
nmuts = len(mutlist)
print(nmuts)
print(mutlist)
print('start')
# some common parameters for foldX
foldx_common=' --ionStrength=0.05 --pH=7 --water=PREDICT --vdwDesign=1 '
print('\033[95mSTAGE: repair PDB (foldx subprocess) \033[0m')
print('Running foldx RepairPDB for WT')
subprocess.call(['foldx5'
, '--command=RepairPDB'
, foldx_common
, '--pdb-dir=' + os.path.dirname(pdb_filename)
, '--pdb=' + actual_pdb_filename
, 'outPDB=true'
, '--output-dir=' + process_dir])
print('\033[95mCOMPLETED STAGE: repair PDB\033[0m')
print('\n==========================================================')
print('\033[95mSTAGE: Foldx commands BM, PN and SD (foldx subprocess) for WT\033[0m')
print('Running foldx BuildModel for WT')
subprocess.call(['foldx5'
, '--command=BuildModel'
, foldx_common
, '--pdb-dir=' + process_dir
, '--pdb=' + pdbname + '_Repair.pdb'
, '--mutant-file="individual_list_' + pdbname +'.txt"'
, 'outPDB=true'
, '--numberOfRuns=1'
, '--output-dir=' + process_dir], cwd=process_dir)
print('Running foldx PrintNetworks for WT')
subprocess.call(['foldx5'
, '--command=PrintNetworks'
, '--pdb-dir=' + process_dir
, '--pdb=' + pdbname + '_Repair.pdb'
, '--water=PREDICT'
, '--vdwDesign=1'
, '--output-dir=' + process_dir], cwd=process_dir)
print('Running foldx SequenceDetail for WT')
subprocess.call(['foldx5'
, '--command=SequenceDetail'
, '--pdb-dir=' + process_dir
, '--pdb=' + pdbname + '_Repair.pdb'
, '--water=PREDICT'
, '--vdwDesign=1'
, '--output-dir=' + process_dir], cwd=process_dir)
print('\033[95mCOMPLETED STAGE: Foldx commands BM, PN and SD\033[0m')
print('\n==========================================================')
print('\033[95mSTAGE: Print Networks (foldx subprocess) for MT\033[0m')
for n in range(1,nmuts+1):
print('\033[95mNETWORK:\033[0m', n)
print('Running foldx PrintNetworks for mutation', n)
subprocess.call(['foldx5'
, '--command=PrintNetworks'
, '--pdb-dir=' + process_dir
, '--pdb=' + pdbname + '_Repair_' + str(n) + '.pdb'
, '--water=PREDICT'
, '--vdwDesign=1'
, '--output-dir=' + process_dir], cwd=process_dir)
print('\033[95mCOMPLETED STAGE: Print Networks (foldx subprocess) for MT\033[0m')
print('\n==========================================================')
print('\033[95mSTAGE: Rename Mutation Files (shell)\033[0m')
for n in range(1,nmuts+1):
print('\033[95mMUTATION:\033[0m', n)
print('\033[96mCommand:\033[0m mutrenamefiles.sh %s %s %s' % (pdbname, str(n), process_dir ))
#FIXME: bad design and needs to be done in a pythonic way
with suppress(Exception):
subprocess.check_output(['bash', 'mutrenamefiles.sh', pdbname, str(n), process_dir])
print('\033[95mCOMPLETED STAGE: Rename Mutation Files (shell)\033[0m')
print('\n==========================================================')
print('\033[95mSTAGE: Rename Files (shell) for WT\033[0m')
# FIXME: this is bad design and needs to be done in a pythonic way
out = subprocess.check_output(['bash','renamefiles.sh', pdbname, process_dir])
print('\033[95mCOMPLETED STAGE: Rename Files (shell) for WT\033[0m')
print('\n==========================================================')
if comp=='y':
print('\033[95mSTAGE: Running foldx AnalyseComplex (foldx subprocess) for WT\033[0m')
chain1=chainA
chain2=chainB
subprocess.call(['foldx5'
, '--command=AnalyseComplex'
, '--pdb-dir=' + process_dir
, '--pdb=' + pdbname + '_Repair.pdb'
, '--analyseComplexChains=' + chain1 + ',' + chain2
, '--water=PREDICT'
, '--vdwDesign=1'
, '--output-dir=' + process_dir], cwd=process_dir)
# FIXME why would we ever need to do this?!? Cargo-culted from runcomplex.sh
ac_source = process_dir + '/Summary_' + pdbname + '_Repair_AC.fxout'
ac_dest = process_dir + '/Summary_' + pdbname + '_Repair_AC.txt'
shutil.copyfile(ac_source, ac_dest)
print('\033[95mCOMPLETED STAGE: foldx AnalyseComplex (subprocess) for WT:\033[0m', n)
for n in range(1,nmuts+1):
print('\033[95mSTAGE: Running foldx AnalyseComplex (foldx subprocess) for mutation:\033[0m', n)
subprocess.call(['foldx5'
, '--command=AnalyseComplex'
, '--pdb-dir=' + process_dir
, '--pdb=' + pdbname + '_Repair_' + str(n) + '.pdb'
, '--analyseComplexChains=' + chain1 + ',' + chain2
, '--water=PREDICT'
, '--vdwDesign=1'
, '--output-dir=' + process_dir], cwd=process_dir)
# FIXME why would we ever need to do this?!? Cargo-culted from runcomplex.sh
ac_mut_source = process_dir + '/Summary_' + pdbname + '_Repair_' + str(n) +'_AC.fxout'
ac_mut_dest = process_dir + '/Summary_' + pdbname + '_Repair)' + str(n) +'_AC.txt'
shutil.copyfile(ac_mut_source, ac_mut_dest)
print('\033[95mCOMPLETED STAGE: foldx AnalyseComplex (subprocess) for mutation:\033[0m', n)
print('\n==========================================================')
interactions = ['Distances','Electro_RR','Electro_MM','Electro_SM','Electro_SS','Disulfide_RR','Disulfide_MM','Disulfide_SM','Disulfide_SS', 'Hbonds_RR','Hbonds_MM','Hbonds_SM','Hbonds_SS','Partcov_RR','Partcov_MM','Partcov_SM','Partcov_SS','VdWClashes_RR','VdWClashes_MM','VdWClashes_SM','VdWClashes_SS','Volumetric_RR','Volumetric_MM','Volumetric_SM','Volumetric_SS']
dGdatafile = process_dir + '/Dif_' + pdbname + '_Repair.txt'
dGdata = pd.read_csv(dGdatafile, sep = '\t')
ddG=[]
print('ddG')
print(len(dGdata))
for i in range(0,len(dGdata)):
ddG.append(dGdata['total energy'].loc[i])
nint = len(interactions)
wt_int = []
for i in interactions:
filename = process_dir + '/Matrix_' + i + '_'+ pdbname + '_Repair_PN.txt'
wt_int.append(getInteractions(filename))
print('wt')
print(wt_int)
ntotal = nint+1
print(ntotal)
print(nmuts)
data = np.empty((ntotal,nmuts))
data[0] = ddG
print(data)
for i in range(0,len(interactions)):
d=[]
p=0
for n in range(1, nmuts+1):
print(i)
filename = process_dir + '/Matrix_' + interactions[i] + '_' + pdbname + '_Repair_' + str(n) + '_PN.txt'
mut = getInteractions(filename)
diff = wt_int[i] - mut
print(diff)
print(wt_int[i])
print(mut)
d.append(diff)
print(d)
data[i+1] = d
interactions = ['ddG', 'Distances','Electro_RR','Electro_MM','Electro_SM','Electro_SS','Disulfide_RR','Disulfide_MM','Disulfide_SM','Disulfide_SS', 'Hbonds_RR','Hbonds_MM','Hbonds_SM','Hbonds_SS','Partcov_RR','Partcov_MM','Partcov_SM','Partcov_SS','VdWClashes_RR','VdWClashes_MM','VdWClashes_SM','VdWClashes_SS','Volumetric_RR','Volumetric_MM','Volumetric_SM','Volumetric_SS']
print(interactions)
IE = []
if comp=='y':
wtfilename = process_dir + '/Summary_' + pdbname + '_Repair_AC.txt'
wtE = getInteractionEnergy(wtfilename)
print(wtE)
for n in range(1,nmuts+1):
print(n)
filename = process_dir + '/Summary_' + pdbname + '_Repair_' + str(n) + '_AC.txt'
mutE = getInteractionEnergy(filename)
print(mutE)
diff = wtE - mutE
print(diff)
IE.append(diff)
print(IE)
IEresults = pd.DataFrame(IE,columns = ['Interaction Energy'], index = mutlist)
IEfilename = 'foldx_complexresults_'+pdbname+'.csv'
IEresults.to_csv(IEfilename)
print(len(IE))
data = np.append(data,[IE], axis = 0)
print(data)
interactions = ['ddG','Distances','Electro_RR','Electro_MM','Electro_SM','Electro_SS','Disulfide_RR','Disulfide_MM','Disulfide_SM','Disulfide_SS','Hbonds_RR','Hbonds_MM','Hbonds_SM','Hbonds_SS','Partcov_RR','Partcov_MM','Partcov_SM','Partcov_SS','VdWClashes_RR','VdWClashes_MM','VdWClashes_SM','VdWClashes_SS','Volumetric_RR','Volumetric_MM','Volumetric_SM','Volumetric_SS','Interaction Energy']
mut_file = process_dir + '/individual_list_' + pdbname + '.txt'
with open(mut_file) as csvfile:
readCSV = csv.reader(csvfile)
mutlist = []
for row in readCSV:
mut = row[0]
mutlist.append(mut)
print(mutlist)
print(len(mutlist))
print(data)
results = pd.DataFrame(data, columns = mutlist, index = interactions)
results.append(ddG)
#print(results.head())
# my style formatted results
results2 = results.T # transpose df
results2.index.name = 'mutationinformation' # assign name to index
results2 = results2.reset_index() # turn it into a columns
results2['mutationinformation'] = results2['mutationinformation'].replace({r'([A-Z]{1})[A-Z]{1}([0-9]+[A-Z]{1});' : r'\1 \2'}, regex = True) # capture mcsm style muts (i.e not the chain id)
results2['mutationinformation'] = results2['mutationinformation'].str.replace(' ', '') # remove empty space
results2.rename(columns = {'Distances': 'Contacts'}, inplace = True)
# lower case columns
results2.columns = results2.columns.str.lower()
print('Writing file in the format below:\n'
, results2.head()
, '\nNo. of rows:', len(results2)
, '\nNo. of cols:', len(results2.columns))
outputfilename = outfile_foldx
#outputfilename = 'foldx_results_' + pdbname + '.csv'
#results.to_csv(outputfilename)
results2.to_csv(outputfilename, index = False)
print ('end')
if __name__ == '__main__':
main()

View file

@ -1,7 +1,7 @@
INDIR=$1
PDB=$2
OUTDIR=$3
cd ${OUTDIR}
logger "Running repairPDB"
#foldx --command=RepairPDB --pdb="${PDB}.pdb" --ionStrength=0.05 --pH=7 --water=PREDICT --vdwDesign=1 outPDB=true --output-dir=${OUTDIR}

View file

@ -7,4 +7,3 @@ logger "Running runcomplex"
foldx --command=AnalyseComplex --pdb="${PDB}_Repair.pdb" --analyseComplexChains=${A},${B} --water=PREDICT --vdwDesign=1 --output-dir=${OUTDIR}
cp ${OUTDIR}/Summary_${PDB}_Repair_AC.fxout ${OUTDIR}/Summary_${PDB}_Repair_AC.txt
#sed -i .bak -e 1,8d ${OUTDIR}/Summary_${PDB}_Repair_AC.txt

View file

@ -2,7 +2,7 @@ PDB=$1
OUTDIR=$2
cd ${OUTDIR}
pwd
ls
ls -l
logger "Running runfoldx"
foldx --command=BuildModel --pdb="${PDB}_Repair.pdb" --mutant-file="individual_list_${PDB}.txt" --ionStrength=0.05 --pH=7 --water=PREDICT --vdwDesign=1 --out-pdb=true --numberOfRuns=1 --output-dir=${OUTDIR}
foldx --command=PrintNetworks --pdb="${PDB}_Repair.pdb" --water=PREDICT --vdwDesign=1 --output-dir=${OUTDIR}

View file

@ -1,14 +1,15 @@
PDB=$1
n=$2
#cd /home/tanu/git/LSHTM_analysis/foldx/
logger "Running mutrenamefiles_mac"
OUTDIR=$3
cd ${OUTDIR}
#cd /home/git/LSHTM_analysis/foldx/test2
cp Matrix_Hbonds_${PDB}_Repair_${n}_PN.fxout Matrix_Hbonds_${PDB}_Repair_${n}_PN.txt
sed -n '5,190p' Matrix_Hbonds_${PDB}_Repair_${n}_PN.fxout > Matrix_Hbonds_RR_${PDB}_Repair_${n}_PN.txt
sed -n '194,379p' Matrix_Hbonds_${PDB}_Repair_${n}_PN.fxout > Matrix_Hbonds_MM_${PDB}_Repair_${n}_PN.txt
sed -n '383,568p' Matrix_Hbonds_${PDB}_Repair_${n}_PN.fxout > Matrix_Hbonds_SM_${PDB}_Repair_${n}_PN.txt
sed -n '572,757p' Matrix_Hbonds_${PDB}_Repair_${n}_PN.fxout > Matrix_Hbonds_SS_${PDB}_Repair_${n}_PN.txt
cp Matrix_Distances_${PDB}_Repair_${n}_PN.fxout Matrix_Distances_${PDB}_Repair_${n}_PN.txt
sed -i .bak -e 1,4d Matrix_Distances_${PDB}_Repair_${n}_PN.txt
sed -i '1,4d' Matrix_Distances_${PDB}_Repair_${n}_PN.txt
cp Matrix_Volumetric_${PDB}_Repair_${n}_PN.fxout Matrix_Volumetric_${PDB}_Repair_${n}_PN.txt
sed -n '5,190p' Matrix_Volumetric_${PDB}_Repair_${n}_PN.fxout > Matrix_Volumetric_RR_${PDB}_Repair_${n}_PN.txt
sed -n '194,379p' Matrix_Volumetric_${PDB}_Repair_${n}_PN.fxout > Matrix_Volumetric_MM_${PDB}_Repair_${n}_PN.txt
@ -35,34 +36,28 @@ sed -n '194,379p' Matrix_VdWClashes_${PDB}_Repair_${n}_PN.fxout > Matrix_VdWClas
sed -n '383,568p' Matrix_VdWClashes_${PDB}_Repair_${n}_PN.fxout > Matrix_VdWClashes_SM_${PDB}_Repair_${n}_PN.txt
sed -n '572,757p' Matrix_VdWClashes_${PDB}_Repair_${n}_PN.fxout > Matrix_VdWClashes_SS_${PDB}_Repair_${n}_PN.txt
cp AllAtoms_Disulfide_${PDB}_Repair_${n}_PN.fxout AllAtoms_Disulfide_${PDB}_Repair_${n}_PN.txt
sed -i .bak -e 1,2d AllAtoms_Disulfide_${PDB}_Repair_${n}_PN.txt
sed -i '1,2d' AllAtoms_Disulfide_${PDB}_Repair_${n}_PN.txt
cp AllAtoms_Electro_${PDB}_Repair_${n}_PN.fxout AllAtoms_Electro_${PDB}_Repair_${n}_PN.txt
sed -i .bak -e 1,2d AllAtoms_Electro_${PDB}_Repair_${n}_PN.txt
sed -i '1,2d' AllAtoms_Electro_${PDB}_Repair_${n}_PN.txt
cp AllAtoms_Hbonds_${PDB}_Repair_${n}_PN.fxout AllAtoms_Hbonds_${PDB}_Repair_${n}_PN.txt
sed -i .bak -e 1,2d AllAtoms_Hbonds_${PDB}_Repair_${n}_PN.txt
sed -i '1,2d' AllAtoms_Hbonds_${PDB}_Repair_${n}_PN.txt
cp AllAtoms_Partcov_${PDB}_Repair_${n}_PN.fxout AllAtoms_Partcov_${PDB}_Repair_${n}_PN.txt
sed -i .bak -e 1,2d AllAtoms_Partcov_${PDB}_Repair_${n}_PN.txt
sed -i '1,2d' AllAtoms_Partcov_${PDB}_Repair_${n}_PN.txt
cp AllAtoms_VdWClashes_${PDB}_Repair_${n}_PN.fxout AllAtoms_VdWClashes_${PDB}_Repair_${n}_PN.txt
sed -i .bak -e 1,2d AllAtoms_VdWClashes_${PDB}_Repair_${n}_PN.txt
sed -i '1,2d' AllAtoms_VdWClashes_${PDB}_Repair_${n}_PN.txt
cp AllAtoms_Volumetric_${PDB}_Repair_${n}_PN.fxout AllAtoms_Volumetric_${PDB}_Repair_${n}_PN.txt
sed -i .bak -e 1,2d AllAtoms_Volumetric_${PDB}_Repair_${n}_PN.txt
sed -i '1,2d' AllAtoms_Volumetric_${PDB}_Repair_${n}_PN.txt
cp InteractingResidues_VdWClashes_${PDB}_Repair_${n}_PN.fxout InteractingResidues_VdWClashes_${PDB}_Repair_${n}_PN.txt
sed -i .bak -e 1,5d InteractingResidues_VdWClashes_${PDB}_Repair_${n}_PN.txt
sed -i '1,5d' InteractingResidues_VdWClashes_${PDB}_Repair_${n}_PN.txt
cp InteractingResidues_Distances_${PDB}_Repair_${n}_PN.fxout InteractingResidues_Distances_${PDB}_Repair_${n}_PN.txt
sed -i .bak -e 1,5d InteractingResidues_Distances_${PDB}_Repair_${n}_PN.txt
sed -i '1,5d' InteractingResidues_Distances_${PDB}_Repair_${n}_PN.txt
cp InteractingResidues_Electro_${PDB}_Repair_${n}_PN.fxout InteractingResidues_Electro_${PDB}_Repair_${n}_PN.txt
sed -i .bak -e 1,5d InteractingResidues_Electro_${PDB}_Repair_${n}_PN.txt
sed -i '1,5d' InteractingResidues_Electro_${PDB}_Repair_${n}_PN.txt
cp InteractingResidues_Hbonds_${PDB}_Repair_${n}_PN.fxout InteractingResidues_Hbonds_${PDB}_Repair_${n}_PN.txt
sed -i .bak -e 1,5d InteractingResidues_Hbonds_${PDB}_Repair_${n}_PN.txt
sed -i '1,5d' InteractingResidues_Hbonds_${PDB}_Repair_${n}_PN.txt
cp InteractingResidues_Partcov_${PDB}_Repair_${n}_PN.fxout InteractingResidues_Partcov_${PDB}_Repair_${n}_PN.txt
sed -i .bak -e 1,5d InteractingResidues_Partcov_${PDB}_Repair_${n}_PN.txt
sed -i '1,5d' InteractingResidues_Partcov_${PDB}_Repair_${n}_PN.txt
cp InteractingResidues_Volumetric_${PDB}_Repair_${n}_PN.fxout InteractingResidues_Volumetric_${PDB}_Repair_${n}_PN.txt
sed -i .bak -e 1,5d InteractingResidues_Volumetric_${PDB}_Repair_${n}_PN.txt
sed -i '1,5d' InteractingResidues_Volumetric_${PDB}_Repair_${n}_PN.txt
cp InteractingResidues_Disulfide_${PDB}_Repair_${n}_PN.fxout InteractingResidues_Disulfide_${PDB}_Repair_${n}_PN.txt
sed -i .bak -e 1,5d InteractingResidues_Disulfide_${PDB}_Repair_${n}_PN.txt
sed -i '1,5d' InteractingResidues_Disulfide_${PDB}_Repair_${n}_PN.txt

View file

@ -1,14 +1,16 @@
PDB=$1
logger "Running renamefiles_mac"
#cp Dif_${PDB}_Repair.fxout Dif_${PDB}_Repair.txt
sed -i '.bak' -e 1,8d Dif_${PDB}_Repair.txt
OUTDIR=$2
cd ${OUTDIR}
#cd /home/git/LSHTM_analysis/foldx/test2
cp Dif_${PDB}_Repair.fxout Dif_${PDB}_Repair.txt
sed -i '1,8d' Dif_${PDB}_Repair.txt
cp Matrix_Hbonds_${PDB}_Repair_PN.fxout Matrix_Hbonds_${PDB}_Repair_PN.txt
sed -n '5,190p' Matrix_Hbonds_${PDB}_Repair_PN.fxout > Matrix_Hbonds_RR_${PDB}_Repair_PN.txt
sed -n '194,379p' Matrix_Hbonds_${PDB}_Repair_PN.fxout > Matrix_Hbonds_MM_${PDB}_Repair_PN.txt
sed -n '383,568p' Matrix_Hbonds_${PDB}_Repair_PN.fxout > Matrix_Hbonds_SM_${PDB}_Repair_PN.txt
sed -n '572,757p' Matrix_Hbonds_${PDB}_Repair_PN.fxout > Matrix_Hbonds_SS_${PDB}_Repair_PN.txt
cp Matrix_Distances_${PDB}_Repair_PN.fxout Matrix_Distances_${PDB}_Repair_PN.txt
sed -i '.bak' -e 1,4d Matrix_Distances_${PDB}_Repair_PN.txt
sed -i '1,4d' Matrix_Distances_${PDB}_Repair_PN.txt
cp Matrix_Volumetric_${PDB}_Repair_PN.fxout Matrix_Volumetric_${PDB}_Repair_PN.txt
sed -n '5,190p' Matrix_Volumetric_${PDB}_Repair_PN.fxout > Matrix_Volumetric_RR_${PDB}_Repair_PN.txt
sed -n '194,379p' Matrix_Volumetric_${PDB}_Repair_PN.fxout > Matrix_Volumetric_MM_${PDB}_Repair_PN.txt
@ -35,34 +37,28 @@ sed -n '194,379p' Matrix_VdWClashes_${PDB}_Repair_PN.fxout > Matrix_VdWClashes_M
sed -n '383,568p' Matrix_VdWClashes_${PDB}_Repair_PN.fxout > Matrix_VdWClashes_SM_${PDB}_Repair_PN.txt
sed -n '572,757p' Matrix_VdWClashes_${PDB}_Repair_PN.fxout > Matrix_VdWClashes_SS_${PDB}_Repair_PN.txt
cp AllAtoms_Disulfide_${PDB}_Repair_PN.fxout AllAtoms_Disulfide_${PDB}_Repair_PN.txt
sed -i '.bak' -e 1,2d AllAtoms_Disulfide_${PDB}_Repair_PN.txt
sed -i '1,2d' AllAtoms_Disulfide_${PDB}_Repair_PN.txt
cp AllAtoms_Electro_${PDB}_Repair_PN.fxout AllAtoms_Electro_${PDB}_Repair_PN.txt
sed -i '.bak' -e 1,2d AllAtoms_Electro_${PDB}_Repair_PN.txt
sed -i '1,2d' AllAtoms_Electro_${PDB}_Repair_PN.txt
cp AllAtoms_Hbonds_${PDB}_Repair_PN.fxout AllAtoms_Hbonds_${PDB}_Repair_PN.txt
sed -i '.bak' -e 1,2d AllAtoms_Hbonds_${PDB}_Repair_PN.txt
sed -i '1,2d' AllAtoms_Hbonds_${PDB}_Repair_PN.txt
cp AllAtoms_Partcov_${PDB}_Repair_PN.fxout AllAtoms_Partcov_${PDB}_Repair_PN.txt
sed -i '.bak' -e 1,2d AllAtoms_Partcov_${PDB}_Repair_PN.txt
sed -i '1,2d' AllAtoms_Partcov_${PDB}_Repair_PN.txt
cp AllAtoms_VdWClashes_${PDB}_Repair_PN.fxout AllAtoms_VdWClashes_${PDB}_Repair_PN.txt
sed -i '.bak' -e 1,2d AllAtoms_VdWClashes_${PDB}_Repair_PN.txt
sed -i '1,2d' AllAtoms_VdWClashes_${PDB}_Repair_PN.txt
cp AllAtoms_Volumetric_${PDB}_Repair_PN.fxout AllAtoms_Volumetric_${PDB}_Repair_PN.txt
sed -i '.bak' -e 1,2d AllAtoms_Volumetric_${PDB}_Repair_PN.txt
sed -i '1,2d' AllAtoms_Volumetric_${PDB}_Repair_PN.txt
cp InteractingResidues_VdWClashes_${PDB}_Repair_PN.fxout InteractingResidues_VdWClashes_${PDB}_Repair_PN.txt
sed -i '.bak' -e 1,5d InteractingResidues_VdWClashes_${PDB}_Repair_PN.txt
sed -i '1,5d' InteractingResidues_VdWClashes_${PDB}_Repair_PN.txt
cp InteractingResidues_Distances_${PDB}_Repair_PN.fxout InteractingResidues_Distances_${PDB}_Repair_PN.txt
sed -i '.bak' -e 1,5d InteractingResidues_Distances_${PDB}_Repair_PN.txt
sed -i '1,5d' InteractingResidues_Distances_${PDB}_Repair_PN.txt
cp InteractingResidues_Electro_${PDB}_Repair_PN.fxout InteractingResidues_Electro_${PDB}_Repair_PN.txt
sed -i '.bak' -e 1,5d InteractingResidues_Electro_${PDB}_Repair_PN.txt
sed -i '1,5d' InteractingResidues_Electro_${PDB}_Repair_PN.txt
cp InteractingResidues_Hbonds_${PDB}_Repair_PN.fxout InteractingResidues_Hbonds_${PDB}_Repair_PN.txt
sed -i '.bak' -e 1,5d InteractingResidues_Hbonds_${PDB}_Repair_PN.txt
sed -i '1,5d' InteractingResidues_Hbonds_${PDB}_Repair_PN.txt
cp InteractingResidues_Partcov_${PDB}_Repair_PN.fxout InteractingResidues_Partcov_${PDB}_Repair_PN.txt
sed -i '.bak' -e 1,5d InteractingResidues_Partcov_${PDB}_Repair_PN.txt
sed -i '1,5d' InteractingResidues_Partcov_${PDB}_Repair_PN.txt
cp InteractingResidues_Volumetric_${PDB}_Repair_PN.fxout InteractingResidues_Volumetric_${PDB}_Repair_PN.txt
sed -i '.bak' -e 1,5d InteractingResidues_Volumetric_${PDB}_Repair_PN.txt
sed -i '1,5d' InteractingResidues_Volumetric_${PDB}_Repair_PN.txt
cp InteractingResidues_Disulfide_${PDB}_Repair_PN.fxout InteractingResidues_Disulfide_${PDB}_Repair_PN.txt
sed -i '.bak' -e 1,5d InteractingResidues_Disulfide_${PDB}_Repair_PN.txt
sed -i '1,5d' InteractingResidues_Disulfide_${PDB}_Repair_PN.txt

1
foldx/test2/runFoldx.py Symbolic link
View file

@ -0,0 +1 @@
../runFoldx.py

250
foldx/test2/runFoldx_test.py Executable file
View file

@ -0,0 +1,250 @@
#!/usr/bin/env python3
import subprocess
import os
import numpy as np
import pandas as pd
from contextlib import suppress
import re
import csv
def getInteractions(filename):
data = pd.read_csv(filename, index_col=0, header =0, sep="\t")
contactList = getIndexes(data,1)
print(contactList)
number = len(contactList)
return number
def formatMuts(mut_file,pdbname):
with open(mut_file) as csvfile:
readCSV = csv.reader(csvfile)
muts = []
for row in readCSV:
mut = row[0]
muts.append(mut)
mut_list = []
outfile = "/home/tanu/git/LSHTM_analysis/foldx/test2/individual_list_"+pdbname+".txt"
with open(outfile, "w") as output:
for m in muts:
print(m)
mut = m[:1]+'A'+m[1:]
mut_list.append(mut)
mut = mut + ";"
print(mut)
output.write(mut)
output.write("\n")
return mut_list
def getIndexes(data, value):
colnames = data.columns.values
listOfPos = list()
result = data.isin([value])
result.columns=colnames
seriesdata = result.any()
columnNames = list(seriesdata[seriesdata==True].index)
for col in columnNames:
rows = list(result[col][result[col]==True].index)
for row in rows:
listOfPos.append((row,col))
return listOfPos
def loadFiles(df):
# load a text file in to np matrix
resultList = []
f = open(df,'r')
for line in f:
line = line.rstrip('\n')
aVals = line.split("\t")
fVals = list(map(np.float32, sVals))
resultList.append(fVals)
f.close()
return np.asarray(resultList, dtype=np.float32)
#=======================================================================
def main():
pdbname = '3pl1'
mut_filename = "pnca_muts_sample.csv"
mutlist = formatMuts(mut_filename, pdbname)
print(mutlist)
nmuts = len(mutlist)+1
print(nmuts)
print(mutlist)
print("start")
output = subprocess.check_output(['bash', 'runfoldx.sh', pdbname])
print("end")
for n in range(1,nmuts):
print(n)
with suppress(Exception):
subprocess.check_output(['bash', 'runPrintNetworks.sh', pdbname,str(n)])
for n in range(1,nmuts):
print(n)
with suppress(Exception):
subprocess.check_output(['bash', 'mutrenamefiles.sh', pdbname,str(n)])
out = subprocess.check_output(['bash','renamefiles.sh',pdbname])
dGdatafile = "/home/tanu/git/LSHTM_analysis/foldx/test2/Dif_"+pdbname+"_Repair.txt"
dGdata = pd.read_csv(dGdatafile, sep="\t")
print(dGdata)
ddG=[]
for i in range(0,len(dGdata)):
ddG.append(dGdata['total energy'].loc[i])
print(ddG)
distfile = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Distances_"+pdbname+"_Repair_PN.txt"
wt_nc = getInteractions(distfile)
elecfileRR = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Electro_RR_"+pdbname+"_Repair_PN.txt"
wt_neRR = getInteractions(elecfileRR)
elecfileMM = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Electro_MM_"+pdbname+"_Repair_PN.txt"
wt_neMM = getInteractions(elecfileMM)
elecfileSM = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Electro_SM_"+pdbname+"_Repair_PN.txt"
wt_neSM = getInteractions(elecfileSM)
elecfileSS = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Electro_SS_"+pdbname+"_Repair_PN.txt"
wt_neSS = getInteractions(elecfileSS)
disufileRR = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Disulfide_RR_"+pdbname+"_Repair_PN.txt"
wt_ndRR = getInteractions(disufileRR)
disufileMM = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Disulfide_MM_"+pdbname+"_Repair_PN.txt"
wt_ndMM = getInteractions(disufileMM)
disufileSM = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Disulfide_SM_"+pdbname+"_Repair_PN.txt"
wt_ndSM = getInteractions(disufileSM)
disufileSS = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Disulfide_SS_"+pdbname+"_Repair_PN.txt"
wt_ndSS = getInteractions(disufileSS)
hbndfileRR = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Hbonds_RR_"+pdbname+"_Repair_PN.txt"
wt_nhRR = getInteractions(hbndfileRR)
hbndfileMM = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Hbonds_MM_"+pdbname+"_Repair_PN.txt"
wt_nhMM = getInteractions(hbndfileMM)
hbndfileSM = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Hbonds_SM_"+pdbname+"_Repair_PN.txt"
wt_nhSM = getInteractions(hbndfileSM)
hbndfileSS = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Hbonds_SS_"+pdbname+"_Repair_PN.txt"
wt_nhSS = getInteractions(hbndfileSS)
partfileRR = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Partcov_RR_"+pdbname+"_Repair_PN.txt"
wt_npRR = getInteractions(partfileRR)
partfileMM = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Partcov_MM_"+pdbname+"_Repair_PN.txt"
wt_npMM = getInteractions(partfileMM)
partfileSM = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Partcov_SM_"+pdbname+"_Repair_PN.txt"
wt_npSM = getInteractions(partfileSM)
partfileSS = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Partcov_SS_"+pdbname+"_Repair_PN.txt"
wt_npSS = getInteractions(partfileSS)
vdwcfileRR = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_VdWClashes_RR_"+pdbname+"_Repair_PN.txt"
wt_nvRR = getInteractions(vdwcfileRR)
vdwcfileMM = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_VdWClashes_MM_"+pdbname+"_Repair_PN.txt"
wt_nvMM = getInteractions(vdwcfileMM)
vdwcfileSM = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_VdWClashes_SM_"+pdbname+"_Repair_PN.txt"
wt_nvSM = getInteractions(vdwcfileSM)
vdwcfileSS = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_VdWClashes_SS_"+pdbname+"_Repair_PN.txt"
wt_nvSS = getInteractions(vdwcfileSS)
volufileRR = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Volumetric_RR_"+pdbname+"_Repair_PN.txt"
wt_nvoRR = getInteractions(volufileRR)
volufileMM = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Volumetric_MM_"+pdbname+"_Repair_PN.txt"
wt_nvoMM = getInteractions(volufileMM)
volufileSM = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Volumetric_SM_"+pdbname+"_Repair_PN.txt"
wt_nvoSM = getInteractions(volufileSM)
volufileSS = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Volumetric_SS_"+pdbname+"_Repair_PN.txt"
wt_nvoSS = getInteractions(volufileSS)
dnc = []
dneRR = []
dneMM = []
dneSM = []
dneSS = []
dndRR = []
dndMM = []
dndSM = []
dndSS = []
dnhRR = []
dnhMM = []
dnhSM = []
dnhSS = []
dnpRR = []
dnpMM = []
dnpSM = []
dnpSS = []
dnvRR = []
dnvMM = []
dnvSM = []
dnvSS = []
dnvoRR = []
dnvoMM = []
dnvoSM = []
dnvoSS = []
for n in range(1, nmuts):
filename = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Distances_"+pdbname+"_Repair_" + str(n)+"_PN.txt"
mut_nc = getInteractions(filename)
diffc = wt_nc - mut_nc
dnc.append(diffc)
filename = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Electro_RR_"+pdbname+"_Repair_" + str(n)+"_PN.txt"
mut_neRR = getInteractions(filename)
diffeRR = wt_neRR - mut_neRR
dneRR.append(diffeRR)
filename = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Disulfide_RR_"+pdbname+"_Repair_" + str(n)+"_PN.txt"
mut_ndRR = getInteractions(filename)
diffdRR = wt_ndRR - mut_ndRR
dndRR.append(diffdRR)
filename = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Hbonds_RR_"+pdbname+"_Repair_" + str(n)+"_PN.txt"
mut_nhRR = getInteractions(filename)
diffhRR = wt_nhRR - mut_nhRR
dnhRR.append(diffhRR)
filename = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Partcov_RR_"+pdbname+"_Repair_" + str(n)+"_PN.txt"
mut_npRR = getInteractions(filename)
diffpRR = wt_npRR - mut_npRR
dnpRR.append(diffpRR)
filename = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_VdWClashes_RR_"+pdbname+"_Repair_" + str(n)+"_PN.txt"
mut_nvRR = getInteractions(filename)
diffvRR = wt_nvRR - mut_nvRR
dnvRR.append(diffvRR)
filename = "/home/tanu/git/LSHTM_analysis/foldx/test2/Matrix_Volumetric_RR_"+pdbname+"_Repair_" + str(n)+"_PN.txt"
mut_nvoRR = getInteractions(filename)
diffvoRR = wt_nvoRR - mut_nvoRR
dnvoRR.append(diffvoRR)
print(dnc)
print(dneRR)
print(dndRR)
print(dnhRR)
print(dnpRR)
print(dnvRR)
print(dnvoRR)
results = pd.DataFrame([(ddG),(dnc),(dneRR),(dndRR),(dnhRR),(dnpRR),(dnvRR),(dnvoRR)], columns=mutlist, index=["ddG","contacts","electro","disulfide","hbonds","partcov","VdWClashes","volumetric"])
results.append(ddG)
print(results)
results2 = results.T # transpose df
outputfilename = "foldx_results_"+pdbname+".csv"
# results.to_csv(outputfilename)
results2.to_csv(outputfilename)
if __name__ == "__main__":
main()

456
foldx/test2/runFoldx_test2.py Executable file
View file

@ -0,0 +1,456 @@
#!/usr/bin/env python3
import subprocess
import os
import sys
import numpy as np
import pandas as pd
from contextlib import suppress
from pathlib import Path
import re
import csv
import argparse
import shutil
#https://realpython.com/python-pathlib/
# FIXME
#strong dependency of file and path names
#cannot pass file with path. Need to pass them separately
#assumptions made for dir struc as standard
#datadir + drug + input
#=======================================================================
#%% specify input and curr dir
homedir = os.path.expanduser('~')
# set working dir
os.getcwd()
#os.chdir(homedir + '/git/LSHTM_analysis/foldx/')
#os.getcwd()
#=======================================================================
#%% command line args
arg_parser = argparse.ArgumentParser()
arg_parser.add_argument('-d', '--drug', help = 'drug name', default = None)
arg_parser.add_argument('-g', '--gene', help = 'gene name (case sensitive)', default = None)
arg_parser.add_argument('--datadir', help = 'Data Directory. By default, it assmumes homedir + git/Data')
arg_parser.add_argument('-i', '--input_dir', help = 'Input dir containing pdb files. By default, it assmumes homedir + <drug> + input')
arg_parser.add_argument('-o', '--output_dir', help = 'Output dir for results. By default, it assmes homedir + <drug> + output')
arg_parser.add_argument('-p', '--process_dir', help = 'Temp processing dir for running foldX. By default, it assmes homedir + <drug> + processing. Make sure it is somewhere with LOTS of storage as it writes all output!') #FIXME
arg_parser.add_argument('-pdb', '--pdb_file', help = 'PDB File to process. By default, it assmumes a file called <gene>_complex.pdb in input_dir')
arg_parser.add_argument('-m', '--mutation_file', help = 'Mutation list. By default, assumes a file called <gene>_mcsm_snps.csv exists')
# FIXME: Doesn't work with 2 chains yet!
arg_parser.add_argument('-c1', '--chain1', help = 'Chain1 ID', default = 'A') # case sensitive
arg_parser.add_argument('-c2', '--chain2', help = 'Chain2 ID', default = 'B') # case sensitive
args = arg_parser.parse_args()
#=======================================================================
#%% variable assignment: input and output
#drug = 'pyrazinamide'
#gene = 'pncA'
#gene_match = gene + '_p.'
#%%=====================================================================
# Command line options
drug = args.drug
gene = args.gene
datadir = args.datadir
indir = args.input_dir
outdir = args.output_dir
process_dir = args.process_dir
mut_filename = args.mutation_file
chainA = args.chain1
chainB = args.chain2
pdb_filename = args.pdb_file
# os.path.splitext will fail interestingly with file.pdb.txt.zip
#pdb_name = os.path.splitext(pdb_file)[0]
# Just the filename, thanks
#pdb_name = Path(in_filename_pdb).stem
#==============
# directories
#==============
if not datadir:
datadir = homedir + '/' + 'git/Data'
if not indir:
indir = datadir + '/' + drug + '/input'
if not outdir:
outdir = datadir + '/' + drug + '/output'
#TODO: perhaps better handled by refactoring code to prevent generating lots of output files!
#if not process_dir:
# process_dir = datadir + '/' + drug + '/processing'
# Make all paths absolute in case the user forgot
indir = os.path.abspath(indir)
process_dir = os.path.abspath(process_dir)
outdir = os.path.abspath(outdir)
datadir = os.path.abspath(datadir)
#=======
# input
#=======
# FIXME
if pdb_filename:
pdb_name = Path(pdb_filename).stem
else:
pdb_filename = gene.lower() + '_complex.pdb'
pdb_name = Path(pdb_filename).stem
infile_pdb = indir + '/' + pdb_filename
actual_pdb_filename = Path(infile_pdb).name
#actual_pdb_filename = os.path.abspath(infile_pdb)
if mut_filename:
mutation_file = os.path.abspath(mut_filename)
infile_muts = mutation_file
print('User-provided mutation file in use:', infile_muts)
else:
mutation_file = gene.lower() + '_mcsm_formatted_snps.csv'
infile_muts = outdir + '/' + mutation_file
print('WARNING: Assuming default mutation file:', infile_muts)
#=======
# output
#=======
out_filename = gene.lower() + '_foldx.csv'
outfile_foldx = outdir + '/' + out_filename
print('Arguments being passed:'
, '\nDrug:', args.drug
, '\ngene:', args.gene
, '\ninput dir:', indir
, '\nprocess dir:', process_dir
, '\noutput dir:', outdir
, '\npdb file:', infile_pdb
, '\npdb name:', pdb_name
, '\nactual pdb name:', actual_pdb_filename
, '\nmutation file:', infile_muts
, '\nchain1:', args.chain1
, '\noutput file:', outfile_foldx
, '\n=============================================================')
#=======================================================================
def getInteractionEnergy(filename):
data = pd.read_csv(filename,sep = '\t')
return data['Interaction Energy'].loc[0]
def getInteractions(filename):
data = pd.read_csv(filename, index_col = 0, header = 0, sep = '\t')
contactList = getIndexes(data,1)
number = len(contactList)
return number
def formatMuts(mut_file,pdbname):
with open(mut_file) as csvfile:
readCSV = csv.reader(csvfile)
muts = []
for row in readCSV:
mut = row[0]
muts.append(mut)
mut_list = []
outfile = process_dir + '/individual_list_' + pdbname + '.txt'
with open(outfile, 'w') as output:
for m in muts:
print(m)
mut = m[:1] + chainA+ m[1:]
mut_list.append(mut)
mut = mut + ';'
print(mut)
output.write(mut)
output.write('\n')
return mut_list
def getIndexes(data, value):
colnames = data.columns.values
listOfPos = list()
result = data.isin([value])
result.columns = colnames
seriesdata = result.any()
columnNames = list(seriesdata[seriesdata==True].index)
for col in columnNames:
rows = list(result[col][result[col]==True].index)
for row in rows:
listOfPos.append((row,col))
return listOfPos
def loadFiles(df):
# load a text file in to np matrix
resultList = []
f = open(df,'r')
for line in f:
line = line.rstrip('\n')
aVals = line.split('\t')
fVals = list(map(np.float32, sVals))
resultList.append(fVals)
f.close()
return np.asarray(resultList, dtype=np.float32)
# TODO: use this code pattern rather than invoking bash
#def repairPDB():
# subprocess.call(['foldx'
# , '--command=RepairPDB'
# , '--pdb-dir=' + indir
# , '--pdb=' + actual_pdb_filename
# , '--ionStrength=0.05'#
# , '--pH=7'
# , '--water=PREDICT'
# , '--vdwDesign=1'
# , 'outPDB=true'
# , '--output-dir=' + process_dir])
#=======================================================================
def main():
pdbname = pdb_name
comp = '' # for complex only
mut_filename = infile_muts #pnca_mcsm_snps.csv
mutlist = formatMuts(mut_filename, pdbname)
print(mutlist)
nmuts = len(mutlist)
print(nmuts)
print(mutlist)
print('start')
#subprocess.check_output(['bash','repairPDB.sh', pdbname, process_dir])
print('\033[95mSTAGE: repair PDB\033[0m')
print('EXECUTING: repairPDB.sh %s %s %s' % (indir, actual_pdb_filename, process_dir))
#subprocess.check_output(['bash','repairPDB.sh', indir, actual_pdb_filename, process_dir])
# once you decide to use the function
# repairPDB(pdbname)
# FIXME: put this hack elsewhere
foldx_common=' --ionStrength=0.05 --pH=7 --water=PREDICT --vdwDesign=1 '
subprocess.call(['foldx'
, '--command=RepairPDB'
, foldx_common
, '--pdb-dir=' + indir
, '--pdb=' + actual_pdb_filename
, 'outPDB=true'
, '--output-dir=' + process_dir])
print('\033[95mCOMPLETE: repair PDB\033[0m')
print('\033[95mSTAGE: run FoldX (subprocess)\033[0m')
print('EXECUTING: runfoldx.sh %s %s ' % (pdbname, process_dir))
#output = subprocess.check_output(['bash', 'runfoldx.sh', pdbname, process_dir])
print('Running foldx BuildModel')
subprocess.call(['foldx'
, '--command=BuildModel'
, foldx_common
, '--pdb-dir=' + process_dir
, '--pdb=' + pdbname + '_Repair.pdb'
, '--mutant-file="individual_list_' + pdbname +'.txt"'
, 'outPDB=true'
, '--numberOfRuns=1'
, '--output-dir=' + process_dir], cwd=process_dir)
print('Running foldx PrintNetworks')
subprocess.call(['foldx'
, '--command=PrintNetworks'
, '--pdb-dir=' + process_dir
, '--pdb=' + pdbname + '_Repair.pdb'
, '--water=PREDICT'
, '--vdwDesign=1'
, '--output-dir=' + process_dir], cwd=process_dir)
print('Running foldx SequenceDetail')
subprocess.call(['foldx'
, '--command=SequenceDetail'
, '--pdb-dir=' + process_dir
, '--pdb=' + pdbname + '_Repair.pdb'
, '--water=PREDICT'
, '--vdwDesign=1'
, '--output-dir=' + process_dir], cwd=process_dir)
print('\033[95mCOMPLETE: run FoldX (subprocess)\033[0m')
print('\033[95mSTAGE: Print Networks (shell)\033[0m')
for n in range(1,nmuts+1):
print('\033[95mNETWORK:\033[0m', n)
#print('\033[96mCommand:\033[0m runPrintNetworks.sh %s %s %s' % (pdbname, str(n), process_dir ))
#with suppress(Exception):
#foldx --command=PrintNetworks --pdb="${PDB}_Repair_${n}.pdb" --water=PREDICT --vdwDesign=1 --output-dir=${OUTDIR}
print('Running foldx PrintNetworks for mutation', n)
subprocess.call(['foldx'
, '--command=PrintNetworks'
, '--pdb-dir=' + process_dir
, '--pdb=' + pdbname + '_Repair_' + str(n) + '.pdb'
, '--water=PREDICT'
, '--vdwDesign=1'
, '--output-dir=' + process_dir], cwd=process_dir)
#subprocess.check_output(['bash', 'runPrintNetworks.sh', pdbname, str(n), process_dir])
print('\033[95mCOMPLETE: Print Networks (shell)\033[0m')
print('\033[95mSTAGE: Rename Mutation Files (shell)\033[0m')
for n in range(1,nmuts+1):
print('\033[95mMUTATION:\033[0m', n)
print('\033[96mCommand:\033[0m mutrenamefiles.sh %s %s %s' % (pdbname, str(n), process_dir ))
# FIXME: this is bad design and needs to be done in a pythonic way
with suppress(Exception):
subprocess.check_output(['bash', 'mutrenamefiles.sh', pdbname, str(n), process_dir])
print('\033[95mCOMPLETE: Rename Mutation Files (shell)\033[0m')
print('\033[95mSTAGE: Rename Files (shell)\033[0m')
# FIXME: this is bad design and needs to be done in a pythonic way
out = subprocess.check_output(['bash','renamefiles.sh', pdbname, process_dir])
print('\033[95mCOMPLETE: Rename Files (shell)\033[0m')
if comp=='y':
print('\033[95mSTAGE: Running foldx AnalyseComplex (subprocess)\033[0m')
chain1=chainA
chain2=chainB
#with suppress(Exception):
#subprocess.check_output(['bash','runcomplex.sh', pdbname, chain1, chain2, process_dir])
subprocess.call(['foldx'
, '--command=AnalyseComplex'
, '--pdb-dir=' + process_dir
, '--pdb=' + pdbname + '_Repair.pdb'
, '--analyseComplexChains=' + chain1 + ',' + chain2
, '--water=PREDICT'
, '--vdwDesign=1'
, '--output-dir=' + process_dir], cwd=process_dir)
# FIXME why would we ever need to do this?!? Cargo-culted from runcomplex.sh
ac_source = process_dir + '/Summary_' + pdbname + '_Repair_AC.fxout'
ac_dest = process_dir + '/Summary_' + pdbname + '_Repair_AC.txt'
shutil.copyfile(ac_source, ac_dest)
for n in range(1,nmuts+1):
print('\033[95mSTAGE: Running foldx AnalyseComplex (subprocess) for mutation:\033[0m', n)
#with suppress(Exception):
# subprocess.check_output(['bash','mutruncomplex.sh', pdbname, chain1, chain2, str(n), process_dir])
subprocess.call(['foldx'
, '--command=AnalyseComplex'
, '--pdb-dir=' + process_dir
, '--pdb=' + pdbname + '_Repair_' + str(n) + '.pdb'
, '--analyseComplexChains=' + chain1 + ',' + chain2
, '--water=PREDICT'
, '--vdwDesign=1'
, '--output-dir=' + process_dir], cwd=process_dir)
# FIXME why would we ever need to do this?!? Cargo-culted from runcomplex.sh
ac_mut_source = process_dir + '/Summary_' + pdbname + '_Repair_' + str(n) +'_AC.fxout'
ac_mut_dest = process_dir + '/Summary_' + pdbname + '_Repair)' + str(n) +'_AC.txt'
shutil.copyfile(ac_mut_source, ac_mut_dest)
print('\033[95mCOMPLETE: foldx AnalyseComplex (subprocess) for mutation:\033[0m', n)
interactions = ['Distances','Electro_RR','Electro_MM','Electro_SM','Electro_SS','Disulfide_RR','Disulfide_MM','Disulfide_SM','Disulfide_SS',
'Hbonds_RR','Hbonds_MM','Hbonds_SM','Hbonds_SS','Partcov_RR','Partcov_MM','Partcov_SM','Partcov_SS','VdWClashes_RR','VdWClashes_MM',
'VdWClashes_SM','VdWClashes_SS','Volumetric_RR','Volumetric_MM','Volumetric_SM','Volumetric_SS']
dGdatafile = process_dir + '/Dif_' + pdbname + '_Repair.txt'
dGdata = pd.read_csv(dGdatafile, sep = '\t')
ddG=[]
print('ddG')
print(len(dGdata))
for i in range(0,len(dGdata)):
ddG.append(dGdata['total energy'].loc[i])
nint = len(interactions)
wt_int = []
for i in interactions:
filename = process_dir + '/Matrix_' + i + '_'+ pdbname + '_Repair_PN.txt'
wt_int.append(getInteractions(filename))
print('wt')
print(wt_int)
ntotal = nint+1
print(ntotal)
print(nmuts)
data = np.empty((ntotal,nmuts))
data[0] = ddG
print(data)
for i in range(0,len(interactions)):
d=[]
p=0
for n in range(1, nmuts+1):
print(i)
filename = process_dir + '/Matrix_' + interactions[i] + '_' + pdbname + '_Repair_' + str(n) + '_PN.txt'
mut = getInteractions(filename)
diff = wt_int[i] - mut
print(diff)
print(wt_int[i])
print(mut)
d.append(diff)
print(d)
data[i+1] = d
interactions = ['ddG', 'Distances','Electro_RR','Electro_MM','Electro_SM','Electro_SS','Disulfide_RR','Disulfide_MM','Disulfide_SM','Disulfide_SS', 'Hbonds_RR','Hbonds_MM','Hbonds_SM','Hbonds_SS','Partcov_RR','Partcov_MM','Partcov_SM','Partcov_SS','VdWClashes_RR','VdWClashes_MM','VdWClashes_SM','VdWClashes_SS','Volumetric_RR','Volumetric_MM','Volumetric_SM','Volumetric_SS']
print(interactions)
IE = []
if comp=='y':
wtfilename = process_dir + '/Summary_' + pdbname + '_Repair_AC.txt'
wtE = getInteractionEnergy(wtfilename)
print(wtE)
for n in range(1,nmuts+1):
print(n)
filename = process_dir + '/Summary_' + pdbname + '_Repair_' + str(n) + '_AC.txt'
mutE = getInteractionEnergy(filename)
print(mutE)
diff = wtE - mutE
print(diff)
IE.append(diff)
print(IE)
IEresults = pd.DataFrame(IE,columns = ['Interaction Energy'], index = mutlist)
IEfilename = 'foldx_complexresults_'+pdbname+'.csv'
IEresults.to_csv(IEfilename)
print(len(IE))
data = np.append(data,[IE], axis = 0)
print(data)
interactions = ['ddG','Distances','Electro_RR','Electro_MM','Electro_SM','Electro_SS','Disulfide_RR','Disulfide_MM','Disulfide_SM','Disulfide_SS','Hbonds_RR','Hbonds_MM','Hbonds_SM','Hbonds_SS','Partcov_RR','Partcov_MM','Partcov_SM','Partcov_SS','VdWClashes_RR','VdWClashes_MM','VdWClashes_SM','VdWClashes_SS','Volumetric_RR','Volumetric_MM','Volumetric_SM','Volumetric_SS','Interaction Energy']
mut_file = process_dir + '/individual_list_' + pdbname + '.txt'
with open(mut_file) as csvfile:
readCSV = csv.reader(csvfile)
mutlist = []
for row in readCSV:
mut = row[0]
mutlist.append(mut)
print(mutlist)
print(len(mutlist))
print(data)
results = pd.DataFrame(data, columns = mutlist, index = interactions)
results.append(ddG)
#print(results.head())
# my style formatted results
results2 = results.T # transpose df
results2.index.name = 'mutationinformation' # assign name to index
results2 = results2.reset_index() # turn it into a columns
results2['mutationinformation'] = results2['mutationinformation'].replace({r'([A-Z]{1})[A-Z]{1}([0-9]+[A-Z]{1});' : r'\1 \2'}, regex = True) # capture mcsm style muts (i.e not the chain id)
results2['mutationinformation'] = results2['mutationinformation'].str.replace(' ', '') # remove empty space
results2.rename(columns = {'Distances': 'Contacts'}, inplace = True)
# lower case columns
results2.columns = results2.columns.str.lower()
print('Writing file in the format below:\n'
, results2.head()
, '\nNo. of rows:', len(results2)
, '\nNo. of cols:', len(results2.columns))
outputfilename = outfile_foldx
#outputfilename = 'foldx_results_' + pdbname + '.csv'
#results.to_csv(outputfilename)
results2.to_csv(outputfilename, index = False)
if __name__ == '__main__':
main()

View file

@ -26,7 +26,7 @@ Created on Tue Aug 6 12:56:03 2019
# 1) <gene>_gwas.csv
# 2) <gene>_common_ids.csv
# 3) <gene>_ambiguous_muts.csv
# 4) <gene>_mcsm_snps.csv
# 4) <gene>_mcsm_formatted_snps.csv
# 5) <gene>_metadata_poscounts.csv
# 6) <gene>_metadata.csv
# 7) <gene>_all_muts_msa.csv
@ -1193,7 +1193,7 @@ if snps_only.mutationinformation.isna().sum() == 0:
else:
sys.exit('FAIL: SNP has NA, Possible mapping issues from dict?')
out_filename_mcsmsnps = gene.lower() + '_mcsm_style_snps.csv'
out_filename_mcsmsnps = gene.lower() + '_mcsm_formatted_snps.csv'
outfile_mcsmsnps = outdir + '/' + out_filename_mcsmsnps
print('\n----------------------------------'