Merge branch 'master' of https://git.tunstall.in/tanu/LSHTM_analysis
This commit is contained in:
commit
bcf4467c44
17 changed files with 1538 additions and 109 deletions
46
dynamut/dynamut.py
Executable file
46
dynamut/dynamut.py
Executable file
|
@ -0,0 +1,46 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Created on Wed Aug 19 14:33:51 2020
|
||||
|
||||
@author: tanu
|
||||
"""
|
||||
|
||||
|
||||
#%% load packages
|
||||
import os,sys
|
||||
import subprocess
|
||||
import argparse
|
||||
import requests
|
||||
import re
|
||||
import time
|
||||
from bs4 import BeautifulSoup
|
||||
import pandas as pd
|
||||
from pandas.api.types import is_string_dtype
|
||||
from pandas.api.types import is_numeric_dtype
|
||||
#%%============================================================================
|
||||
|
||||
#1) define muts batch
|
||||
#take mcsm file
|
||||
#split into 'n' batches
|
||||
#write output file with suffix of batch number
|
||||
|
||||
|
||||
#********** done this part ****************
|
||||
#2) get results for a batch url
|
||||
# read file
|
||||
# store batch url
|
||||
#extract number
|
||||
#build single url
|
||||
#build single results urls
|
||||
#get results and store them in df
|
||||
#update df
|
||||
#dim of df = no. of muts in batch
|
||||
|
||||
#3) format results
|
||||
# store unit measurements separately
|
||||
# omit unit measurements from cols
|
||||
# create extra columns '_outcome' suffix by splitting numerical output
|
||||
# create separate col for mcsm as it doesn't have output text
|
||||
|
||||
#%%============================================================================
|
101
dynamut/dynamut_test.py
Executable file
101
dynamut/dynamut_test.py
Executable file
|
@ -0,0 +1,101 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Created on Wed Aug 19 14:33:51 2020
|
||||
|
||||
@author: tanu
|
||||
"""
|
||||
|
||||
|
||||
#%% load packages
|
||||
import os,sys
|
||||
import subprocess
|
||||
import argparse
|
||||
import requests
|
||||
import re
|
||||
import time
|
||||
from bs4 import BeautifulSoup
|
||||
import pandas as pd
|
||||
from pandas.api.types import is_string_dtype
|
||||
from pandas.api.types import is_numeric_dtype
|
||||
#%%============================================================================
|
||||
|
||||
batch_result_url = 'http://biosig.unimelb.edu.au/dynamut/results_prediction/15955901077'

mut = 'S104R'
single_result_url = 'http://biosig.unimelb.edu.au/dynamut/single_results/15955901077' + '/' + mut

# ids of the HTML elements that are looked up in each fetched single-result page;
# each id also becomes a column name in the results data frame
DYNAMUT_RESULT_IDS = ('ddg_dynamut', 'ddg_encom', 'ddg_mcsm', 'ddg_sdm', 'ddg_duet', 'dds_encom')


def fetch_single_result(result_url, mutation, timeout = 60):
    """Fetch one single-result page and return its values as a dict.

    Parameters
    ----------
    result_url : str
        URL of the single-result page to request.
    mutation : str
        Mutation label stored under the 'mutationinformation' key.
    timeout : float
        Seconds to wait for the server before giving up, so that a stalled
        request cannot hang the script indefinitely.

    Returns
    -------
    dict or None
        'mutationinformation' plus one entry per id in DYNAMUT_RESULT_IDS
        (the text of the matching element, or None when the element is not
        present in the page). None is returned when the request fails or the
        HTTP status is not 200.
    """
    try:
        result_response = requests.get(result_url, timeout = timeout)
    except requests.exceptions.RequestException as err:
        print('WARNING: request failed for', result_url, ':', err)
        return None

    if result_response.status_code != 200:
        print('WARNING: could not fetch', result_url
              , '- HTTP status:', result_response.status_code)
        return None

    print('Fetching results')
    # extract results using the html parser
    soup = BeautifulSoup(result_response.text, features = 'html.parser')
    param_dict = {"mutationinformation" : mutation}
    for element_id in DYNAMUT_RESULT_IDS:
        element = soup.find(id = element_id)
        # soup.find returns None for an absent id; record that instead of
        # failing with AttributeError on .get_text()
        param_dict[element_id] = element.get_text() if element is not None else None
    return param_dict


#%%============================================================================
# single mutation: an empty dict (and hence an empty df) is used when the fetch fails
param_dict = fetch_single_result(single_result_url, mut) or {}
results_df = pd.DataFrame.from_dict(param_dict, orient = "index").T

#%% for loop
#%%
host_dynamut = 'http://biosig.unimelb.edu.au/dynamut'
# the batch id is the trailing run of digits in the batch results url
batch_url_number = re.search(r'([0-9]+)$', batch_result_url).group(0)
single_url = host_dynamut + '/single_results/' + batch_url_number

muts = ["S104R", "G24R"]

# one single-row df per successfully fetched mutation; they are combined once
# at the end because DataFrame.append was removed in pandas 2.0
per_mut_dfs = []

for i, mut in enumerate(muts):
    print('Running mutation', i, ':', mut)
    snp = mut
    single_result_url = single_url + '/' + snp
    print('Getting results from:', single_result_url)

    param_dict = fetch_single_result(single_result_url, snp)
    if param_dict is None:
        # failure already reported by fetch_single_result; move on to the next mutation
        continue

    results_df = pd.DataFrame.from_dict(param_dict, orient = "index").T
    print(results_df)
    per_mut_dfs.append(results_df)

# pd.concat raises on an empty list, so fall back to an empty df
dynamut_results_df = pd.concat(per_mut_dfs) if per_mut_dfs else pd.DataFrame()
print(dynamut_results_df)
|
||||
|
||||
|
|
@ -2,7 +2,7 @@ PDB=$1
|
|||
n=$2
|
||||
OUTDIR=$3
|
||||
cd ${OUTDIR}
|
||||
logger "Running mutrenamefiles with PDB: ${PDB} n: ${n} OUTDIR: ${OUTDIR}"
|
||||
|
||||
cp Matrix_Hbonds_${PDB}_Repair_${n}_PN.fxout Matrix_Hbonds_${PDB}_Repair_${n}_PN.txt
|
||||
sed -n '5,190p' Matrix_Hbonds_${PDB}_Repair_${n}_PN.fxout > Matrix_Hbonds_RR_${PDB}_Repair_${n}_PN.txt
|
||||
sed -n '194,379p' Matrix_Hbonds_${PDB}_Repair_${n}_PN.fxout > Matrix_Hbonds_MM_${PDB}_Repair_${n}_PN.txt
|
||||
|
@ -61,9 +61,3 @@ cp InteractingResidues_Volumetric_${PDB}_Repair_${n}_PN.fxout InteractingResidue
|
|||
sed -i '1,5d' InteractingResidues_Volumetric_${PDB}_Repair_${n}_PN.txt
|
||||
cp InteractingResidues_Disulfide_${PDB}_Repair_${n}_PN.fxout InteractingResidues_Disulfide_${PDB}_Repair_${n}_PN.txt
|
||||
sed -i '1,5d' InteractingResidues_Disulfide_${PDB}_Repair_${n}_PN.txt
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
PDB=$1
|
||||
OUTDIR=$2
|
||||
cd ${OUTDIR}
|
||||
logger "Running renamefiles"
|
||||
|
||||
cp Dif_${PDB}_Repair.fxout Dif_${PDB}_Repair.txt
|
||||
sed -i '1,8d' Dif_${PDB}_Repair.txt
|
||||
cp Matrix_Hbonds_${PDB}_Repair_PN.fxout Matrix_Hbonds_${PDB}_Repair_PN.txt
|
||||
|
@ -62,9 +62,3 @@ cp InteractingResidues_Volumetric_${PDB}_Repair_PN.fxout InteractingResidues_Vol
|
|||
sed -i '1,5d' InteractingResidues_Volumetric_${PDB}_Repair_PN.txt
|
||||
cp InteractingResidues_Disulfide_${PDB}_Repair_PN.fxout InteractingResidues_Disulfide_${PDB}_Repair_PN.txt
|
||||
sed -i '1,5d' InteractingResidues_Disulfide_${PDB}_Repair_PN.txt
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
#!/usr/bin/env python3
|
||||
import subprocess
|
||||
import os
|
||||
import sys
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from contextlib import suppress
|
||||
|
@ -8,6 +9,8 @@ from pathlib import Path
|
|||
import re
|
||||
import csv
|
||||
import argparse
|
||||
import shutil
|
||||
import time
|
||||
#https://realpython.com/python-pathlib/
|
||||
|
||||
# FIXME
|
||||
|
@ -22,8 +25,8 @@ homedir = os.path.expanduser('~')
|
|||
|
||||
# set working dir
|
||||
os.getcwd()
|
||||
os.chdir(homedir + '/git/LSHTM_analysis/foldx/')
|
||||
os.getcwd()
|
||||
#os.chdir(homedir + '/git/LSHTM_analysis/foldx/')
|
||||
#os.getcwd()
|
||||
|
||||
#=======================================================================
|
||||
#%% command line args
|
||||
|
@ -35,11 +38,12 @@ arg_parser.add_argument('-g', '--gene', help = 'gene name (case sensitive)',
|
|||
arg_parser.add_argument('--datadir', help = 'Data Directory. By default, it assmumes homedir + git/Data')
|
||||
arg_parser.add_argument('-i', '--input_dir', help = 'Input dir containing pdb files. By default, it assmumes homedir + <drug> + input')
|
||||
arg_parser.add_argument('-o', '--output_dir', help = 'Output dir for results. By default, it assmes homedir + <drug> + output')
|
||||
arg_parser.add_argument('-p', '--process_dir', help = 'Temp processing dir for running foldX. By default, it assmes homedir + <drug> + processing. Make sure it is somewhere with LOTS of storage as it writes all output!')
|
||||
arg_parser.add_argument('-p', '--process_dir', help = 'Temp processing dir for running foldX. By default, it assmes homedir + <drug> + processing. Make sure it is somewhere with LOTS of storage as it writes all output!') #FIXME
|
||||
|
||||
arg_parser.add_argument('-pdb', '--pdb_file', help = 'PDB File to process. By default, it assmumes a file called <gene>_complex.pdb in input_dir')
|
||||
arg_parser.add_argument('-P', '--pdb_file', help = 'PDB File to process. By default, it assmumes a file called <gene>_complex.pdb in input_dir')
|
||||
arg_parser.add_argument('-m', '--mutation_file', help = 'Mutation list. By default, assumes a file called <gene>_mcsm_snps.csv exists')
|
||||
|
||||
# FIXME: Doesn't work with 2 chains yet!
|
||||
arg_parser.add_argument('-c1', '--chain1', help = 'Chain1 ID', default = 'A') # case sensitive
|
||||
arg_parser.add_argument('-c2', '--chain2', help = 'Chain2 ID', default = 'B') # case sensitive
|
||||
|
||||
|
@ -64,11 +68,20 @@ chainA = args.chain1
|
|||
chainB = args.chain2
|
||||
pdb_filename = args.pdb_file
|
||||
|
||||
|
||||
# os.path.splitext will fail interestingly with file.pdb.txt.zip
|
||||
#pdb_name = os.path.splitext(pdb_file)[0]
|
||||
# Just the filename, thanks
|
||||
#pdb_name = Path(in_filename_pdb).stem
|
||||
|
||||
|
||||
# Handle the case where neither 'drug'
|
||||
# nor (indir,outdir,process_dir) are defined
|
||||
if not drug:
|
||||
if not indir or not outdir or not process_dir:
|
||||
print('ERROR: if "drug" is not specified, you must specify Input, Output, and Process directories')
|
||||
sys.exit()
|
||||
|
||||
#==============
|
||||
# directories
|
||||
#==============
|
||||
|
@ -83,27 +96,37 @@ if not outdir:
|
|||
|
||||
#TODO: perhaps better handled by refactoring code to prevent generating lots of output files!
|
||||
if not process_dir:
|
||||
process_dir = datadir + '/' + drug +'/' + 'processing'
|
||||
process_dir = datadir + '/' + drug + '/processing'
|
||||
|
||||
# Make all paths absolute in case the user forgot
|
||||
indir = os.path.abspath(indir)
|
||||
process_dir = os.path.abspath(process_dir)
|
||||
outdir = os.path.abspath(outdir)
|
||||
datadir = os.path.abspath(datadir)
|
||||
|
||||
#=======
|
||||
# input
|
||||
#=======
|
||||
# FIXME
|
||||
if pdb_filename:
|
||||
pdb_filename = os.path.abspath(pdb_filename)
|
||||
pdb_name = Path(pdb_filename).stem
|
||||
infile_pdb = pdb_filename
|
||||
else:
|
||||
pdb_filename = gene.lower() + '_complex.pdb'
|
||||
pdb_name = Path(pdb_filename).stem
|
||||
infile_pdb = indir + '/' + pdb_filename
|
||||
|
||||
infile_pdb = indir + '/' + pdb_filename
|
||||
actual_pdb_filename = Path(infile_pdb).name
|
||||
|
||||
if mut_filename:
|
||||
mutation_file = mut_filename
|
||||
mutation_file = os.path.abspath(mut_filename)
|
||||
infile_muts = mutation_file
|
||||
print('User-provided mutation file in use:', infile_muts)
|
||||
else:
|
||||
mutation_file = gene.lower() + '_mcsm_snps.csv'
|
||||
|
||||
infile_muts = outdir + '/' + mutation_file
|
||||
mutation_file = gene.lower() + '_mcsm_formatted_snps.csv'
|
||||
infile_muts = outdir + '/' + mutation_file
|
||||
print('WARNING: Assuming default mutation file:', infile_muts)
|
||||
|
||||
#=======
|
||||
# output
|
||||
|
@ -115,6 +138,7 @@ print('Arguments being passed:'
|
|||
, '\nDrug:', args.drug
|
||||
, '\ngene:', args.gene
|
||||
, '\ninput dir:', indir
|
||||
, '\nprocess dir:', process_dir
|
||||
, '\noutput dir:', outdir
|
||||
, '\npdb file:', infile_pdb
|
||||
, '\npdb name:', pdb_name
|
||||
|
@ -123,6 +147,10 @@ print('Arguments being passed:'
|
|||
, '\nchain1:', args.chain1
|
||||
, '\noutput file:', outfile_foldx
|
||||
, '\n=============================================================')
|
||||
|
||||
#### Delay for 10 seconds to check the params ####
|
||||
print('Sleeping for 10 seconds to give you time to cancel')
|
||||
time.sleep(10)
|
||||
#=======================================================================
|
||||
|
||||
def getInteractionEnergy(filename):
|
||||
|
@ -183,6 +211,19 @@ def loadFiles(df):
|
|||
f.close()
|
||||
return np.asarray(resultList, dtype=np.float32)
|
||||
|
||||
# TODO: put the subprocess call in a 'def'
|
||||
#def repairPDB():
|
||||
# subprocess.call(['foldx'
|
||||
# , '--command=RepairPDB'
|
||||
# , '--pdb-dir=' + indir
|
||||
# , '--pdb=' + actual_pdb_filename
|
||||
# , '--ionStrength=0.05'#
|
||||
# , '--pH=7'
|
||||
# , '--water=PREDICT'
|
||||
# , '--vdwDesign=1'
|
||||
# , 'outPDB=true'
|
||||
# , '--output-dir=' + process_dir])
|
||||
|
||||
#=======================================================================
|
||||
def main():
|
||||
pdbname = pdb_name
|
||||
|
@ -194,37 +235,128 @@ def main():
|
|||
nmuts = len(mutlist)
|
||||
print(nmuts)
|
||||
print(mutlist)
|
||||
|
||||
print('start')
|
||||
#subprocess.check_output(['bash','repairPDB.sh', pdbname, process_dir])
|
||||
subprocess.check_output(['bash','repairPDB.sh', indir, actual_pdb_filename, process_dir])
|
||||
# some common parameters for foldX
|
||||
foldx_common=' --ionStrength=0.05 --pH=7 --water=PREDICT --vdwDesign=1 '
|
||||
|
||||
print('end')
|
||||
output = subprocess.check_output(['bash', 'runfoldx.sh', pdbname, process_dir])
|
||||
print('\033[95mSTAGE: repair PDB (foldx subprocess) \033[0m')
|
||||
print('Running foldx RepairPDB for WT')
|
||||
subprocess.call(['foldx'
|
||||
, '--command=RepairPDB'
|
||||
, foldx_common
|
||||
, '--pdb-dir=' + os.path.dirname(pdb_filename)
|
||||
, '--pdb=' + actual_pdb_filename
|
||||
, 'outPDB=true'
|
||||
, '--output-dir=' + process_dir])
|
||||
print('\033[95mCOMPLETED STAGE: repair PDB\033[0m')
|
||||
print('\n==========================================================')
|
||||
|
||||
|
||||
print('\033[95mSTAGE: Foldx commands BM, PN and SD (foldx subprocess) for WT\033[0m')
|
||||
print('Running foldx BuildModel for WT')
|
||||
subprocess.call(['foldx'
|
||||
, '--command=BuildModel'
|
||||
, foldx_common
|
||||
, '--pdb-dir=' + process_dir
|
||||
, '--pdb=' + pdbname + '_Repair.pdb'
|
||||
, '--mutant-file="individual_list_' + pdbname +'.txt"'
|
||||
, 'outPDB=true'
|
||||
, '--numberOfRuns=1'
|
||||
, '--output-dir=' + process_dir], cwd=process_dir)
|
||||
|
||||
print('Running foldx PrintNetworks for WT')
|
||||
subprocess.call(['foldx'
|
||||
, '--command=PrintNetworks'
|
||||
, '--pdb-dir=' + process_dir
|
||||
, '--pdb=' + pdbname + '_Repair.pdb'
|
||||
, '--water=PREDICT'
|
||||
, '--vdwDesign=1'
|
||||
, '--output-dir=' + process_dir], cwd=process_dir)
|
||||
|
||||
print('Running foldx SequenceDetail for WT')
|
||||
subprocess.call(['foldx'
|
||||
, '--command=SequenceDetail'
|
||||
, '--pdb-dir=' + process_dir
|
||||
, '--pdb=' + pdbname + '_Repair.pdb'
|
||||
, '--water=PREDICT'
|
||||
, '--vdwDesign=1'
|
||||
, '--output-dir=' + process_dir], cwd=process_dir)
|
||||
print('\033[95mCOMPLETED STAGE: Foldx commands BM, PN and SD\033[0m')
|
||||
print('\n==========================================================')
|
||||
|
||||
|
||||
print('\033[95mSTAGE: Print Networks (foldx subprocess) for MT\033[0m')
|
||||
for n in range(1,nmuts+1):
|
||||
print(n)
|
||||
with suppress(Exception):
|
||||
subprocess.check_output(['bash', 'runPrintNetworks.sh', pdbname, str(n), process_dir])
|
||||
print('\033[95mNETWORK:\033[0m', n)
|
||||
print('Running foldx PrintNetworks for mutation', n)
|
||||
subprocess.call(['foldx'
|
||||
, '--command=PrintNetworks'
|
||||
, '--pdb-dir=' + process_dir
|
||||
, '--pdb=' + pdbname + '_Repair_' + str(n) + '.pdb'
|
||||
, '--water=PREDICT'
|
||||
, '--vdwDesign=1'
|
||||
, '--output-dir=' + process_dir], cwd=process_dir)
|
||||
print('\033[95mCOMPLETED STAGE: Print Networks (foldx subprocess) for MT\033[0m')
|
||||
print('\n==========================================================')
|
||||
|
||||
|
||||
print('\033[95mSTAGE: Rename Mutation Files (shell)\033[0m')
|
||||
for n in range(1,nmuts+1):
|
||||
print(n)
|
||||
print('\033[95mMUTATION:\033[0m', n)
|
||||
print('\033[96mCommand:\033[0m mutrenamefiles.sh %s %s %s' % (pdbname, str(n), process_dir ))
|
||||
#FIXME: bad design and needs to be done in a pythonic way
|
||||
with suppress(Exception):
|
||||
subprocess.check_output(['bash', 'mutrenamefiles.sh', pdbname, str(n), process_dir])
|
||||
print('\033[95mCOMPLETED STAGE: Rename Mutation Files (shell)\033[0m')
|
||||
print('\n==========================================================')
|
||||
|
||||
|
||||
print('\033[95mSTAGE: Rename Files (shell) for WT\033[0m')
|
||||
# FIXME: this is bad design and needs to be done in a pythonic way
|
||||
out = subprocess.check_output(['bash','renamefiles.sh', pdbname, process_dir])
|
||||
print('\033[95mCOMPLETED STAGE: Rename Files (shell) for WT\033[0m')
|
||||
print('\n==========================================================')
|
||||
|
||||
|
||||
if comp=='y':
|
||||
print('\033[95mSTAGE: Running foldx AnalyseComplex (foldx subprocess) for WT\033[0m')
|
||||
chain1=chainA
|
||||
chain2=chainB
|
||||
with suppress(Exception):
|
||||
subprocess.check_output(['bash','runcomplex.sh', pdbname, chain1, chain2, process_dir])
|
||||
for n in range(1,nmuts+1):
|
||||
with suppress(Exception):
|
||||
subprocess.check_output(['bash','mutruncomplex.sh', pdbname, chain1, chain2, str(n), process_dir])
|
||||
subprocess.call(['foldx'
|
||||
, '--command=AnalyseComplex'
|
||||
, '--pdb-dir=' + process_dir
|
||||
, '--pdb=' + pdbname + '_Repair.pdb'
|
||||
, '--analyseComplexChains=' + chain1 + ',' + chain2
|
||||
, '--water=PREDICT'
|
||||
, '--vdwDesign=1'
|
||||
, '--output-dir=' + process_dir], cwd=process_dir)
|
||||
|
||||
interactions = ['Distances','Electro_RR','Electro_MM','Electro_SM','Electro_SS','Disulfide_RR','Disulfide_MM','Disulfide_SM','Disulfide_SS',
|
||||
'Hbonds_RR','Hbonds_MM','Hbonds_SM','Hbonds_SS','Partcov_RR','Partcov_MM','Partcov_SM','Partcov_SS','VdWClashes_RR','VdWClashes_MM',
|
||||
'VdWClashes_SM','VdWClashes_SS','Volumetric_RR','Volumetric_MM','Volumetric_SM','Volumetric_SS']
|
||||
# FIXME why would we ever need to do this?!? Cargo-culted from runcomplex.sh
|
||||
ac_source = process_dir + '/Summary_' + pdbname + '_Repair_AC.fxout'
|
||||
ac_dest = process_dir + '/Summary_' + pdbname + '_Repair_AC.txt'
|
||||
shutil.copyfile(ac_source, ac_dest)
|
||||
print('\033[95mCOMPLETED STAGE: foldx AnalyseComplex (subprocess) for WT:\033[0m', n)
|
||||
|
||||
for n in range(1,nmuts+1):
|
||||
print('\033[95mSTAGE: Running foldx AnalyseComplex (foldx subprocess) for mutation:\033[0m', n)
|
||||
subprocess.call(['foldx'
|
||||
, '--command=AnalyseComplex'
|
||||
, '--pdb-dir=' + process_dir
|
||||
, '--pdb=' + pdbname + '_Repair_' + str(n) + '.pdb'
|
||||
, '--analyseComplexChains=' + chain1 + ',' + chain2
|
||||
, '--water=PREDICT'
|
||||
, '--vdwDesign=1'
|
||||
, '--output-dir=' + process_dir], cwd=process_dir)
|
||||
|
||||
# FIXME why would we ever need to do this?!? Cargo-culted from runcomplex.sh
|
||||
ac_mut_source = process_dir + '/Summary_' + pdbname + '_Repair_' + str(n) +'_AC.fxout'
|
||||
ac_mut_dest = process_dir + '/Summary_' + pdbname + '_Repair)' + str(n) +'_AC.txt'
|
||||
shutil.copyfile(ac_mut_source, ac_mut_dest)
|
||||
print('\033[95mCOMPLETED STAGE: foldx AnalyseComplex (subprocess) for mutation:\033[0m', n)
|
||||
print('\n==========================================================')
|
||||
|
||||
interactions = ['Distances','Electro_RR','Electro_MM','Electro_SM','Electro_SS','Disulfide_RR','Disulfide_MM','Disulfide_SM','Disulfide_SS', 'Hbonds_RR','Hbonds_MM','Hbonds_SM','Hbonds_SS','Partcov_RR','Partcov_MM','Partcov_SM','Partcov_SS','VdWClashes_RR','VdWClashes_MM','VdWClashes_SM','VdWClashes_SS','Volumetric_RR','Volumetric_MM','Volumetric_SM','Volumetric_SS']
|
||||
|
||||
dGdatafile = process_dir + '/Dif_' + pdbname + '_Repair.txt'
|
||||
dGdata = pd.read_csv(dGdatafile, sep = '\t')
|
||||
|
@ -266,8 +398,7 @@ def main():
|
|||
print(d)
|
||||
data[i+1] = d
|
||||
|
||||
interactions = ['ddG', 'Distances','Electro_RR','Electro_MM','Electro_SM','Electro_SS','Disulfide_RR','Disulfide_MM','Disulfide_SM','Disulfide_SS', 'Hbonds_RR','Hbonds_MM','Hbonds_SM','Hbonds_SS','Partcov_RR','Partcov_MM','Partcov_SM','Partcov_SS','VdWClashes_RR','VdWClashes_MM',
|
||||
'VdWClashes_SM','VdWClashes_SS','Volumetric_RR','Volumetric_MM','Volumetric_SM','Volumetric_SS']
|
||||
interactions = ['ddG', 'Distances','Electro_RR','Electro_MM','Electro_SM','Electro_SS','Disulfide_RR','Disulfide_MM','Disulfide_SM','Disulfide_SS', 'Hbonds_RR','Hbonds_MM','Hbonds_SM','Hbonds_SS','Partcov_RR','Partcov_MM','Partcov_SM','Partcov_SS','VdWClashes_RR','VdWClashes_MM','VdWClashes_SM','VdWClashes_SS','Volumetric_RR','Volumetric_MM','Volumetric_SM','Volumetric_SS']
|
||||
|
||||
print(interactions)
|
||||
|
||||
|
@ -291,8 +422,7 @@ def main():
|
|||
print(len(IE))
|
||||
data = np.append(data,[IE], axis = 0)
|
||||
print(data)
|
||||
interactions = ['ddG','Distances','Electro_RR','Electro_MM','Electro_SM','Electro_SS','Disulfide_RR','Disulfide_MM','Disulfide_SM','Disulfide_SS', 'Hbonds_RR','Hbonds_MM','Hbonds_SM','Hbonds_SS','Partcov_RR','Partcov_MM','Partcov_SM','Partcov_SS','VdWClashes_RR','VdWClashes_MM',
|
||||
'VdWClashes_SM','VdWClashes_SS','Volumetric_RR','Volumetric_MM','Volumetric_SM','Volumetric_SS','Interaction Energy']
|
||||
interactions = ['ddG','Distances','Electro_RR','Electro_MM','Electro_SM','Electro_SS','Disulfide_RR','Disulfide_MM','Disulfide_SM','Disulfide_SS','Hbonds_RR','Hbonds_MM','Hbonds_SM','Hbonds_SS','Partcov_RR','Partcov_MM','Partcov_SM','Partcov_SS','VdWClashes_RR','VdWClashes_MM','VdWClashes_SM','VdWClashes_SS','Volumetric_RR','Volumetric_MM','Volumetric_SM','Volumetric_SS','Interaction Energy']
|
||||
|
||||
mut_file = process_dir + '/individual_list_' + pdbname + '.txt'
|
||||
with open(mut_file) as csvfile:
|
||||
|
@ -330,6 +460,7 @@ def main():
|
|||
#outputfilename = 'foldx_results_' + pdbname + '.csv'
|
||||
#results.to_csv(outputfilename)
|
||||
results2.to_csv(outputfilename, index = False)
|
||||
print ('end')
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
|
466
foldx/runFoldx5.py
Executable file
466
foldx/runFoldx5.py
Executable file
|
@ -0,0 +1,466 @@
|
|||
#!/usr/bin/env python3
|
||||
import subprocess
|
||||
import os
|
||||
import sys
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from contextlib import suppress
|
||||
from pathlib import Path
|
||||
import re
|
||||
import csv
|
||||
import argparse
|
||||
import shutil
|
||||
import time
|
||||
#https://realpython.com/python-pathlib/
|
||||
|
||||
# FIXME
|
||||
#strong dependency of file and path names
|
||||
#cannot pass file with path. Need to pass them separately
|
||||
#assumptions made for dir struc as standard
|
||||
#datadir + drug + input
|
||||
|
||||
#=======================================================================
|
||||
#%% specify input and curr dir
|
||||
homedir = os.path.expanduser('~')

# NOTE: the working directory is deliberately left unchanged

#=======================================================================
#%% command line args
arg_parser = argparse.ArgumentParser()

arg_parser.add_argument('-d', '--drug', help = 'drug name', default = None)
arg_parser.add_argument('-g', '--gene', help = 'gene name (case sensitive)', default = None)

arg_parser.add_argument('--datadir', help = 'Data Directory. By default, it assmumes homedir + git/Data')
arg_parser.add_argument('-i', '--input_dir', help = 'Input dir containing pdb files. By default, it assmumes homedir + <drug> + input')
arg_parser.add_argument('-o', '--output_dir', help = 'Output dir for results. By default, it assmes homedir + <drug> + output')
arg_parser.add_argument('-p', '--process_dir', help = 'Temp processing dir for running foldX. By default, it assmes homedir + <drug> + processing. Make sure it is somewhere with LOTS of storage as it writes all output!') #FIXME

arg_parser.add_argument('-P', '--pdb_file', help = 'PDB File to process. By default, it assmumes a file called <gene>_complex.pdb in input_dir')
# help text names the default file that is actually used further down
arg_parser.add_argument('-m', '--mutation_file', help = 'Mutation list. By default, assumes a file called <gene>_mcsm_formatted_snps.csv exists')

# FIXME: Doesn't work with 2 chains yet!
arg_parser.add_argument('-c1', '--chain1', help = 'Chain1 ID', default = 'A') # case sensitive
arg_parser.add_argument('-c2', '--chain2', help = 'Chain2 ID', default = 'B') # case sensitive

args = arg_parser.parse_args()
#=======================================================================
#%% variable assignment: input and output
# Command line options
drug = args.drug
gene = args.gene

datadir = args.datadir
indir = args.input_dir
outdir = args.output_dir
process_dir = args.process_dir

mut_filename = args.mutation_file
chainA = args.chain1
chainB = args.chain2
pdb_filename = args.pdb_file

# 'gene' names the output file (and the default pdb/mutation files), so it
# is always needed; fail early with a clear message and a non-zero status
# instead of an AttributeError on gene.lower() further down
if not gene:
    sys.exit('ERROR: "gene" must be specified')

# Handle the case where neither 'drug'
# nor (indir,outdir,process_dir) are defined.
# sys.exit(<message>) writes the message to stderr and exits with status 1,
# so calling scripts can detect the failure.
if not drug and not (indir and outdir and process_dir):
    sys.exit('ERROR: if "drug" is not specified, you must specify Input, Output, and Process directories')

#==============
# directories
#==============
if not datadir:
    datadir = homedir + '/' + 'git/Data'

if not indir:
    indir = datadir + '/' + drug + '/input'

if not outdir:
    outdir = datadir + '/' + drug + '/output'

#TODO: perhaps better handled by refactoring code to prevent generating lots of output files!
if not process_dir:
    process_dir = datadir + '/' + drug + '/processing'

# Make all paths absolute in case the user forgot
indir = os.path.abspath(indir)
process_dir = os.path.abspath(process_dir)
outdir = os.path.abspath(outdir)
datadir = os.path.abspath(datadir)

#=======
# input
#=======
if pdb_filename:
    pdb_filename = os.path.abspath(pdb_filename)
else:
    # keep the full path here as well: main() builds foldx's --pdb-dir from
    # os.path.dirname(pdb_filename), which is empty for a bare file name
    pdb_filename = indir + '/' + gene.lower() + '_complex.pdb'

pdb_name = Path(pdb_filename).stem
infile_pdb = pdb_filename
actual_pdb_filename = Path(infile_pdb).name

if mut_filename:
    mutation_file = os.path.abspath(mut_filename)
    infile_muts = mutation_file
    print('User-provided mutation file in use:', infile_muts)
else:
    mutation_file = gene.lower() + '_mcsm_formatted_snps.csv'
    infile_muts = outdir + '/' + mutation_file
    print('WARNING: Assuming default mutation file:', infile_muts)

#=======
# output
#=======
out_filename = gene.lower() + '_foldx.csv'
outfile_foldx = outdir + '/' + out_filename

print('Arguments being passed:'
      , '\nDrug:', args.drug
      , '\ngene:', args.gene
      , '\ninput dir:', indir
      , '\nprocess dir:', process_dir
      , '\noutput dir:', outdir
      , '\npdb file:', infile_pdb
      , '\npdb name:', pdb_name
      , '\nactual pdb name:', actual_pdb_filename
      , '\nmutation file:', infile_muts
      , '\nchain1:', args.chain1
      , '\noutput file:', outfile_foldx
      , '\n=============================================================')

#### Delay for 10 seconds to check the params ####
print('Sleeping for 10 seconds to give you time to cancel')
time.sleep(10)
|
||||
#=======================================================================
|
||||
|
||||
def getInteractionEnergy(filename):
    """Return the 'Interaction Energy' value of the first row of a tab-separated file.

    filename: path (or file-like object) accepted by pandas.read_csv; the
    first line is used as the header and must contain an
    'Interaction Energy' column.
    """
    summary = pd.read_csv(filename, sep = '\t')
    energies = summary['Interaction Energy']
    # label-based lookup of row 0 (the first row under the default index)
    return energies.loc[0]
|
||||
|
||||
def getInteractions(filename):
    """Count the cells equal to 1 in a tab-separated matrix file.

    The file is read with its first row as the header and its first column
    as the index; the result is the number of (row, column) positions that
    getIndexes reports for the value 1.
    """
    matrix = pd.read_csv(filename, sep = '\t', header = 0, index_col = 0)
    return len(getIndexes(matrix, 1))
|
||||
|
||||
def formatMuts(mut_file,pdbname):
    """Write the mutation list file for pdbname and return the mutations.

    Reads the first column of the CSV file mut_file, inserts the module-level
    chain ID (chainA) after the first character of every entry and writes the
    results, one per line and each terminated by ';', to
    <process_dir>/individual_list_<pdbname>.txt (process_dir is a
    module-level variable).

    mut_file: path of the CSV file holding one mutation per row.
    pdbname:  name used to build the output file name.

    Returns the list of reformatted mutations (without the trailing ';').
    Completely blank lines in mut_file are skipped.
    """
    # newline='' is what the csv module asks for when opening files to read
    with open(mut_file, newline = '') as csvfile:
        # csv.reader yields an empty list for a blank line; skip those
        # instead of failing with IndexError on row[0]
        muts = [row[0] for row in csv.reader(csvfile) if row]

    mut_list = []
    outfile = process_dir + '/individual_list_' + pdbname + '.txt'
    with open(outfile, 'w') as output:
        for m in muts:
            print(m)
            # chain ID goes straight after the first character of the entry
            mut = m[:1] + chainA + m[1:]
            mut_list.append(mut)
            mut = mut + ';'
            print(mut)
            output.write(mut)
            output.write('\n')
    return mut_list
|
||||
|
||||
def getIndexes(data, value):
    """Return the (row label, column label) pairs of every cell of data equal to value.

    data:  pandas DataFrame to search.
    value: value to look for (matched with DataFrame.isin).

    Pairs are ordered column by column, and by row order within a column.
    """
    hits = data.isin([value])
    hits.columns = data.columns.values
    # boolean Series indexed by column label: does the column contain a hit?
    col_has_hit = hits.any()

    positions = []
    for col in col_has_hit[col_has_hit == True].index:
        col_hits = hits[col]
        positions.extend((row, col) for row in col_hits[col_hits == True].index)
    return positions
|
||||
|
||||
def loadFiles(df):
    """Load a tab-separated text file of numbers into a float32 numpy array.

    df: path of the text file; every line holds tab-separated numeric values.

    Returns a numpy array of dtype float32 with one row per line of the file
    (an empty array for an empty file). Raises ValueError if a field cannot
    be converted to a number.
    """
    resultList = []
    # 'with' closes the file even if a conversion below raises
    with open(df, 'r') as f:
        for line in f:
            sVals = line.rstrip('\n').split('\t')
            # the original split into 'aVals' but converted the undefined
            # name 'sVals', which raised NameError on the first line
            resultList.append(list(map(np.float32, sVals)))
    return np.asarray(resultList, dtype=np.float32)
|
||||
|
||||
# TODO: put the subprocess call in a 'def'
|
||||
#def repairPDB():
|
||||
# subprocess.call(['foldx'
|
||||
# , '--command=RepairPDB'
|
||||
# , '--pdb-dir=' + indir
|
||||
# , '--pdb=' + actual_pdb_filename
|
||||
# , '--ionStrength=0.05'#
|
||||
# , '--pH=7'
|
||||
# , '--water=PREDICT'
|
||||
# , '--vdwDesign=1'
|
||||
# , 'outPDB=true'
|
||||
# , '--output-dir=' + process_dir])
|
||||
|
||||
#=======================================================================
|
||||
def _foldx_on_processed(command, pdb, *extra):
    """Run one foldx5 command on a PDB file located in process_dir.

    ``extra`` options are placed between ``--pdb`` and ``--water``. Output
    goes to process_dir, which is also used as the working directory.
    """
    subprocess.call(['foldx5'
                     , '--command=' + command
                     , '--pdb-dir=' + process_dir
                     , '--pdb=' + pdb
                     , *extra
                     , '--water=PREDICT'
                     , '--vdwDesign=1'
                     , '--output-dir=' + process_dir], cwd=process_dir)


def main():
    """Run the FoldX pipeline for one structure and write a results CSV.

    Stages: format the mutation list, RepairPDB, BuildModel, PrintNetworks
    and SequenceDetail for the wild type, PrintNetworks for every mutant,
    rename/trim the FoldX outputs via the helper shell scripts, optionally
    AnalyseComplex (only when ``comp == 'y'``), then tabulate ddG and the
    wild-type-minus-mutant interaction counts and write them to
    ``outfile_foldx``.

    All inputs (``pdb_name``, ``infile_muts``, ``pdb_filename``,
    ``actual_pdb_filename``, ``process_dir``, ``chainA``, ``chainB``,
    ``outfile_foldx``) are read from module-level names.
    """
    pdbname = pdb_name
    comp = '' # for complex only
    mut_filename = infile_muts #pnca_mcsm_snps.csv
    mutlist = formatMuts(mut_filename, pdbname)

    print(mutlist)
    nmuts = len(mutlist)
    print(nmuts)
    print(mutlist)

    print('start')
    # Some common parameters for foldX. Kept as separate argv tokens: a
    # single space-separated string would reach foldx5 as ONE argument
    # because subprocess does not split list elements.
    foldx_common = ['--ionStrength=0.05', '--pH=7', '--water=PREDICT', '--vdwDesign=1']

    print('\033[95mSTAGE: repair PDB (foldx subprocess) \033[0m')
    print('Running foldx RepairPDB for WT')
    # NOTE(review): 'outPDB=true' has no leading '--' (the shell script uses
    # '--out-pdb=true' for BuildModel) - confirm which spelling foldx5 expects.
    subprocess.call(['foldx5'
                     , '--command=RepairPDB'
                     , *foldx_common
                     , '--pdb-dir=' + os.path.dirname(pdb_filename)
                     , '--pdb=' + actual_pdb_filename
                     , 'outPDB=true'
                     , '--output-dir=' + process_dir])
    print('\033[95mCOMPLETED STAGE: repair PDB\033[0m')
    print('\n==========================================================')

    print('\033[95mSTAGE: Foldx commands BM, PN and SD (foldx subprocess) for WT\033[0m')
    print('Running foldx BuildModel for WT')
    # NOTE(review): the double quotes around the mutant-file name are passed
    # to foldx5 literally (no shell strips them) - confirm this is intended.
    subprocess.call(['foldx5'
                     , '--command=BuildModel'
                     , *foldx_common
                     , '--pdb-dir=' + process_dir
                     , '--pdb=' + pdbname + '_Repair.pdb'
                     , '--mutant-file="individual_list_' + pdbname +'.txt"'
                     , 'outPDB=true'
                     , '--numberOfRuns=1'
                     , '--output-dir=' + process_dir], cwd=process_dir)

    print('Running foldx PrintNetworks for WT')
    _foldx_on_processed('PrintNetworks', pdbname + '_Repair.pdb')

    print('Running foldx SequenceDetail for WT')
    _foldx_on_processed('SequenceDetail', pdbname + '_Repair.pdb')
    print('\033[95mCOMPLETED STAGE: Foldx commands BM, PN and SD\033[0m')
    print('\n==========================================================')

    print('\033[95mSTAGE: Print Networks (foldx subprocess) for MT\033[0m')
    for n in range(1,nmuts+1):
        print('\033[95mNETWORK:\033[0m', n)
        print('Running foldx PrintNetworks for mutation', n)
        _foldx_on_processed('PrintNetworks', pdbname + '_Repair_' + str(n) + '.pdb')
    print('\033[95mCOMPLETED STAGE: Print Networks (foldx subprocess) for MT\033[0m')
    print('\n==========================================================')

    print('\033[95mSTAGE: Rename Mutation Files (shell)\033[0m')
    for n in range(1,nmuts+1):
        print('\033[95mMUTATION:\033[0m', n)
        print('\033[96mCommand:\033[0m mutrenamefiles.sh %s %s %s' % (pdbname, str(n), process_dir ))
        #FIXME: bad design and needs to be done in a pythonic way
        # Still best-effort (the pipeline carries on), but the failure is
        # reported instead of being silently swallowed.
        try:
            subprocess.check_output(['bash', 'mutrenamefiles.sh', pdbname, str(n), process_dir])
        except Exception as err:
            print('WARNING: mutrenamefiles.sh failed for mutation', n, ':', err)
    print('\033[95mCOMPLETED STAGE: Rename Mutation Files (shell)\033[0m')
    print('\n==========================================================')

    print('\033[95mSTAGE: Rename Files (shell) for WT\033[0m')
    # FIXME: this is bad design and needs to be done in a pythonic way
    subprocess.check_output(['bash','renamefiles.sh', pdbname, process_dir])
    print('\033[95mCOMPLETED STAGE: Rename Files (shell) for WT\033[0m')
    print('\n==========================================================')

    if comp=='y':
        print('\033[95mSTAGE: Running foldx AnalyseComplex (foldx subprocess) for WT\033[0m')
        chain1=chainA
        chain2=chainB
        chains_opt = '--analyseComplexChains=' + chain1 + ',' + chain2
        _foldx_on_processed('AnalyseComplex', pdbname + '_Repair.pdb', chains_opt)

        # FIXME why would we ever need to do this?!? Cargo-culted from runcomplex.sh
        ac_source = process_dir + '/Summary_' + pdbname + '_Repair_AC.fxout'
        ac_dest = process_dir + '/Summary_' + pdbname + '_Repair_AC.txt'
        shutil.copyfile(ac_source, ac_dest)
        # no mutation number here: this is the wild-type stage
        print('\033[95mCOMPLETED STAGE: foldx AnalyseComplex (subprocess) for WT:\033[0m')

        for n in range(1,nmuts+1):
            print('\033[95mSTAGE: Running foldx AnalyseComplex (foldx subprocess) for mutation:\033[0m', n)
            _foldx_on_processed('AnalyseComplex', pdbname + '_Repair_' + str(n) + '.pdb', chains_opt)

            # FIXME why would we ever need to do this?!? Cargo-culted from runcomplex.sh
            # The destination must be '_Repair_<n>_AC.txt': that is the
            # name read back below when interaction energies are computed.
            ac_mut_source = process_dir + '/Summary_' + pdbname + '_Repair_' + str(n) +'_AC.fxout'
            ac_mut_dest = process_dir + '/Summary_' + pdbname + '_Repair_' + str(n) +'_AC.txt'
            shutil.copyfile(ac_mut_source, ac_mut_dest)
            print('\033[95mCOMPLETED STAGE: foldx AnalyseComplex (subprocess) for mutation:\033[0m', n)
        print('\n==========================================================')

    # 'Distances' followed by every interaction type in its four variants
    interactions = ['Distances'] + [
        kind + '_' + pair
        for kind in ('Electro', 'Disulfide', 'Hbonds', 'Partcov', 'VdWClashes', 'Volumetric')
        for pair in ('RR', 'MM', 'SM', 'SS')]

    dGdatafile = process_dir + '/Dif_' + pdbname + '_Repair.txt'
    dGdata = pd.read_csv(dGdatafile, sep = '\t')

    print('ddG')
    print(len(dGdata))
    ddG = dGdata['total energy'].tolist()

    nint = len(interactions)
    wt_int = []
    for name in interactions:
        filename = process_dir + '/Matrix_' + name + '_'+ pdbname + '_Repair_PN.txt'
        wt_int.append(getInteractions(filename))
    print('wt')
    print(wt_int)

    # row 0 holds ddG, rows 1..nint hold one interaction type each
    ntotal = nint+1
    print(ntotal)
    print(nmuts)
    data = np.empty((ntotal,nmuts))
    data[0] = ddG
    print(data)
    for i, (name, wt_count) in enumerate(zip(interactions, wt_int)):
        d=[]
        for n in range(1, nmuts+1):
            print(i)
            filename = process_dir + '/Matrix_' + name + '_' + pdbname + '_Repair_' + str(n) + '_PN.txt'
            mut = getInteractions(filename)
            diff = wt_count - mut
            print(diff)
            print(wt_count)
            print(mut)
            d.append(diff)
        print(d)
        data[i+1] = d

    row_labels = ['ddG'] + interactions
    print(row_labels)

    IE = []
    if comp=='y':
        wtfilename = process_dir + '/Summary_' + pdbname + '_Repair_AC.txt'
        wtE = getInteractionEnergy(wtfilename)
        print(wtE)
        for n in range(1,nmuts+1):
            print(n)
            filename = process_dir + '/Summary_' + pdbname + '_Repair_' + str(n) + '_AC.txt'
            mutE = getInteractionEnergy(filename)
            print(mutE)
            diff = wtE - mutE
            print(diff)
            IE.append(diff)
        print(IE)
        IEresults = pd.DataFrame(IE,columns = ['Interaction Energy'], index = mutlist)
        IEfilename = 'foldx_complexresults_'+pdbname+'.csv'
        IEresults.to_csv(IEfilename)
        print(len(IE))
        data = np.append(data,[IE], axis = 0)
        print(data)
        row_labels = row_labels + ['Interaction Energy']

    # column labels are re-read from the individual list, so they carry the
    # chain id and the trailing ';' that the regex below strips again
    mut_file = process_dir + '/individual_list_' + pdbname + '.txt'
    with open(mut_file) as csvfile:
        mutlist = [row[0] for row in csv.reader(csvfile)]
    print(mutlist)
    print(len(mutlist))
    print(data)
    # (the former 'results.append(ddG)' discarded its result and fails on
    # pandas >= 2.0; ddG is already row 0 of 'data')
    results = pd.DataFrame(data, columns = mutlist, index = row_labels)
    #print(results.head())

    # my style formatted results
    results2 = results.T # transpose df
    results2.index.name = 'mutationinformation' # assign name to index
    results2 = results2.reset_index() # turn it into a columns

    results2['mutationinformation'] = results2['mutationinformation'].replace({r'([A-Z]{1})[A-Z]{1}([0-9]+[A-Z]{1});' : r'\1 \2'}, regex = True) # capture mcsm style muts (i.e not the chain id)
    results2['mutationinformation'] = results2['mutationinformation'].str.replace(' ', '') # remove empty space

    results2.rename(columns = {'Distances': 'Contacts'}, inplace = True)

    # lower case columns
    results2.columns = results2.columns.str.lower()

    print('Writing file in the format below:\n'
          , results2.head()
          , '\nNo. of rows:', len(results2)
          , '\nNo. of cols:', len(results2.columns))

    outputfilename = outfile_foldx
    #outputfilename = 'foldx_results_' + pdbname + '.csv'
    #results.to_csv(outputfilename)
    results2.to_csv(outputfilename, index = False)
    print ('end')


if __name__ == '__main__':
    main()
|
|
@ -1,7 +1,7 @@
|
|||
INDIR=$1
|
||||
PDB=$2
|
||||
OUTDIR=$3
|
||||
|
||||
cd ${OUTDIR}
|
||||
logger "Running repairPDB"
|
||||
|
||||
#foldx --command=RepairPDB --pdb="${PDB}.pdb" --ionStrength=0.05 --pH=7 --water=PREDICT --vdwDesign=1 outPDB=true --output-dir=${OUTDIR}
|
|
@ -7,4 +7,3 @@ logger "Running runcomplex"
|
|||
foldx --command=AnalyseComplex --pdb="${PDB}_Repair.pdb" --analyseComplexChains=${A},${B} --water=PREDICT --vdwDesign=1 --output-dir=${OUTDIR}
|
||||
cp ${OUTDIR}/Summary_${PDB}_Repair_AC.fxout ${OUTDIR}/Summary_${PDB}_Repair_AC.txt
|
||||
#sed -i .bak -e 1,8d ${OUTDIR}/Summary_${PDB}_Repair_AC.txt
|
||||
|
|
@ -2,7 +2,7 @@ PDB=$1
|
|||
OUTDIR=$2
|
||||
cd ${OUTDIR}
|
||||
pwd
|
||||
ls
|
||||
ls -l
|
||||
logger "Running runfoldx"
|
||||
foldx --command=BuildModel --pdb="${PDB}_Repair.pdb" --mutant-file="individual_list_${PDB}.txt" --ionStrength=0.05 --pH=7 --water=PREDICT --vdwDesign=1 --out-pdb=true --numberOfRuns=1 --output-dir=${OUTDIR}
|
||||
foldx --command=PrintNetworks --pdb="${PDB}_Repair.pdb" --water=PREDICT --vdwDesign=1 --output-dir=${OUTDIR}
|
|
@ -1,14 +1,15 @@
|
|||
PDB=$1
|
||||
n=$2
|
||||
#cd /home/tanu/git/LSHTM_analysis/foldx/
|
||||
logger "Running mutrenamefiles_mac"
|
||||
OUTDIR=$3
|
||||
cd ${OUTDIR}
|
||||
#cd /home/git/LSHTM_analysis/foldx/test2
|
||||
cp Matrix_Hbonds_${PDB}_Repair_${n}_PN.fxout Matrix_Hbonds_${PDB}_Repair_${n}_PN.txt
|
||||
sed -n '5,190p' Matrix_Hbonds_${PDB}_Repair_${n}_PN.fxout > Matrix_Hbonds_RR_${PDB}_Repair_${n}_PN.txt
|
||||
sed -n '194,379p' Matrix_Hbonds_${PDB}_Repair_${n}_PN.fxout > Matrix_Hbonds_MM_${PDB}_Repair_${n}_PN.txt
|
||||
sed -n '383,568p' Matrix_Hbonds_${PDB}_Repair_${n}_PN.fxout > Matrix_Hbonds_SM_${PDB}_Repair_${n}_PN.txt
|
||||
sed -n '572,757p' Matrix_Hbonds_${PDB}_Repair_${n}_PN.fxout > Matrix_Hbonds_SS_${PDB}_Repair_${n}_PN.txt
|
||||
cp Matrix_Distances_${PDB}_Repair_${n}_PN.fxout Matrix_Distances_${PDB}_Repair_${n}_PN.txt
|
||||
sed -i .bak -e 1,4d Matrix_Distances_${PDB}_Repair_${n}_PN.txt
|
||||
sed -i '1,4d' Matrix_Distances_${PDB}_Repair_${n}_PN.txt
|
||||
cp Matrix_Volumetric_${PDB}_Repair_${n}_PN.fxout Matrix_Volumetric_${PDB}_Repair_${n}_PN.txt
|
||||
sed -n '5,190p' Matrix_Volumetric_${PDB}_Repair_${n}_PN.fxout > Matrix_Volumetric_RR_${PDB}_Repair_${n}_PN.txt
|
||||
sed -n '194,379p' Matrix_Volumetric_${PDB}_Repair_${n}_PN.fxout > Matrix_Volumetric_MM_${PDB}_Repair_${n}_PN.txt
|
||||
|
@ -35,34 +36,28 @@ sed -n '194,379p' Matrix_VdWClashes_${PDB}_Repair_${n}_PN.fxout > Matrix_VdWClas
|
|||
sed -n '383,568p' Matrix_VdWClashes_${PDB}_Repair_${n}_PN.fxout > Matrix_VdWClashes_SM_${PDB}_Repair_${n}_PN.txt
|
||||
sed -n '572,757p' Matrix_VdWClashes_${PDB}_Repair_${n}_PN.fxout > Matrix_VdWClashes_SS_${PDB}_Repair_${n}_PN.txt
|
||||
cp AllAtoms_Disulfide_${PDB}_Repair_${n}_PN.fxout AllAtoms_Disulfide_${PDB}_Repair_${n}_PN.txt
|
||||
sed -i .bak -e 1,2d AllAtoms_Disulfide_${PDB}_Repair_${n}_PN.txt
|
||||
sed -i '1,2d' AllAtoms_Disulfide_${PDB}_Repair_${n}_PN.txt
|
||||
cp AllAtoms_Electro_${PDB}_Repair_${n}_PN.fxout AllAtoms_Electro_${PDB}_Repair_${n}_PN.txt
|
||||
sed -i .bak -e 1,2d AllAtoms_Electro_${PDB}_Repair_${n}_PN.txt
|
||||
sed -i '1,2d' AllAtoms_Electro_${PDB}_Repair_${n}_PN.txt
|
||||
cp AllAtoms_Hbonds_${PDB}_Repair_${n}_PN.fxout AllAtoms_Hbonds_${PDB}_Repair_${n}_PN.txt
|
||||
sed -i .bak -e 1,2d AllAtoms_Hbonds_${PDB}_Repair_${n}_PN.txt
|
||||
sed -i '1,2d' AllAtoms_Hbonds_${PDB}_Repair_${n}_PN.txt
|
||||
cp AllAtoms_Partcov_${PDB}_Repair_${n}_PN.fxout AllAtoms_Partcov_${PDB}_Repair_${n}_PN.txt
|
||||
sed -i .bak -e 1,2d AllAtoms_Partcov_${PDB}_Repair_${n}_PN.txt
|
||||
sed -i '1,2d' AllAtoms_Partcov_${PDB}_Repair_${n}_PN.txt
|
||||
cp AllAtoms_VdWClashes_${PDB}_Repair_${n}_PN.fxout AllAtoms_VdWClashes_${PDB}_Repair_${n}_PN.txt
|
||||
sed -i .bak -e 1,2d AllAtoms_VdWClashes_${PDB}_Repair_${n}_PN.txt
|
||||
sed -i '1,2d' AllAtoms_VdWClashes_${PDB}_Repair_${n}_PN.txt
|
||||
cp AllAtoms_Volumetric_${PDB}_Repair_${n}_PN.fxout AllAtoms_Volumetric_${PDB}_Repair_${n}_PN.txt
|
||||
sed -i .bak -e 1,2d AllAtoms_Volumetric_${PDB}_Repair_${n}_PN.txt
|
||||
sed -i '1,2d' AllAtoms_Volumetric_${PDB}_Repair_${n}_PN.txt
|
||||
cp InteractingResidues_VdWClashes_${PDB}_Repair_${n}_PN.fxout InteractingResidues_VdWClashes_${PDB}_Repair_${n}_PN.txt
|
||||
sed -i .bak -e 1,5d InteractingResidues_VdWClashes_${PDB}_Repair_${n}_PN.txt
|
||||
sed -i '1,5d' InteractingResidues_VdWClashes_${PDB}_Repair_${n}_PN.txt
|
||||
cp InteractingResidues_Distances_${PDB}_Repair_${n}_PN.fxout InteractingResidues_Distances_${PDB}_Repair_${n}_PN.txt
|
||||
sed -i .bak -e 1,5d InteractingResidues_Distances_${PDB}_Repair_${n}_PN.txt
|
||||
sed -i '1,5d' InteractingResidues_Distances_${PDB}_Repair_${n}_PN.txt
|
||||
cp InteractingResidues_Electro_${PDB}_Repair_${n}_PN.fxout InteractingResidues_Electro_${PDB}_Repair_${n}_PN.txt
|
||||
sed -i .bak -e 1,5d InteractingResidues_Electro_${PDB}_Repair_${n}_PN.txt
|
||||
sed -i '1,5d' InteractingResidues_Electro_${PDB}_Repair_${n}_PN.txt
|
||||
cp InteractingResidues_Hbonds_${PDB}_Repair_${n}_PN.fxout InteractingResidues_Hbonds_${PDB}_Repair_${n}_PN.txt
|
||||
sed -i .bak -e 1,5d InteractingResidues_Hbonds_${PDB}_Repair_${n}_PN.txt
|
||||
sed -i '1,5d' InteractingResidues_Hbonds_${PDB}_Repair_${n}_PN.txt
|
||||
cp InteractingResidues_Partcov_${PDB}_Repair_${n}_PN.fxout InteractingResidues_Partcov_${PDB}_Repair_${n}_PN.txt
|
||||
sed -i .bak -e 1,5d InteractingResidues_Partcov_${PDB}_Repair_${n}_PN.txt
|
||||
sed -i '1,5d' InteractingResidues_Partcov_${PDB}_Repair_${n}_PN.txt
|
||||
cp InteractingResidues_Volumetric_${PDB}_Repair_${n}_PN.fxout InteractingResidues_Volumetric_${PDB}_Repair_${n}_PN.txt
|
||||
sed -i .bak -e 1,5d InteractingResidues_Volumetric_${PDB}_Repair_${n}_PN.txt
|
||||
sed -i '1,5d' InteractingResidues_Volumetric_${PDB}_Repair_${n}_PN.txt
|
||||
cp InteractingResidues_Disulfide_${PDB}_Repair_${n}_PN.fxout InteractingResidues_Disulfide_${PDB}_Repair_${n}_PN.txt
|
||||
sed -i .bak -e 1,5d InteractingResidues_Disulfide_${PDB}_Repair_${n}_PN.txt
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
sed -i '1,5d' InteractingResidues_Disulfide_${PDB}_Repair_${n}_PN.txt
|
|
@ -1,14 +1,16 @@
|
|||
PDB=$1
|
||||
logger "Running renamefiles_mac"
|
||||
#cp Dif_${PDB}_Repair.fxout Dif_${PDB}_Repair.txt
|
||||
sed -i '.bak' -e 1,8d Dif_${PDB}_Repair.txt
|
||||
OUTDIR=$2
|
||||
cd ${OUTDIR}
|
||||
#cd /home/git/LSHTM_analysis/foldx/test2
|
||||
cp Dif_${PDB}_Repair.fxout Dif_${PDB}_Repair.txt
|
||||
sed -i '1,8d' Dif_${PDB}_Repair.txt
|
||||
cp Matrix_Hbonds_${PDB}_Repair_PN.fxout Matrix_Hbonds_${PDB}_Repair_PN.txt
|
||||
sed -n '5,190p' Matrix_Hbonds_${PDB}_Repair_PN.fxout > Matrix_Hbonds_RR_${PDB}_Repair_PN.txt
|
||||
sed -n '194,379p' Matrix_Hbonds_${PDB}_Repair_PN.fxout > Matrix_Hbonds_MM_${PDB}_Repair_PN.txt
|
||||
sed -n '383,568p' Matrix_Hbonds_${PDB}_Repair_PN.fxout > Matrix_Hbonds_SM_${PDB}_Repair_PN.txt
|
||||
sed -n '572,757p' Matrix_Hbonds_${PDB}_Repair_PN.fxout > Matrix_Hbonds_SS_${PDB}_Repair_PN.txt
|
||||
cp Matrix_Distances_${PDB}_Repair_PN.fxout Matrix_Distances_${PDB}_Repair_PN.txt
|
||||
sed -i '.bak' -e 1,4d Matrix_Distances_${PDB}_Repair_PN.txt
|
||||
sed -i '1,4d' Matrix_Distances_${PDB}_Repair_PN.txt
|
||||
cp Matrix_Volumetric_${PDB}_Repair_PN.fxout Matrix_Volumetric_${PDB}_Repair_PN.txt
|
||||
sed -n '5,190p' Matrix_Volumetric_${PDB}_Repair_PN.fxout > Matrix_Volumetric_RR_${PDB}_Repair_PN.txt
|
||||
sed -n '194,379p' Matrix_Volumetric_${PDB}_Repair_PN.fxout > Matrix_Volumetric_MM_${PDB}_Repair_PN.txt
|
||||
|
@ -35,34 +37,28 @@ sed -n '194,379p' Matrix_VdWClashes_${PDB}_Repair_PN.fxout > Matrix_VdWClashes_M
|
|||
sed -n '383,568p' Matrix_VdWClashes_${PDB}_Repair_PN.fxout > Matrix_VdWClashes_SM_${PDB}_Repair_PN.txt
|
||||
sed -n '572,757p' Matrix_VdWClashes_${PDB}_Repair_PN.fxout > Matrix_VdWClashes_SS_${PDB}_Repair_PN.txt
|
||||
cp AllAtoms_Disulfide_${PDB}_Repair_PN.fxout AllAtoms_Disulfide_${PDB}_Repair_PN.txt
|
||||
sed -i '.bak' -e 1,2d AllAtoms_Disulfide_${PDB}_Repair_PN.txt
|
||||
sed -i '1,2d' AllAtoms_Disulfide_${PDB}_Repair_PN.txt
|
||||
cp AllAtoms_Electro_${PDB}_Repair_PN.fxout AllAtoms_Electro_${PDB}_Repair_PN.txt
|
||||
sed -i '.bak' -e 1,2d AllAtoms_Electro_${PDB}_Repair_PN.txt
|
||||
sed -i '1,2d' AllAtoms_Electro_${PDB}_Repair_PN.txt
|
||||
cp AllAtoms_Hbonds_${PDB}_Repair_PN.fxout AllAtoms_Hbonds_${PDB}_Repair_PN.txt
|
||||
sed -i '.bak' -e 1,2d AllAtoms_Hbonds_${PDB}_Repair_PN.txt
|
||||
sed -i '1,2d' AllAtoms_Hbonds_${PDB}_Repair_PN.txt
|
||||
cp AllAtoms_Partcov_${PDB}_Repair_PN.fxout AllAtoms_Partcov_${PDB}_Repair_PN.txt
|
||||
sed -i '.bak' -e 1,2d AllAtoms_Partcov_${PDB}_Repair_PN.txt
|
||||
sed -i '1,2d' AllAtoms_Partcov_${PDB}_Repair_PN.txt
|
||||
cp AllAtoms_VdWClashes_${PDB}_Repair_PN.fxout AllAtoms_VdWClashes_${PDB}_Repair_PN.txt
|
||||
sed -i '.bak' -e 1,2d AllAtoms_VdWClashes_${PDB}_Repair_PN.txt
|
||||
sed -i '1,2d' AllAtoms_VdWClashes_${PDB}_Repair_PN.txt
|
||||
cp AllAtoms_Volumetric_${PDB}_Repair_PN.fxout AllAtoms_Volumetric_${PDB}_Repair_PN.txt
|
||||
sed -i '.bak' -e 1,2d AllAtoms_Volumetric_${PDB}_Repair_PN.txt
|
||||
sed -i '1,2d' AllAtoms_Volumetric_${PDB}_Repair_PN.txt
|
||||
cp InteractingResidues_VdWClashes_${PDB}_Repair_PN.fxout InteractingResidues_VdWClashes_${PDB}_Repair_PN.txt
|
||||
sed -i '.bak' -e 1,5d InteractingResidues_VdWClashes_${PDB}_Repair_PN.txt
|
||||
sed -i '1,5d' InteractingResidues_VdWClashes_${PDB}_Repair_PN.txt
|
||||
cp InteractingResidues_Distances_${PDB}_Repair_PN.fxout InteractingResidues_Distances_${PDB}_Repair_PN.txt
|
||||
sed -i '.bak' -e 1,5d InteractingResidues_Distances_${PDB}_Repair_PN.txt
|
||||
sed -i '1,5d' InteractingResidues_Distances_${PDB}_Repair_PN.txt
|
||||
cp InteractingResidues_Electro_${PDB}_Repair_PN.fxout InteractingResidues_Electro_${PDB}_Repair_PN.txt
|
||||
sed -i '.bak' -e 1,5d InteractingResidues_Electro_${PDB}_Repair_PN.txt
|
||||
sed -i '1,5d' InteractingResidues_Electro_${PDB}_Repair_PN.txt
|
||||
cp InteractingResidues_Hbonds_${PDB}_Repair_PN.fxout InteractingResidues_Hbonds_${PDB}_Repair_PN.txt
|
||||
sed -i '.bak' -e 1,5d InteractingResidues_Hbonds_${PDB}_Repair_PN.txt
|
||||
sed -i '1,5d' InteractingResidues_Hbonds_${PDB}_Repair_PN.txt
|
||||
cp InteractingResidues_Partcov_${PDB}_Repair_PN.fxout InteractingResidues_Partcov_${PDB}_Repair_PN.txt
|
||||
sed -i '.bak' -e 1,5d InteractingResidues_Partcov_${PDB}_Repair_PN.txt
|
||||
sed -i '1,5d' InteractingResidues_Partcov_${PDB}_Repair_PN.txt
|
||||
cp InteractingResidues_Volumetric_${PDB}_Repair_PN.fxout InteractingResidues_Volumetric_${PDB}_Repair_PN.txt
|
||||
sed -i '.bak' -e 1,5d InteractingResidues_Volumetric_${PDB}_Repair_PN.txt
|
||||
sed -i '1,5d' InteractingResidues_Volumetric_${PDB}_Repair_PN.txt
|
||||
cp InteractingResidues_Disulfide_${PDB}_Repair_PN.fxout InteractingResidues_Disulfide_${PDB}_Repair_PN.txt
|
||||
sed -i '.bak' -e 1,5d InteractingResidues_Disulfide_${PDB}_Repair_PN.txt
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
sed -i '1,5d' InteractingResidues_Disulfide_${PDB}_Repair_PN.txt
|
1
foldx/test2/runFoldx.py
Symbolic link
1
foldx/test2/runFoldx.py
Symbolic link
|
@ -0,0 +1 @@
|
|||
../runFoldx.py
|
250
foldx/test2/runFoldx_test.py
Executable file
250
foldx/test2/runFoldx_test.py
Executable file
|
@ -0,0 +1,250 @@
|
|||
#!/usr/bin/env python3
|
||||
import subprocess
|
||||
import os
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from contextlib import suppress
|
||||
import re
|
||||
import csv
|
||||
|
||||
def getInteractions(filename):
    """Count the cells equal to 1 in a tab-separated matrix file.

    The file is read with its first row as header and first column as
    index; the matching (row, column) positions are printed and their
    number is returned.
    """
    matrix = pd.read_csv(filename, sep="\t", header=0, index_col=0)
    contacts = getIndexes(matrix, 1)
    print(contacts)
    return len(contacts)
|
||||
|
||||
def formatMuts(mut_file, pdbname,
               outdir="/home/tanu/git/LSHTM_analysis/foldx/test2", chain='A'):
    """Convert a list of mutations into a FoldX 'individual list' file.

    Reads the first column of the CSV file ``mut_file``, inserts ``chain``
    after the first character of every entry (e.g. 'L4S' becomes 'LA4S')
    and writes each converted entry, terminated by ';', on its own line of
    ``<outdir>/individual_list_<pdbname>.txt``.

    Parameters
    ----------
    mut_file : path of the CSV file holding one mutation per row.
    pdbname  : name used to build the output file name.
    outdir   : directory for the output file; the default is the location
               that used to be hard-coded.
    chain    : chain identifier to insert (default 'A', as before).

    Returns
    -------
    list of the converted mutations (without the trailing ';').
    """
    with open(mut_file) as csvfile:
        # csv.reader yields [] for blank lines; skip them instead of
        # failing with IndexError on row[0].
        muts = [row[0] for row in csv.reader(csvfile) if row]

    mut_list = []
    outfile = outdir + "/individual_list_" + pdbname + ".txt"
    with open(outfile, "w") as output:
        for m in muts:
            print(m)
            mut = m[:1] + chain + m[1:]
            mut_list.append(mut)
            mut = mut + ";"
            print(mut)
            output.write(mut + "\n")
    return mut_list
|
||||
|
||||
def getIndexes(data, value):
    """Find all positions in a DataFrame whose cell equals ``value``.

    The result is a list of (row_label, column_label) tuples, grouped by
    column in column order and sorted by row order inside each column; it
    is empty when there is no match.
    """
    mask = data.isin([value])
    mask.columns = data.columns.values

    # keep only the columns in which the value occurs at least once
    any_match = mask.any()
    found = []
    for col in list(any_match[any_match == True].index):
        in_col = mask[col]
        for row in list(in_col[in_col == True].index):
            found.append((row, col))
    return found
|
||||
|
||||
def loadFiles(df):
    """Load a tab-separated numeric text file into a 2-D float32 array.

    Parameters
    ----------
    df : path of the text file; every non-blank line holds tab-separated
         numbers.

    Returns
    -------
    numpy.ndarray of dtype float32 with one row per non-blank line.

    Raises
    ------
    ValueError if a field cannot be converted to a number.
    """
    resultList = []
    # 'with' guarantees the file is closed even if a conversion fails
    with open(df, 'r') as f:
        for line in f:
            line = line.rstrip('\r\n')
            if not line:
                # tolerate blank lines (e.g. a trailing empty line)
                continue
            sVals = line.split("\t")
            resultList.append([np.float32(v) for v in sVals])
    return np.asarray(resultList, dtype=np.float32)
|
||||
|
||||
#=======================================================================
|
||||
def main():
    """Run the FoldX test pipeline for '3pl1' and write a results CSV.

    Formats the mutation list, runs the helper shell scripts (runfoldx.sh,
    runPrintNetworks.sh, mutrenamefiles.sh, renamefiles.sh), then tabulates
    ddG and the wild-type-minus-mutant counts for seven interaction
    matrices and writes the transposed table to 'foldx_results_3pl1.csv'.

    Only the wild-type matrices that feed the output table are loaded; the
    MM/SM/SS variants that were previously read but never used are skipped.
    """
    pdbname = '3pl1'
    workdir = "/home/tanu/git/LSHTM_analysis/foldx/test2"
    mut_filename = "pnca_muts_sample.csv"
    mutlist = formatMuts(mut_filename, pdbname)

    print(mutlist)
    nmuts = len(mutlist)+1  # exclusive upper bound for the 1-based loops
    print(nmuts)
    print(mutlist)
    print("start")

    subprocess.check_output(['bash', 'runfoldx.sh', pdbname])
    print("end")

    # Per-mutation scripts stay best-effort (the run continues), but a
    # failure is reported instead of being silently swallowed.
    for script in ('runPrintNetworks.sh', 'mutrenamefiles.sh'):
        for n in range(1,nmuts):
            print(n)
            try:
                subprocess.check_output(['bash', script, pdbname,str(n)])
            except Exception as err:
                print('WARNING:', script, 'failed for mutation', n, ':', err)

    subprocess.check_output(['bash','renamefiles.sh',pdbname])

    dGdatafile = workdir + "/Dif_"+pdbname+"_Repair.txt"
    dGdata = pd.read_csv(dGdatafile, sep="\t")
    print(dGdata)
    ddG = dGdata['total energy'].tolist()
    print(ddG)

    # (output row label, FoldX matrix name), in output order
    measures = [("contacts", "Distances"),
                ("electro", "Electro_RR"),
                ("disulfide", "Disulfide_RR"),
                ("hbonds", "Hbonds_RR"),
                ("partcov", "Partcov_RR"),
                ("VdWClashes", "VdWClashes_RR"),
                ("volumetric", "Volumetric_RR")]

    # wild-type count per matrix
    wt_counts = {}
    for _, matrix in measures:
        wt_file = workdir + "/Matrix_" + matrix + "_"+pdbname+"_Repair_PN.txt"
        wt_counts[matrix] = getInteractions(wt_file)

    # wild type minus mutant, one list per matrix with one entry per mutation
    diffs = {matrix: [] for _, matrix in measures}
    for n in range(1, nmuts):
        for _, matrix in measures:
            filename = workdir + "/Matrix_" + matrix + "_"+pdbname+"_Repair_" + str(n)+"_PN.txt"
            diffs[matrix].append(wt_counts[matrix] - getInteractions(filename))

    for _, matrix in measures:
        print(diffs[matrix])

    # (the former 'results.append(ddG)' discarded its result and fails on
    # pandas >= 2.0; ddG is already the first row)
    results = pd.DataFrame([ddG] + [diffs[matrix] for _, matrix in measures],
                           columns=mutlist,
                           index=["ddG"] + [label for label, _ in measures])
    print(results)
    results2 = results.T # transpose df
    outputfilename = "foldx_results_"+pdbname+".csv"
    # results.to_csv(outputfilename)
    results2.to_csv(outputfilename)


if __name__ == "__main__":
    main()
|
456
foldx/test2/runFoldx_test2.py
Executable file
456
foldx/test2/runFoldx_test2.py
Executable file
|
@ -0,0 +1,456 @@
|
|||
#!/usr/bin/env python3
|
||||
import subprocess
|
||||
import os
|
||||
import sys
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from contextlib import suppress
|
||||
from pathlib import Path
|
||||
import re
|
||||
import csv
|
||||
import argparse
|
||||
import shutil
|
||||
#https://realpython.com/python-pathlib/
|
||||
|
||||
# FIXME
|
||||
#strong dependency of file and path names
|
||||
#cannot pass file with path. Need to pass them separately
|
||||
#assumptions made for dir struc as standard
|
||||
#datadir + drug + input
|
||||
|
||||
#=======================================================================
|
||||
#%% specify input and curr dir
|
||||
# Module-level configuration: parse the command line and derive every
# directory / file path used by the functions below (formatMuts and main read
# these names as globals).
homedir = os.path.expanduser('~')

#=======================================================================
#%% command line args
arg_parser = argparse.ArgumentParser()

arg_parser.add_argument('-d', '--drug', help = 'drug name', default = None)
arg_parser.add_argument('-g', '--gene', help = 'gene name (case sensitive)', default = None)

arg_parser.add_argument('--datadir', help = 'Data Directory. By default, it assmumes homedir + git/Data')
arg_parser.add_argument('-i', '--input_dir', help = 'Input dir containing pdb files. By default, it assmumes homedir + <drug> + input')
arg_parser.add_argument('-o', '--output_dir', help = 'Output dir for results. By default, it assmes homedir + <drug> + output')
arg_parser.add_argument('-p', '--process_dir', help = 'Temp processing dir for running foldX. By default, it assmes homedir + <drug> + processing. Make sure it is somewhere with LOTS of storage as it writes all output!') #FIXME

arg_parser.add_argument('-pdb', '--pdb_file', help = 'PDB File to process. By default, it assmumes a file called <gene>_complex.pdb in input_dir')
# Help text corrected to name the default that the code below actually uses.
arg_parser.add_argument('-m', '--mutation_file', help = 'Mutation list. By default, assumes a file called <gene>_mcsm_formatted_snps.csv exists in output_dir')

# FIXME: Doesn't work with 2 chains yet!
arg_parser.add_argument('-c1', '--chain1', help = 'Chain1 ID', default = 'A') # case sensitive
arg_parser.add_argument('-c2', '--chain2', help = 'Chain2 ID', default = 'B') # case sensitive

args = arg_parser.parse_args()

#=======================================================================
#%% variable assignment: input and output
# Command line options
drug = args.drug
gene = args.gene

datadir = args.datadir
indir = args.input_dir
outdir = args.output_dir
process_dir = args.process_dir

mut_filename = args.mutation_file
chainA = args.chain1
chainB = args.chain2
pdb_filename = args.pdb_file

# The gene name is always needed: the output file name (and the default pdb /
# mutation file names) are built from gene.lower(). Fail with a usage message
# instead of an AttributeError on None.
if not gene:
    arg_parser.error('gene name is required (-g/--gene)')

# The drug name is only needed to build default directories.
if not drug and not (indir and outdir and process_dir):
    arg_parser.error('drug name is required (-d/--drug) unless --input_dir, '
                     '--output_dir and --process_dir are all given')

#==============
# directories
#==============
if not datadir:
    datadir = homedir + '/' + 'git/Data'

if not indir:
    indir = datadir + '/' + drug + '/input'

if not outdir:
    outdir = datadir + '/' + drug + '/output'

#TODO: perhaps better handled by refactoring code to prevent generating lots of output files!
# Without this default, os.path.abspath(None) below raises TypeError whenever
# -p/--process_dir is omitted, although the help text documents a default.
if not process_dir:
    process_dir = datadir + '/' + drug + '/processing'

# Make all paths absolute in case the user forgot
indir = os.path.abspath(indir)
process_dir = os.path.abspath(process_dir)
outdir = os.path.abspath(outdir)
datadir = os.path.abspath(datadir)

#=======
# input
#=======
# FIXME: a pdb file given with a directory component is not supported;
# the name is always joined onto indir.
if not pdb_filename:
    pdb_filename = gene.lower() + '_complex.pdb'
pdb_name = Path(pdb_filename).stem

infile_pdb = indir + '/' + pdb_filename
actual_pdb_filename = Path(infile_pdb).name

if mut_filename:
    mutation_file = os.path.abspath(mut_filename)
    infile_muts = mutation_file
    print('User-provided mutation file in use:', infile_muts)
else:
    mutation_file = gene.lower() + '_mcsm_formatted_snps.csv'
    infile_muts = outdir + '/' + mutation_file
    print('WARNING: Assuming default mutation file:', infile_muts)

#=======
# output
#=======
out_filename = gene.lower() + '_foldx.csv'
outfile_foldx = outdir + '/' + out_filename

print('Arguments being passed:'
      , '\nDrug:', args.drug
      , '\ngene:', args.gene
      , '\ninput dir:', indir
      , '\nprocess dir:', process_dir
      , '\noutput dir:', outdir
      , '\npdb file:', infile_pdb
      , '\npdb name:', pdb_name
      , '\nactual pdb name:', actual_pdb_filename
      , '\nmutation file:', infile_muts
      , '\nchain1:', args.chain1
      , '\noutput file:', outfile_foldx
      , '\n=============================================================')
|
||||
#=======================================================================
|
||||
|
||||
def getInteractionEnergy(filename):
    """Return the 'Interaction Energy' value of the first data row.

    filename: path to a tab-separated file whose header row contains an
    'Interaction Energy' column.
    """
    table = pd.read_csv(filename, sep = '\t')
    energies = table['Interaction Energy']
    return energies.loc[0]
|
||||
|
||||
def getInteractions(filename):
    """Count the cells equal to 1 in a tab-separated matrix file.

    The first row is used as the header and the first column as the index;
    the count is the number of (row, column) positions reported by
    getIndexes for the value 1.
    """
    matrix = pd.read_csv(filename, sep = '\t', header = 0, index_col = 0)
    return len(getIndexes(matrix, 1))
|
||||
|
||||
def formatMuts(mut_file,pdbname):
    """Write the FoldX 'individual_list' file and return the mutations.

    Reads the first column of every row of the CSV file `mut_file`, inserts
    the module-level chain id `chainA` after the first character of each
    mutation, and writes one '<mutation>;' line per mutation to
    '<process_dir>/individual_list_<pdbname>.txt'.

    Parameters
    ----------
    mut_file : path of the CSV file holding one mutation per row (first column).
    pdbname  : name used to build the output file name.

    Returns
    -------
    list of str : the chain-tagged mutations, without the trailing ';'.
    """
    with open(mut_file, newline='') as csvfile:
        # csv.reader yields [] for blank lines; skip them (and rows whose
        # first field is empty) so a stray blank line in the input does not
        # raise IndexError or emit a bogus mutation.
        muts = [row[0] for row in csv.reader(csvfile) if row and row[0].strip()]

    mut_list = []
    outfile = process_dir + '/individual_list_' + pdbname + '.txt'
    with open(outfile, 'w') as output:
        for m in muts:
            print(m)
            # first character + chain id + remainder of the mutation string
            mut = m[:1] + chainA + m[1:]
            mut_list.append(mut)
            mut = mut + ';'
            print(mut)
            output.write(mut)
            output.write('\n')
    return mut_list
|
||||
|
||||
def getIndexes(data, value):
    """Return the (row label, column label) pairs where `data` equals `value`.

    Pairs are listed column by column, in the DataFrame's column order, and
    within a column in index order.
    """
    mask = data.isin([value])
    mask.columns = data.columns.values

    positions = []
    column_has_hit = mask.any()
    for column in column_has_hit[column_has_hit].index:
        flags = mask[column]
        positions.extend((row, column) for row in flags[flags].index)
    return positions
|
||||
|
||||
def loadFiles(df):
    """Load a tab-separated text file of numbers into a float32 numpy array.

    Parameters
    ----------
    df : path of the text file; each line holds tab-separated numeric values.

    Returns
    -------
    numpy.ndarray of dtype float32 with one row per non-blank line.
    """
    resultList = []
    # 'with' guarantees the handle is closed even if a value fails to parse
    with open(df, 'r') as f:
        for line in f:
            line = line.rstrip('\n')
            if not line.strip():
                # ignore blank lines rather than failing on float('')
                continue
            sVals = line.split('\t')
            fVals = list(map(np.float32, sVals))
            resultList.append(fVals)
    return np.asarray(resultList, dtype=np.float32)
|
||||
|
||||
# TODO: use this code pattern rather than invoking bash
|
||||
#def repairPDB():
|
||||
# subprocess.call(['foldx'
|
||||
# , '--command=RepairPDB'
|
||||
# , '--pdb-dir=' + indir
|
||||
# , '--pdb=' + actual_pdb_filename
|
||||
# , '--ionStrength=0.05'#
|
||||
# , '--pH=7'
|
||||
# , '--water=PREDICT'
|
||||
# , '--vdwDesign=1'
|
||||
# , 'outPDB=true'
|
||||
# , '--output-dir=' + process_dir])
|
||||
|
||||
#=======================================================================
|
||||
def main():
    """Run the FoldX pipeline and write the per-mutation results CSV.

    Steps, all driven by the module-level configuration (indir, process_dir,
    pdb_name, infile_muts, chainA/chainB, outfile_foldx):

    1. formatMuts writes the FoldX individual list for the mutations.
    2. foldx RepairPDB, BuildModel, PrintNetworks and SequenceDetail are run
       on the wild type; PrintNetworks is then run once per mutant model.
    3. The helper scripts mutrenamefiles.sh / renamefiles.sh rename the
       FoldX outputs (resolved relative to the current working directory).
    4. ddG values are read from 'Dif_<pdb>_Repair.txt' and, for every
       interaction matrix, the wild-type minus mutant count of cells equal
       to 1 is computed.
    5. The table is transposed (one row per mutation), the chain id and the
       trailing ';' are removed from the mutation labels, and the result is
       written to outfile_foldx.

    The AnalyseComplex branch only runs when `comp` is 'y'; it is switched
    off here.
    """
    pdbname = pdb_name
    comp = '' # for complex only
    mut_filename = infile_muts #pnca_mcsm_snps.csv
    mutlist = formatMuts(mut_filename, pdbname)

    print(mutlist)
    nmuts = len(mutlist)
    print(nmuts)
    print(mutlist)
    print('start')

    print('\033[95mSTAGE: repair PDB\033[0m')
    print('EXECUTING: repairPDB.sh %s %s %s' % (indir, actual_pdb_filename, process_dir))

    # Each option must be its own argv element: subprocess passes the list
    # to the program verbatim (no shell), so a single space-separated string
    # would reach foldx as one unparseable argument.
    foldx_common = ['--ionStrength=0.05', '--pH=7', '--water=PREDICT', '--vdwDesign=1']

    # NOTE(review): 'outPDB=true' has no leading '--', unlike every other
    # option; kept as in the original - confirm against the FoldX CLI.
    subprocess.call(['foldx'
                     , '--command=RepairPDB']
                    + foldx_common
                    + ['--pdb-dir=' + indir
                       , '--pdb=' + actual_pdb_filename
                       , 'outPDB=true'
                       , '--output-dir=' + process_dir])
    print('\033[95mCOMPLETE: repair PDB\033[0m')
    print('\033[95mSTAGE: run FoldX (subprocess)\033[0m')
    print('EXECUTING: runfoldx.sh %s %s ' % (pdbname, process_dir))

    print('Running foldx BuildModel')
    # No shell is involved, so the mutant file name must not be wrapped in
    # literal double quotes (they would become part of the file name).
    subprocess.call(['foldx'
                     , '--command=BuildModel']
                    + foldx_common
                    + ['--pdb-dir=' + process_dir
                       , '--pdb=' + pdbname + '_Repair.pdb'
                       , '--mutant-file=individual_list_' + pdbname + '.txt'
                       , 'outPDB=true'
                       , '--numberOfRuns=1'
                       , '--output-dir=' + process_dir], cwd=process_dir)

    print('Running foldx PrintNetworks')
    subprocess.call(['foldx'
                     , '--command=PrintNetworks'
                     , '--pdb-dir=' + process_dir
                     , '--pdb=' + pdbname + '_Repair.pdb'
                     , '--water=PREDICT'
                     , '--vdwDesign=1'
                     , '--output-dir=' + process_dir], cwd=process_dir)

    print('Running foldx SequenceDetail')
    subprocess.call(['foldx'
                     , '--command=SequenceDetail'
                     , '--pdb-dir=' + process_dir
                     , '--pdb=' + pdbname + '_Repair.pdb'
                     , '--water=PREDICT'
                     , '--vdwDesign=1'
                     , '--output-dir=' + process_dir], cwd=process_dir)

    print('\033[95mCOMPLETE: run FoldX (subprocess)\033[0m')

    print('\033[95mSTAGE: Print Networks (shell)\033[0m')
    for n in range(1,nmuts+1):
        print('\033[95mNETWORK:\033[0m', n)
        print('Running foldx PrintNetworks for mutation', n)
        subprocess.call(['foldx'
                         , '--command=PrintNetworks'
                         , '--pdb-dir=' + process_dir
                         , '--pdb=' + pdbname + '_Repair_' + str(n) + '.pdb'
                         , '--water=PREDICT'
                         , '--vdwDesign=1'
                         , '--output-dir=' + process_dir], cwd=process_dir)
    print('\033[95mCOMPLETE: Print Networks (shell)\033[0m')

    print('\033[95mSTAGE: Rename Mutation Files (shell)\033[0m')
    for n in range(1,nmuts+1):
        print('\033[95mMUTATION:\033[0m', n)
        print('\033[96mCommand:\033[0m mutrenamefiles.sh %s %s %s' % (pdbname, str(n), process_dir ))
        # FIXME: this is bad design and needs to be done in a pythonic way
        # Still best-effort (a failed rename does not abort the run), but the
        # failure is reported instead of being silently swallowed.
        try:
            subprocess.check_output(['bash', 'mutrenamefiles.sh', pdbname, str(n), process_dir])
        except (subprocess.CalledProcessError, OSError) as err:
            print('WARNING: mutrenamefiles.sh failed for mutation', n, ':', err)
    print('\033[95mCOMPLETE: Rename Mutation Files (shell)\033[0m')

    print('\033[95mSTAGE: Rename Files (shell)\033[0m')
    # FIXME: this is bad design and needs to be done in a pythonic way
    subprocess.check_output(['bash','renamefiles.sh', pdbname, process_dir])
    print('\033[95mCOMPLETE: Rename Files (shell)\033[0m')

    if comp=='y':
        print('\033[95mSTAGE: Running foldx AnalyseComplex (subprocess)\033[0m')
        chain1=chainA
        chain2=chainB
        subprocess.call(['foldx'
                         , '--command=AnalyseComplex'
                         , '--pdb-dir=' + process_dir
                         , '--pdb=' + pdbname + '_Repair.pdb'
                         , '--analyseComplexChains=' + chain1 + ',' + chain2
                         , '--water=PREDICT'
                         , '--vdwDesign=1'
                         , '--output-dir=' + process_dir], cwd=process_dir)

        # FIXME why would we ever need to do this?!? Cargo-culted from runcomplex.sh
        ac_source = process_dir + '/Summary_' + pdbname + '_Repair_AC.fxout'
        ac_dest = process_dir + '/Summary_' + pdbname + '_Repair_AC.txt'
        shutil.copyfile(ac_source, ac_dest)

        for n in range(1,nmuts+1):
            print('\033[95mSTAGE: Running foldx AnalyseComplex (subprocess) for mutation:\033[0m', n)
            subprocess.call(['foldx'
                             , '--command=AnalyseComplex'
                             , '--pdb-dir=' + process_dir
                             , '--pdb=' + pdbname + '_Repair_' + str(n) + '.pdb'
                             , '--analyseComplexChains=' + chain1 + ',' + chain2
                             , '--water=PREDICT'
                             , '--vdwDesign=1'
                             , '--output-dir=' + process_dir], cwd=process_dir)

            # FIXME why would we ever need to do this?!? Cargo-culted from runcomplex.sh
            ac_mut_source = process_dir + '/Summary_' + pdbname + '_Repair_' + str(n) +'_AC.fxout'
            # '_Repair_' (not '_Repair)'): must match the name read back in
            # the interaction-energy section below.
            ac_mut_dest = process_dir + '/Summary_' + pdbname + '_Repair_' + str(n) +'_AC.txt'
            shutil.copyfile(ac_mut_source, ac_mut_dest)
            print('\033[95mCOMPLETE: foldx AnalyseComplex (subprocess) for mutation:\033[0m', n)

    interactions = ['Distances','Electro_RR','Electro_MM','Electro_SM','Electro_SS','Disulfide_RR','Disulfide_MM','Disulfide_SM','Disulfide_SS',
                    'Hbonds_RR','Hbonds_MM','Hbonds_SM','Hbonds_SS','Partcov_RR','Partcov_MM','Partcov_SM','Partcov_SS','VdWClashes_RR','VdWClashes_MM',
                    'VdWClashes_SM','VdWClashes_SS','Volumetric_RR','Volumetric_MM','Volumetric_SM','Volumetric_SS']

    dGdatafile = process_dir + '/Dif_' + pdbname + '_Repair.txt'
    dGdata = pd.read_csv(dGdatafile, sep = '\t')

    print('ddG')
    print(len(dGdata))
    ddG = dGdata['total energy'].tolist()

    # wild-type count for every interaction matrix
    wt_int = []
    for name in interactions:
        filename = process_dir + '/Matrix_' + name + '_'+ pdbname + '_Repair_PN.txt'
        wt_int.append(getInteractions(filename))
    print('wt')
    print(wt_int)

    # one row for ddG plus one row per interaction type, one column per mutation
    ntotal = len(interactions) + 1
    print(ntotal)
    print(nmuts)
    data = np.empty((ntotal,nmuts))
    data[0] = ddG
    print(data)
    for i, name in enumerate(interactions):
        d = []
        for n in range(1, nmuts+1):
            print(i)
            filename = process_dir + '/Matrix_' + name + '_' + pdbname + '_Repair_' + str(n) + '_PN.txt'
            mut = getInteractions(filename)
            diff = wt_int[i] - mut
            print(diff)
            print(wt_int[i])
            print(mut)
            d.append(diff)
        print(d)
        data[i+1] = d

    # row labels of `data`: ddG first, then the interaction types
    row_labels = ['ddG'] + interactions
    print(row_labels)

    IE = []
    if comp=='y':
        wtfilename = process_dir + '/Summary_' + pdbname + '_Repair_AC.txt'
        wtE = getInteractionEnergy(wtfilename)
        print(wtE)
        for n in range(1,nmuts+1):
            print(n)
            filename = process_dir + '/Summary_' + pdbname + '_Repair_' + str(n) + '_AC.txt'
            mutE = getInteractionEnergy(filename)
            print(mutE)
            diff = wtE - mutE
            print(diff)
            IE.append(diff)
        print(IE)
        IEresults = pd.DataFrame(IE,columns = ['Interaction Energy'], index = mutlist)
        IEfilename = 'foldx_complexresults_'+pdbname+'.csv'
        IEresults.to_csv(IEfilename)
        print(len(IE))
        data = np.append(data,[IE], axis = 0)
        print(data)
        row_labels = row_labels + ['Interaction Energy']

    # Column labels are re-read from the individual list, i.e. in the
    # '<wt><chain><pos><mut>;' form that formatMuts wrote.
    mut_file = process_dir + '/individual_list_' + pdbname + '.txt'
    with open(mut_file, newline='') as csvfile:
        mutlist = [row[0] for row in csv.reader(csvfile) if row]
    print(mutlist)
    print(len(mutlist))
    print(data)
    # (The original called results.append(ddG) here: its return value was
    # discarded and DataFrame.append no longer exists in pandas >= 2.0.)
    results = pd.DataFrame(data, columns = mutlist, index = row_labels)

    # my style formatted results
    results2 = results.T # transpose df
    results2.index.name = 'mutationinformation' # assign name to index
    results2 = results2.reset_index() # turn it into a columns

    results2['mutationinformation'] = results2['mutationinformation'].replace({r'([A-Z]{1})[A-Z]{1}([0-9]+[A-Z]{1});' : r'\1 \2'}, regex = True) # capture mcsm style muts (i.e not the chain id)
    results2['mutationinformation'] = results2['mutationinformation'].str.replace(' ', '') # remove empty space

    results2.rename(columns = {'Distances': 'Contacts'}, inplace = True)

    # lower case columns
    results2.columns = results2.columns.str.lower()

    print('Writing file in the format below:\n'
          , results2.head()
          , '\nNo. of rows:', len(results2)
          , '\nNo. of cols:', len(results2.columns))

    outputfilename = outfile_foldx
    results2.to_csv(outputfilename, index = False)
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
|
@ -26,7 +26,7 @@ Created on Tue Aug 6 12:56:03 2019
|
|||
# 1) <gene>_gwas.csv
|
||||
# 2) <gene>_common_ids.csv
|
||||
# 3) <gene>_ambiguous_muts.csv
|
||||
# 4) <gene>_mcsm_snps.csv
|
||||
# 4) <gene>_mcsm_formatted_snps.csv
|
||||
# 5) <gene>_metadata_poscounts.csv
|
||||
# 6) <gene>_metadata.csv
|
||||
# 7) <gene>_all_muts_msa.csv
|
||||
|
@ -1193,7 +1193,7 @@ if snps_only.mutationinformation.isna().sum() == 0:
|
|||
else:
|
||||
sys.exit('FAIL: SNP has NA, Possible mapping issues from dict?')
|
||||
|
||||
out_filename_mcsmsnps = gene.lower() + '_mcsm_style_snps.csv'
|
||||
out_filename_mcsmsnps = gene.lower() + '_mcsm_formatted_snps.csv'
|
||||
outfile_mcsmsnps = outdir + '/' + out_filename_mcsmsnps
|
||||
|
||||
print('\n----------------------------------'
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue