renamed file paths and names to run mcsm
This commit is contained in:
parent
cf7d6f9f03
commit
f026efb4db
4 changed files with 115 additions and 57 deletions
|
@ -11,8 +11,8 @@
|
||||||
# per line. Sort by unique, which automatically removes duplicates.
|
# per line. Sort by unique, which automatically removes duplicates.
|
||||||
# save file in current directory
|
# save file in current directory
|
||||||
#**********************************************************************
|
#**********************************************************************
|
||||||
infile="${HOME}/git/Data/input/processed/pyrazinamide/pnca_mis_SNPs_v2.csv"
|
infile="${HOME}/git/Data/pyrazinamide/input/processed/pnca_mis_SNPs_v2.csv"
|
||||||
outfile="${HOME}/git/Data/input/processed/pyrazinamide/pnca_mis_SNPs_v2_unique.csv"
|
outfile="${HOME}/git/Data/pyrazinamide/input/processed/pnca_mis_SNPs_v2_unique.csv"
|
||||||
|
|
||||||
# sort unique entries and output to current directory
|
# sort unique entries and output to current directory
|
||||||
sort -u ${infile} > ${outfile}
|
sort -u ${infile} > ${outfile}
|
||||||
|
|
|
@ -1,30 +1,49 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
#*************************************
|
|
||||||
#need to be in the correct directory
|
|
||||||
#*************************************
|
|
||||||
##: comments for code
|
|
||||||
#: commented out code
|
|
||||||
|
|
||||||
#**********************************************************************
|
#**********************************************************************
|
||||||
# TASK: submit requests using curl: HANDLE redirects and refresh url.
|
# TASK: submit requests using curl: HANDLE redirects and refresh url.
|
||||||
# Iterate over mutation file and write/append result urls to a file
|
# Iterate over mutation file and write/append result urls to a file
|
||||||
# result url file: stored in the /Results directory
|
# Mutation file must have one mutation (format A1B) per line
|
||||||
# mutation file: one mutation per line, no chain ID
|
# Requirements
|
||||||
# output: in a file, should be n urls (n=no. of mutations in file)
|
# input: mutation list (format: A1B), complex struc: (pdb format)
|
||||||
|
# mutation: outFile from step0, one unique mutation/line, no chain ID
|
||||||
|
# path: "Data/<drug>/input/processed/<filename>"
|
||||||
|
# structure: pdb file of drug-target complex
|
||||||
|
# path: "Data/<drug>/input/structure/<filename>"
|
||||||
|
# output: should be n urls (n=no. of unique mutations in file)
|
||||||
|
# path: "Data/<drug>/input/processed/<filename>"
|
||||||
|
|
||||||
# NOTE: these are just result urls, not actual values for results
|
# NOTE: these are just result urls, not actual values for results
|
||||||
#**********************************************************************
|
#**********************************************************************
|
||||||
## iterate over mutation file; line by line and submit query using curl
|
# specify variables for input and output paths and filenames
|
||||||
filename="../Data/pnca_mis_SNPs_v2_unique.csv"
|
|
||||||
|
|
||||||
## some useful messages
|
inpath="${HOME}/git/Data/pyrazinamide/input"
|
||||||
echo -n -e "Processing $(wc -l < ${filename}) entries from ${filename}\n"
|
processed_path="/processed"
|
||||||
|
struc_path="/structure"
|
||||||
|
infile_mut="/pnca_mis_SNPs_v2_unique.csv"
|
||||||
|
infile_struc="/complex1_no_water.pdb"
|
||||||
|
|
||||||
|
outpath="${inpath}${processed_path}"
|
||||||
|
outfile="/mCSM_lig_complex1_result_url.txt"
|
||||||
|
|
||||||
|
# create valid input and output filenames
|
||||||
|
#filename="${HOME}/git/Data/pyrazinamide/input/processed/pnca_mis_SNPs_v2_unique.csv"
|
||||||
|
filename="${inpath}${processed_path}${infile_mut}"
|
||||||
|
echo Input File is: ${filename}
|
||||||
|
|
||||||
|
outfilename="${outpath}${outfile}"
|
||||||
|
echo Output File will be: ${outfilename}
|
||||||
|
|
||||||
|
# iterate over mutation file; line by line and submit query using curl
|
||||||
|
# some useful messages
|
||||||
|
echo -n -e "Processing $(wc -l < ${filename}) entries from ${infile_mut}\n"
|
||||||
COUNT=0
|
COUNT=0
|
||||||
while read -r line; do
|
while read -r line; do
|
||||||
((COUNT++))
|
((COUNT++))
|
||||||
mutation="${line}"
|
mutation="${line}"
|
||||||
# echo "${mutation}"
|
# echo "${mutation}"
|
||||||
pdb='../Data/complex1_no_water.pdb'
|
#pdb='../Data/complex1_no_water.pdb'
|
||||||
|
pdb="${inpath}${struc_path}${infile_struc}"
|
||||||
mutation="${mutation}"
|
mutation="${mutation}"
|
||||||
chain="A"
|
chain="A"
|
||||||
lig_id="PZA"
|
lig_id="PZA"
|
||||||
|
@ -49,24 +68,31 @@ refresh_url=$(curl -L \
|
||||||
-F "affin_wt=${affin_wt}" \
|
-F "affin_wt=${affin_wt}" \
|
||||||
${host}${call_url} | grep "http-equiv")
|
${host}${call_url} | grep "http-equiv")
|
||||||
|
|
||||||
#echo $refresh_url
|
#echo Refresh URL: $refresh_url
|
||||||
#echo ${host}${refresh_url}
|
#echo Host+Refresh: ${host}${refresh_url}
|
||||||
|
|
||||||
# use regex to extract the relevant bit from the refresh url
|
# use regex to extract the relevant bit from the refresh url
|
||||||
# regex:sed -r 's/.*(\/mcsm.*)".*$/\1/g'
|
# regex:sed -r 's/.*(\/mcsm.*)".*$/\1/g'
|
||||||
|
|
||||||
#Now build: result url using host and refresh url and write the urls to a file in the Results dir
|
# Now build: result url using host and refresh url and write the urls to a file
|
||||||
result_url=$(echo $refresh_url | sed -r 's/.*(\/mcsm.*)".*$/\1/g')
|
result_url=$(echo $refresh_url | sed -r 's/.*(\/mcsm.*)".*$/\1/g')
|
||||||
sleep 10
|
sleep 10
|
||||||
|
|
||||||
echo -e "${mutation} : processing entry ${COUNT}/$(wc -l < ${filename})..."
|
echo -e "${mutation} : processing entry ${COUNT}/$(wc -l < ${filename})..."
|
||||||
|
|
||||||
echo -e "${host}${result_url}" >> ../Results/$(wc -l < ${filename})_mCSM_lig_complex1_result_url.txt
|
# create output file with the added number of muts from file
|
||||||
|
# after much thought, bad idea as less generic!
|
||||||
|
#echo -e "${host}${result_url}" >> ../Results/$(wc -l < ${filename})_mCSM_lig_complex1_result_url.txt
|
||||||
|
echo -e "${host}${result_url}" >> ${outfilename}
|
||||||
#echo -n '.'
|
#echo -n '.'
|
||||||
done < "${filename}"
|
done < "${filename}"
|
||||||
|
|
||||||
|
echo
|
||||||
|
echo Output filename: ${outfilename}
|
||||||
|
echo
|
||||||
|
echo Number of urls saved: $(wc -l < ${filename})
|
||||||
echo
|
echo
|
||||||
echo "Processing Complete"
|
echo "Processing Complete"
|
||||||
|
|
||||||
##end of submitting query, receiving result url and storing results url in a file
|
# end of submitting query, receiving result url and storing results url in a file
|
||||||
|
|
||||||
|
|
|
@ -1,23 +1,21 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
#*************************************
|
|
||||||
#need to be in the correct directory
|
|
||||||
#*************************************
|
|
||||||
##: comments for code
|
|
||||||
#: commented out code
|
|
||||||
|
|
||||||
#********************************************************************
|
#********************************************************************
|
||||||
# TASK: submit result urls and fetch actual results using curl
|
# TASK: submit result urls and fetch actual results using curl
|
||||||
# iterate over each result url from the output of step1 in the stored
|
# Iterate over each result url from the output of step1 stored in processed/
|
||||||
# in file in /Results.
|
|
||||||
# Use curl to fetch results and extract relevant sections using hxtools
|
# Use curl to fetch results and extract relevant sections using hxtools
|
||||||
# and store these in another file in /Results
|
# and store these in another file in processed/
|
||||||
# This script takes two arguments:
|
|
||||||
# input file: file containing results url
|
|
||||||
# In this case: 336_mCSM_lig_complex1_result_url.txt
|
|
||||||
# output file: name of the file where extracted results will be stored
|
|
||||||
# In this case : it is 336_mCSM_lig_complex1_output_MASTER.txt
|
|
||||||
#*********************************************************************
|
|
||||||
|
|
||||||
|
# Requirements:
|
||||||
|
# input: output of step1, file containing result urls
|
||||||
|
# path: "Data/<drug>/input/processed/<filename>"
|
||||||
|
# output: name of the file where extracted results will be stored
|
||||||
|
# path: "Data/<drug>/input/processed/<filename>"
|
||||||
|
|
||||||
|
# Optional: can make these command line args you pass when calling script
|
||||||
|
# by uncommenting code as indicated
|
||||||
|
#*********************************************************************
|
||||||
|
############################# uncomment: to make it command line args
|
||||||
#if [ "$#" -ne 2 ]; then
|
#if [ "$#" -ne 2 ]; then
|
||||||
#if [ -Z $1 ]; then
|
#if [ -Z $1 ]; then
|
||||||
# echo "
|
# echo "
|
||||||
|
@ -32,11 +30,26 @@
|
||||||
# Second argument: Output File
|
# Second argument: Output File
|
||||||
#infile=$1
|
#infile=$1
|
||||||
#outfile=$2
|
#outfile=$2
|
||||||
|
############################ end of code block to make command line args
|
||||||
|
|
||||||
infile="${HOME}/git/LSHTM_analysis/mcsm_complex1/Results/336_mCSM_lig_complex1_result_url.txt"
|
# specify variables for input and output paths and filenames
|
||||||
outfile="${HOME}/git/LSHTM_analysis/mcsm_complex1/Results/336_mCSM_lig_complex1_output_MASTER.txt"
|
inpath="${HOME}/git/Data/pyrazinamide/input"
|
||||||
|
processed_path="/processed"
|
||||||
|
infile="/mCSM_lig_complex1_result_url.txt"
|
||||||
|
|
||||||
echo -n "Processing $(wc -l < ${infile}) entries from ${infile}"
|
outpath="${inpath}${processed_path}"
|
||||||
|
outfile="/mCSM_lig_complex1_output_MASTER.txt"
|
||||||
|
|
||||||
|
# create valid input and output filenames
|
||||||
|
filename="${inpath}${processed_path}${infile}"
|
||||||
|
echo Input File is: ${filename}
|
||||||
|
|
||||||
|
outfilename="${outpath}${outfile}"
|
||||||
|
echo Output File will be: ${outfilename}
|
||||||
|
|
||||||
|
# Iterate over each result url, and extract results using hxtools
|
||||||
|
# which nicely cleans and formats html
|
||||||
|
echo -n "Processing $(wc -l < ${filename}) entries from ${infile}"
|
||||||
echo
|
echo
|
||||||
COUNT=0
|
COUNT=0
|
||||||
while read -r line; do
|
while read -r line; do
|
||||||
|
@ -48,12 +61,13 @@ while read -r line; do
|
||||||
| hxselect -c div.well \
|
| hxselect -c div.well \
|
||||||
| sed -r -e 's/<[^>]*>//g' \
|
| sed -r -e 's/<[^>]*>//g' \
|
||||||
| sed -re 's/ +//g' \
|
| sed -re 's/ +//g' \
|
||||||
>> ${outfile}
|
>> ${outfilename}
|
||||||
#| tee -a ${outfile}
|
#| tee -a ${outfilename}
|
||||||
# echo -n '.'
|
# echo -n '.'
|
||||||
echo -e "Processing entry ${COUNT}/$(wc -l < ${infile})..."
|
echo -e "Processing entry ${COUNT}/$(wc -l < ${filename})..."
|
||||||
|
|
||||||
done < "${infile}"
|
done < "${filename}"
|
||||||
|
|
||||||
echo
|
echo
|
||||||
echo "Processing Complete"
|
echo "Processing Complete"
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,4 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
#*************************************
|
|
||||||
#need to be in the correct directory
|
|
||||||
#*************************************
|
|
||||||
##: comments for code
|
|
||||||
#: commented out code
|
|
||||||
|
|
||||||
#********************************************************************
|
#********************************************************************
|
||||||
# TASK: Intermediate results processing
|
# TASK: Intermediate results processing
|
||||||
|
@ -14,12 +9,36 @@
|
||||||
# prevent this from happening. Additionally there are other empty lines
|
# prevent this from happening. Additionally there are other empty lines
|
||||||
# that need to be omitted. In order to ensure these sections are not split
|
# that need to be omitted. In order to ensure these sections are not split
|
||||||
# over multiple lines, this script is written.
|
# over multiple lines, this script is written.
|
||||||
#*********************************************************************
|
|
||||||
|
|
||||||
infile="../Results/336_mCSM_lig_complex1_output_processed.txt"
|
# Requirements:
|
||||||
|
# input: output of step2, file containing mcsm results as described above
|
||||||
|
# path: "Data/<drug>/input/processed/<filename>"
|
||||||
|
# output: replaces file in place.
|
||||||
|
# Therefore first create a copy of the input file
|
||||||
|
# but rename it to remove the word "MASTER" and add the word "processed"
|
||||||
|
# file format: .txt
|
||||||
|
|
||||||
#sed -i '/PredictedAffinityChange:/ { N; N; N; N; s/\n//g;}' ${infile} \
|
# NOTE: This replaces the file in place!
|
||||||
# | sed -i '/DUETstabilitychange:/ {x; N; N; s/\n//g; p;d;}' ${infile}
|
# the output is a txt file with no newlines and formatting
|
||||||
|
# to have the following format "<colname><:><value>"
|
||||||
|
#***********************************************************************
|
||||||
|
# specify variables for input and output paths and filenames
|
||||||
|
inpath="${HOME}/git/Data/pyrazinamide/input"
|
||||||
|
processed_path="/processed"
|
||||||
|
|
||||||
|
# Create input file: copy and rename output file of step2
|
||||||
|
oldfile="${inpath}${processed_path}/mCSM_lig_complex1_output_MASTER.txt"
|
||||||
|
newfile="${inpath}${processed_path}/mCSM_lig_complex1_output_processed.txt"
|
||||||
|
cp $oldfile $newfile
|
||||||
|
|
||||||
|
#infile="../Results/336_mCSM_lig_complex1_output_processed.txt"
|
||||||
|
infile="/mCSM_lig_complex1_output_processed.txt"
|
||||||
|
filename="${inpath}${processed_path}${infile}"
|
||||||
|
|
||||||
|
echo Input filename is : ${filename}
|
||||||
|
|
||||||
|
#sed -i '/PredictedAffinityChange:/ { N; N; N; N; s/\n//g;}' ${filename} \
|
||||||
|
# | sed -i '/DUETstabilitychange:/ {x; N; N; s/\n//g; p;d;}' ${filename}
|
||||||
|
|
||||||
# Outputs records separated by a newline, that look something like this:
|
# Outputs records separated by a newline, that look something like this:
|
||||||
# PredictedAffinityChange:-2.2log(affinityfoldchange)-Destabilizing
|
# PredictedAffinityChange:-2.2log(affinityfoldchange)-Destabilizing
|
||||||
|
@ -36,7 +55,6 @@ infile="../Results/336_mCSM_lig_complex1_output_processed.txt"
|
||||||
# (...etc)
|
# (...etc)
|
||||||
|
|
||||||
# This script brings everything in a convenient format for further processing in python.
|
# This script brings everything in a convenient format for further processing in python.
|
||||||
# bear in mind, this replaces the file in place, so make sure you retain a copy for your records
|
|
||||||
sed -i '/PredictedAffinityChange/ {
|
sed -i '/PredictedAffinityChange/ {
|
||||||
N
|
N
|
||||||
N
|
N
|
||||||
|
@ -49,4 +67,4 @@ N
|
||||||
N
|
N
|
||||||
s/\n//g
|
s/\n//g
|
||||||
}
|
}
|
||||||
/^$/d' ${infile}
|
/^$/d' ${filename}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue