consistent input & output variables for filenames to run mcsm
This commit is contained in:
parent
ef99167679
commit
e3f4c630a1
4 changed files with 71 additions and 55 deletions
|
@ -15,35 +15,40 @@
|
||||||
|
|
||||||
# NOTE: these are just result urls, not actual values for results
|
# NOTE: these are just result urls, not actual values for results
|
||||||
#**********************************************************************
|
#**********************************************************************
|
||||||
# specify variables for input and output paths and filenames
|
############# specify variables for input and output paths and filenames
|
||||||
|
homedir="${HOME}"
|
||||||
|
#echo Home directory is ${homedir}
|
||||||
|
basedir="/git/Data/pyrazinamide/input"
|
||||||
|
|
||||||
inpath="${HOME}/git/Data/pyrazinamide/input"
|
# input
|
||||||
processed_path="/processed"
|
inpath_mut="/processed"
|
||||||
struc_path="/structure"
|
in_filename_mut="/pnca_mis_SNPs_v2_unique.csv"
|
||||||
infile_mut="/pnca_mis_SNPs_v2_unique.csv"
|
infile_mut="${homedir}${basedir}${inpath_mut}${in_filename_mut}"
|
||||||
infile_struc="/complex1_no_water.pdb"
|
echo Input Mut filename: ${infile_mut}
|
||||||
|
|
||||||
outpath="${inpath}${processed_path}"
|
inpath_struc="/structure"
|
||||||
outfile="/complex1_result_url.txt"
|
in_filename_struc="/complex1_no_water.pdb"
|
||||||
|
infile_struc="${homedir}${basedir}${inpath_struc}${in_filename_struc}"
|
||||||
|
echo Input Struc filename: ${infile_struc}
|
||||||
|
|
||||||
# create valid input and output filenames
|
# output
|
||||||
#filename="${HOME}/git/Data/pyrazinamide/input/processed/pnca_mis_SNPs_v2_unique.csv"
|
outpath="/processed"
|
||||||
filename="${inpath}${processed_path}${infile_mut}"
|
out_filename="/complex1_result_url.txt"
|
||||||
echo Input File is: ${filename}
|
outfile="${homedir}${basedir}${outpath}${out_filename}"
|
||||||
|
#echo Output filename: ${outfile}
|
||||||
|
################## end of variable assignment for input and output files
|
||||||
|
|
||||||
outfilename="${outpath}${outfile}"
|
# iterate over mutation file (infile_mut); line by line and
|
||||||
echo Output File will be: ${outfilename}
|
# submit query using curl
|
||||||
|
|
||||||
# iterate over mutation file; line by line and submit query using curl
|
|
||||||
# some useful messages
|
# some useful messages
|
||||||
echo -n -e "Processing $(wc -l < ${filename}) entries from ${infile_mut}\n"
|
echo -n -e "Processing $(wc -l < ${infile_mut}) entries from ${infile_mut}\n"
|
||||||
COUNT=0
|
COUNT=0
|
||||||
while read -r line; do
|
while read -r line; do
|
||||||
((COUNT++))
|
((COUNT++))
|
||||||
mutation="${line}"
|
mutation="${line}"
|
||||||
# echo "${mutation}"
|
# echo "${mutation}"
|
||||||
#pdb='../Data/complex1_no_water.pdb'
|
#pdb='../Data/complex1_no_water.pdb'
|
||||||
pdb="${inpath}${struc_path}${infile_struc}"
|
pdb="${infile_struc}"
|
||||||
mutation="${mutation}"
|
mutation="${mutation}"
|
||||||
chain="A"
|
chain="A"
|
||||||
lig_id="PZA"
|
lig_id="PZA"
|
||||||
|
@ -51,7 +56,7 @@ affin_wt="0.99"
|
||||||
host="http://biosig.unimelb.edu.au"
|
host="http://biosig.unimelb.edu.au"
|
||||||
call_url="/mcsm_lig/prediction"
|
call_url="/mcsm_lig/prediction"
|
||||||
|
|
||||||
##=========================================
|
#=========================================
|
||||||
##html field names required for curl
|
##html field names required for curl
|
||||||
##complex_field:wild=@
|
##complex_field:wild=@
|
||||||
##mutation_field:mutation=@
|
##mutation_field:mutation=@
|
||||||
|
@ -78,19 +83,20 @@ refresh_url=$(curl -L \
|
||||||
result_url=$(echo $refresh_url | sed -r 's/.*(\/mcsm.*)".*$/\1/g')
|
result_url=$(echo $refresh_url | sed -r 's/.*(\/mcsm.*)".*$/\1/g')
|
||||||
sleep 10
|
sleep 10
|
||||||
|
|
||||||
echo -e "${mutation} : processing entry ${COUNT}/$(wc -l < ${filename})..."
|
echo -e "${mutation} : processing entry ${COUNT}/$(wc -l < ${infile_mut})..."
|
||||||
|
|
||||||
# create output file with the added number of muts from file
|
# create output file with the added number of muts from file
|
||||||
# after much thought, bad idea as less generic!
|
# after much thought, bad idea as less generic!
|
||||||
#echo -e "${host}${result_url}" >> ../Results/$(wc -l < ${filename})_complex1_result_url.txt
|
#echo -e "${host}${result_url}" >> ../Results/$(wc -l < ${filename})_complex1_result_url.txt
|
||||||
echo -e "${host}${result_url}" >> ${outfilename}
|
echo -e "${host}${result_url}" >> ${outfile}
|
||||||
#echo -n '.'
|
#echo -n '.'
|
||||||
done < "${filename}"
|
done < "${infile_mut}"
|
||||||
|
|
||||||
|
#FIXME: stop executing if error else these echo statements are misleading!
|
||||||
echo
|
echo
|
||||||
echo Output filename: ${outfilename}
|
echo Output filename: ${outfile}
|
||||||
echo
|
echo
|
||||||
echo Number of urls saved: $(wc -l < ${filename})
|
echo Number of urls saved: $(wc -l < ${infile_mut})
|
||||||
echo
|
echo
|
||||||
echo "Processing Complete"
|
echo "Processing Complete"
|
||||||
|
|
||||||
|
|
|
@ -32,24 +32,27 @@
|
||||||
#outfile=$2
|
#outfile=$2
|
||||||
############################ end of code block to make command line args
|
############################ end of code block to make command line args
|
||||||
|
|
||||||
# specify variables for input and output paths and filenames
|
############# specify variables for input and output paths and filenames
|
||||||
inpath="${HOME}/git/Data/pyrazinamide/input"
|
homedir="${HOME}"
|
||||||
processed_path="/processed"
|
#echo Home directory is ${homedir}
|
||||||
infile="/complex1_result_url.txt"
|
basedir="/git/Data/pyrazinamide/input"
|
||||||
|
|
||||||
outpath="${inpath}${processed_path}"
|
# input
|
||||||
outfile="/complex1_output_MASTER.txt"
|
inpath="/processed"
|
||||||
|
in_filename="/complex1_result_url.txt"
|
||||||
|
infile="${homedir}${basedir}${inpath}${in_filename}"
|
||||||
|
echo Input Mut filename: ${infile}
|
||||||
|
|
||||||
# create valid input and output filenames
|
# output
|
||||||
filename="${inpath}${processed_path}${infile}"
|
outpath="/processed"
|
||||||
echo Input File is: ${filename}
|
out_filename="/complex1_output_MASTER.txt"
|
||||||
|
outfile="${homedir}${basedir}${outpath}${out_filename}"
|
||||||
outfilename="${outpath}${outfile}"
|
echo Output filename: ${outfile}
|
||||||
echo Output File will be: ${outfilename}
|
################## end of variable assignment for input and output files
|
||||||
|
|
||||||
# Iterate over each result url, and extract results using hxtools
|
# Iterate over each result url, and extract results using hxtools
|
||||||
# which nicely cleans and formats html
|
# which nicely cleans and formats html
|
||||||
echo -n "Processing $(wc -l < ${filename}) entries from ${infile}"
|
echo -n "Processing $(wc -l < ${infile}) entries from ${infile}"
|
||||||
echo
|
echo
|
||||||
COUNT=0
|
COUNT=0
|
||||||
while read -r line; do
|
while read -r line; do
|
||||||
|
@ -61,12 +64,12 @@ while read -r line; do
|
||||||
| hxselect -c div.well \
|
| hxselect -c div.well \
|
||||||
| sed -r -e 's/<[^>]*>//g' \
|
| sed -r -e 's/<[^>]*>//g' \
|
||||||
| sed -re 's/ +//g' \
|
| sed -re 's/ +//g' \
|
||||||
>> ${outfilename}
|
>> ${outfile}
|
||||||
#| tee -a ${outfilename}
|
#| tee -a ${outfile}
|
||||||
# echo -n '.'
|
# echo -n '.'
|
||||||
echo -e "Processing entry ${COUNT}/$(wc -l < ${filename})..."
|
echo -e "Processing entry ${COUNT}/$(wc -l < ${infile})..."
|
||||||
|
|
||||||
done < "${filename}"
|
done < "${infile}"
|
||||||
|
|
||||||
echo
|
echo
|
||||||
echo "Processing Complete"
|
echo "Processing Complete"
|
||||||
|
|
|
@ -22,22 +22,27 @@
|
||||||
# the output is a txt file with no newlines and formatting
|
# the output is a txt file with no newlines and formatting
|
||||||
# to have the following format "<colname><:><value>"
|
# to have the following format "<colname><:><value>"
|
||||||
#***********************************************************************
|
#***********************************************************************
|
||||||
# specify variables for input and output paths and filenames
|
############# specify variables for input and output paths and filenames
|
||||||
inpath="${HOME}/git/Data/pyrazinamide/input"
|
homedir="${HOME}"
|
||||||
processed_path="/processed"
|
basedir="/git/Data/pyrazinamide/input"
|
||||||
|
|
||||||
|
inpath="/processed"
|
||||||
|
|
||||||
# Create input file: copy and rename output file of step2
|
# Create input file: copy and rename output file of step2
|
||||||
oldfile="${inpath}${processed_path}/complex1_output_MASTER.txt"
|
oldfile="${homedir}${basedir}${inpath}/complex1_output_MASTER.txt"
|
||||||
newfile="${inpath}${processed_path}/complex1_output_processed.txt"
|
newfile="${homedir}${basedir}${inpath}/complex1_output_processed.txt"
|
||||||
cp $oldfile $newfile
|
cp $oldfile $newfile
|
||||||
|
|
||||||
infile="/complex1_output_processed.txt"
|
echo Input filename is ${oldfile}
|
||||||
filename="${inpath}${processed_path}${infile}"
|
echo
|
||||||
|
echo Output i.e copied filename is ${newfile}
|
||||||
|
|
||||||
echo Input filename is : ${filename}
|
# output: No output per se
|
||||||
|
# Replacement in place inside the copied file
|
||||||
|
################## end of variable assignment for input and output files
|
||||||
|
|
||||||
#sed -i '/PredictedAffinityChange:/ { N; N; N; N; s/\n//g;}' ${filename} \
|
#sed -i '/PredictedAffinityChange:/ { N; N; N; N; s/\n//g;}' ${newfile} \
|
||||||
# | sed -i '/DUETstabilitychange:/ {x; N; N; s/\n//g; p;d;}' ${filename}
|
# | sed -i '/DUETstabilitychange:/ {x; N; N; s/\n//g; p;d;}' ${newfile}
|
||||||
|
|
||||||
# Outputs records separated by a newline, that look something like this:
|
# Outputs records separated by a newline, that look something like this:
|
||||||
# PredictedAffinityChange:-2.2log(affinityfoldchange)-Destabilizing
|
# PredictedAffinityChange:-2.2log(affinityfoldchange)-Destabilizing
|
||||||
|
@ -66,4 +71,4 @@ N
|
||||||
N
|
N
|
||||||
s/\n//g
|
s/\n//g
|
||||||
}
|
}
|
||||||
/^$/d' ${filename}
|
/^$/d' ${newfile}
|
||||||
|
|
|
@ -21,20 +21,22 @@ from collections import defaultdict
|
||||||
# output: formatted .csv file
|
# output: formatted .csv file
|
||||||
# path: "Data/<drug>/input/processed/<filename>"
|
# path: "Data/<drug>/input/processed/<filename>"
|
||||||
#***********************************************************************
|
#***********************************************************************
|
||||||
# specify variables for input and output paths and filenames
|
############# specify variables for input and output paths and filenames
|
||||||
homedir = os.path.expanduser('~') # spyder/python doesn't recognise tilde
|
homedir = os.path.expanduser('~') # spyder/python doesn't recognise tilde
|
||||||
|
|
||||||
basedir = "/git/Data/pyrazinamide/input"
|
basedir = "/git/Data/pyrazinamide/input"
|
||||||
|
|
||||||
|
# input
|
||||||
inpath = "/processed"
|
inpath = "/processed"
|
||||||
in_filename = "/complex1_output_processed.txt"
|
in_filename = "/complex1_output_processed.txt"
|
||||||
infile = homedir + basedir + inpath + in_filename
|
infile = homedir + basedir + inpath + in_filename
|
||||||
print("Input file is:", infile)
|
print("Input file is:", infile)
|
||||||
|
|
||||||
|
# output
|
||||||
outpath = "/processed"
|
outpath = "/processed"
|
||||||
out_filename = "/complex1_formatted_results.csv"
|
out_filename = "/complex1_formatted_results.csv"
|
||||||
outfile = homedir + basedir + outpath + out_filename
|
outfile = homedir + basedir + outpath + out_filename
|
||||||
print("Output file is:", outfile)
|
print("Output file is:", outfile)
|
||||||
# end of variable assignment for input and output files
|
################## end of variable assignment for input and output files
|
||||||
|
|
||||||
outCols=[
|
outCols=[
|
||||||
'PredictedAffinityChange',
|
'PredictedAffinityChange',
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue