consistent input & output variables for filenames to run mcsm
This commit is contained in:
parent
ef99167679
commit
e3f4c630a1
4 changed files with 71 additions and 55 deletions
|
@ -15,35 +15,40 @@
|
|||
|
||||
# NOTE: these are just result urls, not actual values for results
|
||||
#**********************************************************************
|
||||
# specify variables for input and output paths and filenames
|
||||
############# specify variables for input and output paths and filenames
|
||||
homedir="${HOME}"
|
||||
#echo Home directory is ${homedir}
|
||||
basedir="/git/Data/pyrazinamide/input"
|
||||
|
||||
inpath="${HOME}/git/Data/pyrazinamide/input"
|
||||
processed_path="/processed"
|
||||
struc_path="/structure"
|
||||
infile_mut="/pnca_mis_SNPs_v2_unique.csv"
|
||||
infile_struc="/complex1_no_water.pdb"
|
||||
# input
|
||||
inpath_mut="/processed"
|
||||
in_filename_mut="/pnca_mis_SNPs_v2_unique.csv"
|
||||
infile_mut="${homedir}${basedir}${inpath_mut}${in_filename_mut}"
|
||||
echo Input Mut filename: ${infile_mut}
|
||||
|
||||
outpath="${inpath}${processed_path}"
|
||||
outfile="/complex1_result_url.txt"
|
||||
inpath_struc="/structure"
|
||||
in_filename_struc="/complex1_no_water.pdb"
|
||||
infile_struc="${homedir}${basedir}${inpath_struc}${in_filename_struc}"
|
||||
echo Input Struc filename: ${infile_struc}
|
||||
|
||||
# create valid input and output filenames
|
||||
#filename="${HOME}/git/Data/pyrazinamide/input/processed/pnca_mis_SNPs_v2_unique.csv"
|
||||
filename="${inpath}${processed_path}${infile_mut}"
|
||||
echo Input File is: ${filename}
|
||||
# output
|
||||
outpath="/processed"
|
||||
out_filename="/complex1_result_url.txt"
|
||||
outfile="${homedir}${basedir}${outpath}${out_filename}"
|
||||
#echo Output filename: ${outfile}
|
||||
################## end of variable assignment for input and output files
|
||||
|
||||
outfilename="${outpath}${outfile}"
|
||||
echo Output File will be: ${outfilename}
|
||||
|
||||
# iterate over mutation file; line by line and submit query using curl
|
||||
# iterate over mutation file (infile_mut); line by line and
|
||||
# submit query using curl
|
||||
# some useful messages
|
||||
echo -n -e "Processing $(wc -l < ${filename}) entries from ${infile_mut}\n"
|
||||
echo -n -e "Processing $(wc -l < ${infile_mut}) entries from ${infile_mut}\n"
|
||||
COUNT=0
|
||||
while read -r line; do
|
||||
((COUNT++))
|
||||
mutation="${line}"
|
||||
# echo "${mutation}"
|
||||
#pdb='../Data/complex1_no_water.pdb'
|
||||
pdb="${inpath}${struc_path}${infile_struc}"
|
||||
pdb="${infile_struc}"
|
||||
mutation="${mutation}"
|
||||
chain="A"
|
||||
lig_id="PZA"
|
||||
|
@ -51,7 +56,7 @@ affin_wt="0.99"
|
|||
host="http://biosig.unimelb.edu.au"
|
||||
call_url="/mcsm_lig/prediction"
|
||||
|
||||
##=========================================
|
||||
#=========================================
|
||||
##html field_names names required for curl
|
||||
##complex_field:wild=@
|
||||
##mutation_field:mutation=@
|
||||
|
@ -78,19 +83,20 @@ refresh_url=$(curl -L \
|
|||
result_url=$(echo $refresh_url | sed -r 's/.*(\/mcsm.*)".*$/\1/g')
|
||||
sleep 10
|
||||
|
||||
echo -e "${mutation} : processing entry ${COUNT}/$(wc -l < ${filename})..."
|
||||
echo -e "${mutation} : processing entry ${COUNT}/$(wc -l < ${infile_mut})..."
|
||||
|
||||
# create output file with the added number of muts from file
|
||||
# after much thought, bad idea as less generic!
|
||||
#echo -e "${host}${result_url}" >> ../Results/$(wc -l < ${filename})_complex1_result_url.txt
|
||||
echo -e "${host}${result_url}" >> ${outfilename}
|
||||
echo -e "${host}${result_url}" >> ${outfile}
|
||||
#echo -n '.'
|
||||
done < "${filename}"
|
||||
done < "${infile_mut}"
|
||||
|
||||
#FIXME: stop executing if error else these echo statements are misleading!
|
||||
echo
|
||||
echo Output filename: ${outfilename}
|
||||
echo Output filename: ${outfile}
|
||||
echo
|
||||
echo Number of urls saved: $(wc -l < ${filename})
|
||||
echo Number of urls saved: $(wc -l < ${infile_mut})
|
||||
echo
|
||||
echo "Processing Complete"
|
||||
|
||||
|
|
|
@ -32,24 +32,27 @@
|
|||
#outfile=$2
|
||||
############################ end of code block to make command line args
|
||||
|
||||
# specify variables for input and output paths and filenames
|
||||
inpath="${HOME}/git/Data/pyrazinamide/input"
|
||||
processed_path="/processed"
|
||||
infile="/complex1_result_url.txt"
|
||||
############# specify variables for input and output paths and filenames
|
||||
homedir="${HOME}"
|
||||
#echo Home directory is ${homedir}
|
||||
basedir="/git/Data/pyrazinamide/input"
|
||||
|
||||
outpath="${inpath}${processed_path}"
|
||||
outfile="/complex1_output_MASTER.txt"
|
||||
# input
|
||||
inpath="/processed"
|
||||
in_filename="/complex1_result_url.txt"
|
||||
infile="${homedir}${basedir}${inpath}${in_filename}"
|
||||
echo Input Mut filename: ${infile}
|
||||
|
||||
# create valid input and output filenames
|
||||
filename="${inpath}${processed_path}${infile}"
|
||||
echo Input File is: ${filename}
|
||||
|
||||
outfilename="${outpath}${outfile}"
|
||||
echo Output File will be: ${outfilename}
|
||||
# output
|
||||
outpath="/processed"
|
||||
out_filename="/complex1_output_MASTER.txt"
|
||||
outfile="${homedir}${basedir}${outpath}${out_filename}"
|
||||
echo Output filename: ${outfile}
|
||||
################## end of variable assignment for input and output files
|
||||
|
||||
# Iterate over each result url, and extract results using hxtools
|
||||
# which nicely cleans and formats html
|
||||
echo -n "Processing $(wc -l < ${filename}) entries from ${infile}"
|
||||
echo -n "Processing $(wc -l < ${infile}) entries from ${infile}"
|
||||
echo
|
||||
COUNT=0
|
||||
while read -r line; do
|
||||
|
@ -61,12 +64,12 @@ while read -r line; do
|
|||
| hxselect -c div.well \
|
||||
| sed -r -e 's/<[^>]*>//g' \
|
||||
| sed -re 's/ +//g' \
|
||||
>> ${outfilename}
|
||||
#| tee -a ${outfilename}
|
||||
>> ${outfile}
|
||||
#| tee -a ${outfile}
|
||||
# echo -n '.'
|
||||
echo -e "Processing entry ${COUNT}/$(wc -l < ${filename})..."
|
||||
echo -e "Processing entry ${COUNT}/$(wc -l < ${infile})..."
|
||||
|
||||
done < "${filename}"
|
||||
done < "${infile}"
|
||||
|
||||
echo
|
||||
echo "Processing Complete"
|
||||
|
|
|
@ -22,22 +22,27 @@
|
|||
# the output is a txt file with no newlines and formatting
|
||||
# to have the following format "<colname><:><value>"
|
||||
#***********************************************************************
|
||||
# specify variables for input and output paths and filenames
|
||||
inpath="${HOME}/git/Data/pyrazinamide/input"
|
||||
processed_path="/processed"
|
||||
############# specify variables for input and output paths and filenames
|
||||
homedir="${HOME}"
|
||||
basedir="/git/Data/pyrazinamide/input"
|
||||
|
||||
inpath="/processed"
|
||||
|
||||
# Create input file: copy and rename output file of step2
|
||||
oldfile="${inpath}${processed_path}/complex1_output_MASTER.txt"
|
||||
newfile="${inpath}${processed_path}/complex1_output_processed.txt"
|
||||
oldfile="${homedir}${basedir}${inpath}/complex1_output_MASTER.txt"
|
||||
newfile="${homedir}${basedir}${inpath}/complex1_output_processed.txt"
|
||||
cp $oldfile $newfile
|
||||
|
||||
infile="/complex1_output_processed.txt"
|
||||
filename="${inpath}${processed_path}${infile}"
|
||||
echo Input filename is ${oldfile}
|
||||
echo
|
||||
echo Output i.e copied filename is ${newfile}
|
||||
|
||||
echo Input filename is : ${filename}
|
||||
# output: No output per se
|
||||
# Replacement in place inside the copied file
|
||||
################## end of variable assignment for input and output files
|
||||
|
||||
#sed -i '/PredictedAffinityChange:/ { N; N; N; N; s/\n//g;}' ${filename} \
|
||||
# | sed -i '/DUETstabilitychange:/ {x; N; N; s/\n//g; p;d;}' ${filename}
|
||||
#sed -i '/PredictedAffinityChange:/ { N; N; N; N; s/\n//g;}' ${newfile} \
|
||||
# | sed -i '/DUETstabilitychange:/ {x; N; N; s/\n//g; p;d;}' ${newfile}
|
||||
|
||||
# Outputs records separated by a newline, that look something like this:
|
||||
# PredictedAffinityChange:-2.2log(affinityfoldchange)-Destabilizing
|
||||
|
@ -66,4 +71,4 @@ N
|
|||
N
|
||||
s/\n//g
|
||||
}
|
||||
/^$/d' ${filename}
|
||||
/^$/d' ${newfile}
|
||||
|
|
|
@ -21,20 +21,22 @@ from collections import defaultdict
|
|||
# output: formatted .csv file
|
||||
# path: "Data/<drug>/input/processed/<filename>"
|
||||
#***********************************************************************
|
||||
# specify variables for input and output paths and filenames
|
||||
############# specify variables for input and output paths and filenames
|
||||
homedir = os.path.expanduser('~') # spyder/python doesn't recognise tilde
|
||||
|
||||
basedir = "/git/Data/pyrazinamide/input"
|
||||
|
||||
# input
|
||||
inpath = "/processed"
|
||||
in_filename = "/complex1_output_processed.txt"
|
||||
infile = homedir + basedir + inpath + in_filename
|
||||
print("Input file is:", infile)
|
||||
|
||||
# output
|
||||
outpath = "/processed"
|
||||
out_filename = "/complex1_formatted_results.csv"
|
||||
outfile = homedir + basedir + outpath + out_filename
|
||||
print("Output file is:", outfile)
|
||||
# end of variable assignment for input and output files
|
||||
################## end of variable assignment for input and output files
|
||||
|
||||
outCols=[
|
||||
'PredictedAffinityChange',
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue