Use consistent input and output variable names for the filenames used to run mcsm

This commit is contained in:
Tanushree Tunstall 2020-01-13 12:16:13 +00:00
parent ef99167679
commit e3f4c630a1
4 changed files with 71 additions and 55 deletions

View file

@ -32,24 +32,27 @@
#outfile=$2
############################ end of code block to make command line args
# specify variables for input and output paths and filenames
inpath="${HOME}/git/Data/pyrazinamide/input"
processed_path="/processed"
infile="/complex1_result_url.txt"
############# specify variables for input and output paths and filenames
homedir="${HOME}"
#echo Home directory is ${homedir}
basedir="/git/Data/pyrazinamide/input"
outpath="${inpath}${processed_path}"
outfile="/complex1_output_MASTER.txt"
# input
inpath="/processed"
in_filename="/complex1_result_url.txt"
infile="${homedir}${basedir}${inpath}${in_filename}"
echo Input Mut filename: ${infile}
# create valid input and output filenames
filename="${inpath}${processed_path}${infile}"
echo Input File is: ${filename}
outfilename="${outpath}${outfile}"
echo Output File will be: ${outfilename}
# output
outpath="/processed"
out_filename="/complex1_output_MASTER.txt"
outfile="${homedir}${basedir}${outpath}${out_filename}"
echo Output filename: ${outfile}
################## end of variable assignment for input and output files
# Iterate over each result URL and extract the results using the hx* tools
# (e.g. hxselect, from html-xml-utils), which clean and format the HTML
echo -n "Processing $(wc -l < ${filename}) entries from ${infile}"
echo -n "Processing $(wc -l < ${infile}) entries from ${infile}"
echo
COUNT=0
while read -r line; do
@ -61,12 +64,12 @@ while read -r line; do
| hxselect -c div.well \
| sed -r -e 's/<[^>]*>//g' \
| sed -re 's/ +//g' \
>> ${outfilename}
#| tee -a ${outfilename}
>> ${outfile}
#| tee -a ${outfile}
# echo -n '.'
echo -e "Processing entry ${COUNT}/$(wc -l < ${filename})..."
echo -e "Processing entry ${COUNT}/$(wc -l < ${infile})..."
done < "${filename}"
done < "${infile}"
echo
echo "Processing Complete"