diff --git a/mcsm_analysis/pyrazinamide/scripts/mcsm/step1_lig_output_urls.sh b/mcsm_analysis/pyrazinamide/scripts/mcsm/step1_lig_output_urls.sh index 83dab94..6361b62 100755 --- a/mcsm_analysis/pyrazinamide/scripts/mcsm/step1_lig_output_urls.sh +++ b/mcsm_analysis/pyrazinamide/scripts/mcsm/step1_lig_output_urls.sh @@ -15,35 +15,40 @@ # NOTE: these are just result urls, not actual values for results #********************************************************************** -# specify variables for input and output paths and filenames +############# specify variables for input and output paths and filenames +homedir="${HOME}" +#echo Home directory is ${homedir} +basedir="/git/Data/pyrazinamide/input" -inpath="${HOME}/git/Data/pyrazinamide/input" -processed_path="/processed" -struc_path="/structure" -infile_mut="/pnca_mis_SNPs_v2_unique.csv" -infile_struc="/complex1_no_water.pdb" +# input +inpath_mut="/processed" +in_filename_mut="/pnca_mis_SNPs_v2_unique.csv" +infile_mut="${homedir}${basedir}${inpath_mut}${in_filename_mut}" +echo Input Mut filename: ${infile_mut} -outpath="${inpath}${processed_path}" -outfile="/complex1_result_url.txt" +inpath_struc="/structure" +in_filename_struc="/complex1_no_water.pdb" +infile_struc="${homedir}${basedir}${inpath_struc}${in_filename_struc}" +echo Input Struc filename: ${infile_struc} -# create valid input and output filenames -#filename="${HOME}/git/Data/pyrazinamide/input/processed/pnca_mis_SNPs_v2_unique.csv" -filename="${inpath}${processed_path}${infile_mut}" -echo Input File is: ${filename} +# output +outpath="/processed" +out_filename="/complex1_result_url.txt" +outfile="${homedir}${basedir}${outpath}${out_filename}" +#echo Output filename: ${outfile} +################## end of variable assignment for input and output files -outfilename="${outpath}${outfile}" -echo Output File will be: ${outfilename} - -# iterate over mutation file; line by line and submit query using curl +# iterate over mutation file (infile_mut); line by line and +# submit query using curl # some useful messages -echo -n -e "Processing $(wc -l < ${filename}) entries from ${infile_mut}\n" +echo -n -e "Processing $(wc -l < ${infile_mut}) entries from ${infile_mut}\n" COUNT=0 while read -r line; do ((COUNT++)) mutation="${line}" # echo "${mutation}" #pdb='../Data/complex1_no_water.pdb' -pdb="${inpath}${struc_path}${infile_struc}" +pdb="${infile_struc}" mutation="${mutation}" chain="A" lig_id="PZA" @@ -51,7 +56,7 @@ affin_wt="0.99" host="http://biosig.unimelb.edu.au" call_url="/mcsm_lig/prediction" -##========================================= +#========================================= ##html field_names names required for curl ##complex_field:wild=@ ##mutation_field:mutation=@ @@ -78,19 +83,20 @@ refresh_url=$(curl -L \ result_url=$(echo $refresh_url | sed -r 's/.*(\/mcsm.*)".*$/\1/g') sleep 10 -echo -e "${mutation} : processing entry ${COUNT}/$(wc -l < ${filename})..." +echo -e "${mutation} : processing entry ${COUNT}/$(wc -l < ${infile_mut})..." # create output file with the added number of muts from file # after much thought, bad idea as less generic! #echo -e "${host}${result_url}" >> ../Results/$(wc -l < ${filename})_complex1_result_url.txt -echo -e "${host}${result_url}" >> ${outfilename} +echo -e "${host}${result_url}" >> ${outfile} #echo -n '.' -done < "${filename}" +done < "${infile_mut}" +#FIXME: stop executing if error else these echo statements are misleading! echo -echo Output filename: ${outfilename} +echo Output filename: ${outfile} echo -echo Number of urls saved: $(wc -l < ${filename}) +echo Number of urls saved: $(wc -l < ${infile_mut}) echo echo "Processing Complete" diff --git a/mcsm_analysis/pyrazinamide/scripts/mcsm/step2_lig_results.sh b/mcsm_analysis/pyrazinamide/scripts/mcsm/step2_lig_results.sh index 10c9291..51a7844 100755 --- a/mcsm_analysis/pyrazinamide/scripts/mcsm/step2_lig_results.sh +++ b/mcsm_analysis/pyrazinamide/scripts/mcsm/step2_lig_results.sh @@ -32,24 +32,27 @@ #outfile=$2 ############################ end of code block to make command line args -# specify variables for input and output paths and filenames -inpath="${HOME}/git/Data/pyrazinamide/input" -processed_path="/processed" -infile="/complex1_result_url.txt" +############# specify variables for input and output paths and filenames +homedir="${HOME}" +#echo Home directory is ${homedir} +basedir="/git/Data/pyrazinamide/input" -outpath="${inpath}${processed_path}" -outfile="/complex1_output_MASTER.txt" +# input +inpath="/processed" +in_filename="/complex1_result_url.txt" +infile="${homedir}${basedir}${inpath}${in_filename}" +echo Input Mut filename: ${infile} -# create valid input and output filenames -filename="${inpath}${processed_path}${infile}" -echo Input File is: ${filename} - -outfilename="${outpath}${outfile}" -echo Output File will be: ${outfilename} +# output +outpath="/processed" +out_filename="/complex1_output_MASTER.txt" +outfile="${homedir}${basedir}${outpath}${out_filename}" +echo Output filename: ${outfile} +################## end of variable assignment for input and output files # Iterate over each result url, and extract results using hxtools # which nicely cleans and formats html -echo -n "Processing $(wc -l < ${filename}) entries from ${infile}" +echo -n "Processing $(wc -l < ${infile}) entries from ${infile}" echo COUNT=0 while read -r line; do @@ -61,12 +64,12 @@ while read -r line; do | hxselect -c div.well \ | sed -r -e 's/<[^>]*>//g' \ | sed -re 's/ +//g' \ - >> ${outfilename} - #| tee -a ${outfilename} + >> ${outfile} + #| tee -a ${outfile} # echo -n '.' -echo -e "Processing entry ${COUNT}/$(wc -l < ${filename})..." +echo -e "Processing entry ${COUNT}/$(wc -l < ${infile})..." -done < "${filename}" +done < "${infile}" echo echo "Processing Complete" diff --git a/mcsm_analysis/pyrazinamide/scripts/mcsm/step3a_results_format_interim.sh b/mcsm_analysis/pyrazinamide/scripts/mcsm/step3a_results_format_interim.sh index f9c2c09..0861996 100755 --- a/mcsm_analysis/pyrazinamide/scripts/mcsm/step3a_results_format_interim.sh +++ b/mcsm_analysis/pyrazinamide/scripts/mcsm/step3a_results_format_interim.sh @@ -22,22 +22,27 @@ # the output is a txt file with no newlines and formatting # to have the following format "<:> #*********************************************************************** -# specify variables for input and output paths and filenames -inpath="${HOME}/git/Data/pyrazinamide/input" -processed_path="/processed" +############# specify variables for input and output paths and filenames +homedir="${HOME}" +basedir="/git/Data/pyrazinamide/input" + +inpath="/processed" # Create input file: copy and rename output file of step2 -oldfile="${inpath}${processed_path}/complex1_output_MASTER.txt" -newfile="${inpath}${processed_path}/complex1_output_processed.txt" +oldfile="${homedir}${basedir}${inpath}/complex1_output_MASTER.txt" +newfile="${homedir}${basedir}${inpath}/complex1_output_processed.txt" cp $oldfile $newfile -infile="/complex1_output_processed.txt" -filename="${inpath}${processed_path}${infile}" +echo Input filename is ${oldfile} +echo +echo Output i.e copied filename is ${newfile} -echo Input filename is : ${filename} +# output: No output perse +# Replacement in place inside the copied file +################## end of variable assignment for input and output files -#sed -i '/PredictedAffinityChange:/ { N; N; N; N; s/\n//g;}' ${filename} \ -# | sed -i '/DUETstabilitychange:/ {x; N; N; s/\n//g; p;d;}' ${filename} +#sed -i '/PredictedAffinityChange:/ { N; N; N; N; s/\n//g;}' ${newfile} \ +# | sed -i '/DUETstabilitychange:/ {x; N; N; s/\n//g; p;d;}' ${newfile} # Outputs records separated by a newline, that look something like this: # PredictedAffinityChange:-2.2log(affinityfoldchange)-Destabilizing @@ -66,4 +71,4 @@ N N s/\n//g } -/^$/d' ${filename} +/^$/d' ${newfile} diff --git a/mcsm_analysis/pyrazinamide/scripts/mcsm/step3b_results_format_df.py b/mcsm_analysis/pyrazinamide/scripts/mcsm/step3b_results_format_df.py index bc14609..0e07c0d 100755 --- a/mcsm_analysis/pyrazinamide/scripts/mcsm/step3b_results_format_df.py +++ b/mcsm_analysis/pyrazinamide/scripts/mcsm/step3b_results_format_df.py @@ -21,20 +21,22 @@ from collections import defaultdict # output: formatted .csv file # path: "Data//input/processed/" #*********************************************************************** -# specify variables for input and output paths and filenames +############# specify variables for input and output paths and filenames homedir = os.path.expanduser('~') # spyder/python doesn't recognise tilde - basedir = "/git/Data/pyrazinamide/input" + +# input inpath = "/processed" in_filename = "/complex1_output_processed.txt" infile = homedir + basedir + inpath + in_filename print("Input file is:", infile) +# output outpath = "/processed" out_filename = "/complex1_formatted_results.csv" outfile = homedir + basedir + outpath + out_filename print("Output file is:", outfile) -# end of variable assignment for input and output files +################## end of variable assignment for input and output files outCols=[ 'PredictedAffinityChange',