#!/bin/bash
#=======================================================================
# Build per-gene mutation map files from metadata CSVs and run mutate.py
# on every "gene drug" pair listed in gene_targets_names.txt.
#
# Usage:
#   Run from the directory that contains mutate.py and
#   gene_targets_names.txt (one whitespace-separated "gene drug" pair
#   per line; blank lines are ignored).
#
# Environment:
#   DATA_DIR  root of the data tree (default: /home/tanu/git/Data)
#
# For each pair the script reads
#   ${DATA_DIR}/<drug>/output/<gene>_metadata.csv
#   ${DATA_DIR}/<drug>/input/<gene>_f2.fasta      (passed to mutate.py)
# and writes
#   ${DATA_DIR}/<drug>/output/<gene>_metadata_mut_count.csv
#   ${DATA_DIR}/<drug>/output/<gene>_msa_map.csv
#   ${DATA_DIR}/<drug>/output/<gene>_msa_interim.csv  (mutate.py -o target)
#
# Metadata files edited:
# rpob: remove positions above 1151
# embb: remove positions above 1054
#=======================================================================
#https://www.biostars.org/p/336891/
#python Mutate.py -v -o /path/to/output.fasta mutation_map_file.csv input.fasta
#=======================================================================

# -e: stop on the first failing command; -u: unset variables are errors;
# pipefail: a failure in any stage of the extraction pipelines is fatal
# instead of being masked by the last stage.
set -euo pipefail

# Same default as before, but overridable from the environment.
DATA_DIR="${DATA_DIR:-/home/tanu/git/Data}"

# Print an error message to stderr and abort the script.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Print the last comma-separated field of every row of CSV file $1 except
# the first row (presumably the header), sorted.
# rev|cut|rev selects the LAST field regardless of how many columns exist.
sorted_last_field() {
  rev < "$1" | cut -d, -f1 | rev | tail -n +2 | sort
}

# Generate the mutation count and mutation map files for one gene/drug
# pair, then run mutate.py on the map.
#   $1 - gene name (also used as the prefix of every map line)
#   $2 - drug name (sub-directory of DATA_DIR)
process_gene() {
  local gene=$1
  local drug=$2
  local out_dir="${DATA_DIR}/${drug}/output"
  local metadata="${out_dir}/${gene}_metadata.csv"
  local mut_count="${out_dir}/${gene}_metadata_mut_count.csv"
  local msa_map="${out_dir}/${gene}_msa_map.csv"
  local msa_interim="${out_dir}/${gene}_msa_interim.csv"
  local fasta="${DATA_DIR}/${drug}/input/${gene}_f2.fasta"

  echo "vvvvvvvvvvvvvvvvvvvvv"
  echo "gene: $gene drug: $drug"
  echo "Source File: ${metadata}"

  [[ -r "$metadata" ]] || die "cannot read metadata file: ${metadata}"
  [[ -r "$fasta" ]] || die "cannot read fasta file: ${fasta}"

  wc -l "$metadata"

  # Occurrence count per distinct mutation (values kept verbatim).
  sorted_last_field "$metadata" | uniq -c > "$mut_count"

  # Mutation map: strip double quotes and prefix each line with "<gene>,".
  # Done in one awk pass rather than 'sed -i "s/^/${gene},/"' so the gene
  # name is never interpreted as part of a sed expression.
  sorted_last_field "$metadata" \
    | awk -v prefix="$gene" '{ gsub(/"/, ""); print prefix "," $0 }' \
    > "$msa_map"

  echo "Output to: $msa_map"
  wc -l "$msa_map"
  echo "^^^^^^^^^^^^^^^^^^^^^"
  echo

  echo "Running mutate.py on data file $msa_map"
  python3 mutate.py -v -o "$msa_interim" "$msa_map" "$fasta"
  echo "mutate.py completed"
  echo
}

# Iterate over all gene/drug pairs in gene_targets_names.txt.
main() {
  local targets="gene_targets_names.txt"
  local gene=''
  local drug=''

  [[ -r "$targets" ]] || die "cannot read ${targets} in $(pwd)"
  [[ -f mutate.py ]] || die "mutate.py not found in $(pwd)"

  echo "Processing Mutation Files"
  echo "-------------------------"
  echo

  # The list is read on fd 3 so that commands inside the loop (python3 in
  # particular) cannot consume the remaining lines from stdin.
  # '|| [[ -n "$gene" ]]' keeps a final line that lacks a trailing newline.
  while read -r gene drug <&3 || [[ -n "$gene" ]]; do
    [[ -n "$gene" ]] || continue # skip blank lines
    [[ -n "$drug" ]] || die "no drug given for gene '${gene}' in ${targets}"
    process_gene "$gene" "$drug"
  done 3< "$targets"
}

main "$@"

# Stop here so we don't run the examples below :)
exit 0

# make sure there is no new line at the end of the mutation file (snps.csv)
# check
cat output/gid_metadata.csv | rev| cut -d, -f1 |rev | tail -n +2 |sort | head
cat output/gid_metadata.csv | rev| cut -d, -f1 |rev | tail -n +2 |sort | uniq -c > output/gid_metadata_mut_count.csv
cat output/gid_metadata.csv | rev| cut -d, -f1 |rev | tail -n +2 |sort > gid_msa_snp.csv
sed -i 's/^/gid,/' gid_msa_snp.csv
#cp gid_msa_snp.csv gid_mut_map.csv

#%%
# Date: 16/01/22
# pre processing
# NOTE(review): the steps above write gid_msa_snp.csv, but this reads
# gene_msa_snp.csv - possibly a typo; confirm before running by hand.
sed 's/"//g' gene_msa_snp.csv > gid_mut_map.csv

# mut prefix for mutation map file MUST match fasta file header
python3 mutate.py -v -o /home/tanu/git/Data/streptomycin/output/TEST2.csv /home/tanu/git/Data/streptomycin/output/gid_mut_map.csv /home/tanu/git/Data/streptomycin/input/gid2.fasta
wc -l TEST2.csv

# post processing
sed -E 's/>.*//g' TEST2.csv | sed '/^$/d' > TEST3.csv
wc -l TEST3.csv
#==============================================