A MAAAADDD MAAADDD DAYYYYY,messy embb numbering agrrrhhhhh
This commit is contained in:
parent
07aedfe286
commit
af04c69d66
2 changed files with 72 additions and 0 deletions
7
scripts/gene_targets_names.txt
Normal file
7
scripts/gene_targets_names.txt
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
embb ethambutol
|
||||||
|
rpob rifampicin
|
||||||
|
alr cycloserine
|
||||||
|
katg isoniazid
|
||||||
|
pnca pyrazinamide
|
||||||
|
gid streptomycin
|
||||||
|
|
65
scripts/run_mutate2.sh
Executable file
65
scripts/run_mutate2.sh
Executable file
|
@ -0,0 +1,65 @@
|
||||||
|
#!/bin/bash
#
# run_mutate2.sh - build per-gene mutation map files from the metadata CSVs
# and run mutate.py for every gene/drug pair.
#
# Environment:
#   DATA_DIR  root of the data tree; defaults to /home/tanu/git/Data.

# -e          abort on the first failing command
# -u          treat expansion of an unset variable as an error
# -o pipefail a pipeline fails when any of its stages fails, not only the
#             last one (the script relies on long multi-stage pipelines)
set -euo pipefail

# Metadata files edited:
# rpob: remove positions above 1151
# embb: remove positions above 1054
#=======================================================================
#https://www.biostars.org/p/336891/
#python Mutate.py -v -o /path/to/output.fasta mutation_map_file.csv input.fasta
#=======================================================================
# Keep the historical location as the default, but let callers point the
# script at another data tree without editing it.
DATA_DIR="${DATA_DIR:-/home/tanu/git/Data}"

echo "Processing Mutation Files"
echo "-------------------------"
echo
|
||||||
|
|
||||||
|
# Process every "<gene> <drug>" pair in gene_targets_names.txt (one
# whitespace-separated pair per line).  For each pair, working under
# ${DATA_DIR}/<drug>/:
#   1. output/<gene>_metadata_mut_count.csv  <- occurrence count of each
#      value found in the last column of output/<gene>_metadata.csv
#   2. output/<gene>_msa_map.csv             <- the same column, sorted,
#      double quotes removed, every line prefixed with "<gene>,"
#   3. output/<gene>_msa_interim.csv         <- written by mutate.py from
#      the map file and input/<gene>_f2.fasta
# gene_targets_names.txt and mutate.py are both resolved relative to the
# current working directory.
#
# `|| [[ -n "${gene}" ]]` keeps a final line that has no trailing newline.
while read -r gene drug || [[ -n "${gene}" ]]; do
  # Skip blank or incomplete lines: with an empty gene or drug the paths
  # below would be malformed and the first wc would fail.
  if [[ -z "${gene}" || -z "${drug}" ]]; then
    continue
  fi

  out_dir="${DATA_DIR}/${drug}/output"
  metadata="${out_dir}/${gene}_metadata.csv"
  MSA_MAP="${out_dir}/${gene}_msa_map.csv"

  echo "vvvvvvvvvvvvvvvvvvvvv"
  echo "gene: $gene drug: $drug"
  echo "Source File: ${metadata}"
  wc -l "${metadata}"

  # rev | cut -f1 | rev selects the last comma-separated field of each row
  # without needing the column count; tail -n +2 drops the header row.
  rev "${metadata}" | cut -d, -f1 | rev | tail -n +2 | sort | uniq -c \
    > "${out_dir}/${gene}_metadata_mut_count.csv"
  rev "${metadata}" | cut -d, -f1 | rev | tail -n +2 | sort | sed -e 's/"//g' \
    > "${MSA_MAP}"
  echo "Output to: $MSA_MAP"

  # Prefix every line of the map file with "<gene>,".
  sed -i "s/^/${gene},/" "${MSA_MAP}"
  wc -l "${MSA_MAP}"
  echo "^^^^^^^^^^^^^^^^^^^^^"
  echo

  echo "Running mutate.py on data file $MSA_MAP"
  python3 mutate.py -v -o "${out_dir}/${gene}_msa_interim.csv" \
    "${MSA_MAP}" "${DATA_DIR}/${drug}/input/${gene}_f2.fasta"
  echo "mutate.py completed"
  echo
done < gene_targets_names.txt
|
||||||
|
|
||||||
|
# Stop here so we don't run the examples below :)
exit

# ----------------------------------------------------------------------
# Everything below is unreachable.  It is the manual, gid-only version of
# the steps above, kept for reference.
# NOTE(review): the paths are not consistent with one another (some are
# relative to the current directory, some are absolute under
# /home/tanu/git/Data/streptomycin) - confirm the intended working
# directory before copying any of these commands.
# ----------------------------------------------------------------------

# make sure there is no newline at the end of the mutation file (snps.csv)
# check
rev output/gid_metadata.csv | cut -d, -f1 | rev | tail -n +2 | sort | head

rev output/gid_metadata.csv | cut -d, -f1 | rev | tail -n +2 | sort | uniq -c > output/gid_metadata_mut_count.csv


rev output/gid_metadata.csv | cut -d, -f1 | rev | tail -n +2 | sort > gid_msa_snp.csv
sed -i 's/^/gid,/' gid_msa_snp.csv

#cp gid_msa_snp.csv gid_mut_map.csv
#%%
# Date: 16/01/22
# pre processing
# Input was "gene_msa_snp.csv", which nothing above creates; the file
# produced two steps earlier is gid_msa_snp.csv.
sed 's/"//g' gid_msa_snp.csv > gid_mut_map.csv

# mut prefix for mutation map file MUST match fasta file header
python3 mutate.py -v -o /home/tanu/git/Data/streptomycin/output/TEST2.csv /home/tanu/git/Data/streptomycin/output/gid_mut_map.csv /home/tanu/git/Data/streptomycin/input/gid2.fasta

wc -l TEST2.csv

# post processing
# Blank out every line starting at a '>' character, then drop the
# resulting empty lines.
sed -E 's/>.*//g' TEST2.csv | sed '/^$/d' > TEST3.csv
wc -l TEST3.csv

#==============================================
|
Loading…
Add table
Add a link
Reference in a new issue