#!/bin/bash
#=======================================================================
# Generate mutated sequences with mutate.py and reduce the output to
# bare sequences by removing the FASTA-style '>' header lines.
#
# Background: https://www.biostars.org/p/336891/
# General usage of the underlying tool:
#   python Mutate.py -v -o /path/to/output.fasta mutation_map_file.csv input.fasta
#
# NOTE: mutate.py and gene_msa_snp.csv are referenced by relative paths,
# so run this script from the directory that contains them.
#=======================================================================

# Abort on the first failing command, on use of an unset variable, and on
# a failure in any pipeline stage, so that later steps never operate on
# stale or missing output from an earlier step.
set -euo pipefail

readonly mutate_script='mutate.py'
readonly pza_dir='/home/tanu/git/Data/pyrazinamide'
readonly scratch_dir='/home/tanu/git/LSHTM_analysis/scripts/plotting/scratch_plots'

#-----------------------------------------------------------------------
# pncA: build the mutated sequence file
#-----------------------------------------------------------------------
# Make sure there is no newline at the end of the mutation file (snps.csv).
# Earlier invocation, kept for reference:
#python3 Mutate.py -v -o /home/tanu/git/Data/pyrazinamide/input/output.fasta mut_map.csv 3pl1.fasta.txt
readonly pnca_msa="${pza_dir}/output/pnca_msa.txt"

python3 "${mutate_script}" -v -o "${pnca_msa}" \
  "${pza_dir}/output/pnca_all_muts_msa.csv" \
  "${pza_dir}/input/3pl1.fasta.txt"

# Remove FASTA-style header lines in the output, i.e. lines beginning
# with '>', so the file contains just the mutated sequences.
sed -i '/^>.*$/d' "${pnca_msa}"

printf 'No. of lines after cleaning: '
wc -l < "${pnca_msa}"

#%%
# Date: 13/01/22
#-----------------------------------------------------------------------
# gid: build the mutated sequence file in the scratch directory
#-----------------------------------------------------------------------
readonly gid_mut_map="${scratch_dir}/gid_mut_map.csv"
readonly gid_raw="${scratch_dir}/TEST2.csv"
readonly gid_clean="${scratch_dir}/TEST3.csv"

# Pre-processing: strip every double quote from the mutation map.
# The result is written to the same absolute path that mutate.py reads
# below, so the two steps agree regardless of the current directory.
sed 's/"//g' gene_msa_snp.csv > "${gid_mut_map}"

# The mut prefix in the mutation map file MUST match the FASTA file header.
python3 "${mutate_script}" -v -o "${gid_raw}" \
  "${gid_mut_map}" \
  /home/tanu/git/Data/streptomycin/input/gid2.fasta

wc -l "${gid_raw}"

# Post-processing: blank out everything from the first '>' on each line,
# then drop the lines that are empty as a result.
sed -E -e 's/>.*//' -e '/^$/d' "${gid_raw}" > "${gid_clean}"
wc -l "${gid_clean}"

# Author's note: the raw mutate.py output has double the number of lines
# because it adds the mutation info (presumably one '>' header line per
# mutated sequence -- confirm against mutate.py).
python3 "${mutate_script}" -v -o "${scratch_dir}/TEST.csv" \
  "${scratch_dir}/pnca_mut_map.csv" \
  "${pza_dir}/input/pnca.fasta"