finding seq discrepancy in MSA for embb

This commit is contained in:
Tanushree Tunstall 2022-01-17 19:11:10 +00:00
parent af04c69d66
commit 68a092037b
4 changed files with 36 additions and 9 deletions

View file

@ -30,6 +30,8 @@ while read -r gene drug; do
echo "Running mutate.py on data file $MSA_MAP"
python3 mutate.py -v -o ${DATA_DIR}/${drug}/output/${gene}_msa_interim.csv $MSA_MAP $DATA_DIR/${drug}/input/${gene}_f2.fasta
echo "mutate.py completed"
sed -E 's/>.*//g;/^$/d' ${DATA_DIR}/${drug}/output/${gene}_msa_interim.csv > ${DATA_DIR}/${drug}/output/${gene}_msa.csv
wc -l ${DATA_DIR}/${drug}/output/${gene}_msa.csv
echo
done < gene_targets_names.txt
@ -37,14 +39,17 @@ done < gene_targets_names.txt
# Stop here so we don't run the examples below :)
# The unconditional `exit` ends the script at this point, so none of the
# commands that follow are executed when the script is run; they are kept
# only as reference examples to be copied and run by hand.
exit
########################################################################
#
########################################################################
# make sure there is no new line at the end of the mutation file (snps.csv)
# check
#
# NOTE(review): each pipeline below appears twice, and the two copies differ
# only in the spacing around the first `rev`. This looks like the before/after
# sides of a diff hunk rendered without +/- markers -- confirm against the
# real file before de-duplicating.
#
# Stages shared by all of the pipelines:
#   rev | cut -d, -f1 | rev   keep only the LAST comma-separated field of each
#                             line (reverse, take the first field, reverse back)
#   tail -n +2                drop the first line (presumably the CSV header --
#                             TODO confirm)
#   sort                      sort the remaining values
#
# Preview: print the first 10 sorted values to the terminal.
cat output/gid_metadata.csv | rev| cut -d, -f1 |rev | tail -n +2 |sort | head
cat output/gid_metadata.csv | rev | cut -d, -f1 |rev | tail -n +2 |sort | head
# Count how many times each distinct value occurs (`uniq -c` prefixes each
# value with its count) and save the result.
cat output/gid_metadata.csv | rev| cut -d, -f1 |rev | tail -n +2 |sort | uniq -c > output/gid_metadata_mut_count.csv
cat output/gid_metadata.csv | rev | cut -d, -f1 |rev | tail -n +2 |sort | uniq -c > output/gid_metadata_mut_count.csv
# Save the full sorted list of values (duplicates kept) to gid_msa_snp.csv
# in the current directory.
cat output/gid_metadata.csv | rev| cut -d, -f1 |rev | tail -n +2 |sort > gid_msa_snp.csv
cat output/gid_metadata.csv | rev | cut -d, -f1 |rev | tail -n +2 |sort > gid_msa_snp.csv
# Prepend the literal text "gid," to every line of gid_msa_snp.csv, editing
# the file in place.
sed -i 's/^/gid,/' gid_msa_snp.csv
#cp gid_msa_snp.csv gid_mut_map.csv