finding seq discrepancy in MSA for embb
This commit is contained in:
parent
af04c69d66
commit
68a092037b
4 changed files with 36 additions and 9 deletions
|
@ -30,6 +30,8 @@ while read -r gene drug; do
|
|||
echo "Running mutate.py on data file $MSA_MAP"
|
||||
# Run mutate.py in verbose mode on the MSA map and the gene's *_f2.fasta input,
# writing the interim CSV into the drug's output directory.
# All expansions are quoted so paths containing spaces or glob characters
# are passed as single arguments.
python3 mutate.py -v -o "${DATA_DIR}/${drug}/output/${gene}_msa_interim.csv" "$MSA_MAP" "${DATA_DIR}/${drug}/input/${gene}_f2.fasta"
|
||||
echo "mutate.py completed"
|
||||
# Post-process the interim file: on every line drop everything from the first
# '>' to the end of the line, then delete lines that are empty as a result.
# The cleaned output is written to <gene>_msa.csv.
# Paths are quoted so spaces or glob characters cannot split them.
sed -E 's/>.*//g;/^$/d' "${DATA_DIR}/${drug}/output/${gene}_msa_interim.csv" > "${DATA_DIR}/${drug}/output/${gene}_msa.csv"
|
||||
# Print the number of lines in the cleaned MSA CSV (path quoted to avoid
# word splitting and globbing).
wc -l "${DATA_DIR}/${drug}/output/${gene}_msa.csv"
|
||||
echo
|
||||
|
||||
done < gene_targets_names.txt
|
||||
|
@ -37,14 +39,17 @@ done < gene_targets_names.txt
|
|||
# Stop here so we don't run the examples below :)
|
||||
exit
|
||||
|
||||
########################################################################
|
||||
#
|
||||
########################################################################
|
||||
# Make sure there is no trailing newline at the end of the mutation file (snps.csv)
|
||||
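# Check: preview the first sorted values of the last comma-separated column of output/gid_metadata.csv (header row skipped)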
|
||||
cat output/gid_metadata.csv | rev| cut -d, -f1 |rev | tail -n +2 |sort | head
|
||||
cat output/gid_metadata.csv | rev | cut -d, -f1 |rev | tail -n +2 |sort | head
|
||||
|
||||
cat output/gid_metadata.csv | rev| cut -d, -f1 |rev | tail -n +2 |sort | uniq -c > output/gid_metadata_mut_count.csv
|
||||
cat output/gid_metadata.csv | rev | cut -d, -f1 |rev | tail -n +2 |sort | uniq -c > output/gid_metadata_mut_count.csv
|
||||
|
||||
|
||||
cat output/gid_metadata.csv | rev| cut -d, -f1 |rev | tail -n +2 |sort > gid_msa_snp.csv
|
||||
cat output/gid_metadata.csv | rev | cut -d, -f1 |rev | tail -n +2 |sort > gid_msa_snp.csv
|
||||
sed -i 's/^/gid,/' gid_msa_snp.csv
|
||||
|
||||
#cp gid_msa_snp.csv gid_mut_map.csv
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue