#!/bin/bash
#
# split_csv_chain.sh - split a chain-annotated SNP file into fixed-size
# batches for mCSM-NA.
#
# Provenance: added as mcsm_na/split_csv_chain.sh in commit
# 63ec8a1c374c4285ef17104a3661c9923f0a571e by Tanushree Tunstall
# (Fri, 29 Oct 2021 14:00:25 +0100):
# "added split_csv_chain.sh for mCSM-NA analysis in scripts/".
#
# FIXME: This is written for expediency to kickstart running dynamut,
# mcsm-PPI2 (batch of 50) and mCSM-NA (batch of 20).
#
# Usage:
#   ~/git/LSHTM_analysis/mcsm_na/split_csv_chain.sh INFILE OUTDIR CHUNK
#
#   INFILE  SNP file to split. Add the chain ID to it first (e.g. with sed)
#           so that the batches need no post-processing.
#   OUTDIR  Base output directory (created if missing).
#   CHUNK   Number of lines per batch (positive integer).
#
# Output:
#   OUTDIR/CHUNK/chain_added/snp_batch_NN_chain.txt
#
# Example (29/10/2021, 5UHC, for rifampicin; that run used batches 00..56):
#   ~/git/LSHTM_analysis/mcsm_na/split_csv_chain.sh \
#     rpob_mcsm_formatted_snps_chain.csv snp_batches 20
#
# Requires a split(1) that supports -d (numeric suffixes), e.g. GNU coreutils.

#######################################
# Print a one-line usage summary to stderr.
#######################################
usage() {
  printf 'Usage: %s INFILE OUTDIR CHUNK\n' "${0##*/}" >&2
}

#######################################
# Split INFILE into CHUNK-line batches under OUTDIR/CHUNK/chain_added and
# give every batch the "_chain.txt" suffix.
# Arguments:
#   $1 - input file (relative or absolute path)
#   $2 - base output directory (may have several path components)
#   $3 - lines per batch, positive integer
# Outputs:
#   Diagnostics on stderr.
# Returns:
#   0 on success, 2 on bad arguments, non-zero if a file operation fails.
#######################################
split_csv_chain() {
  if (( $# != 3 )); then
    usage
    return 2
  fi

  local infile=$1
  local outdir=$2
  local chunk=$3
  local dest batch

  if [[ ! -f "$infile" || ! -r "$infile" ]]; then
    printf 'error: cannot read input file: %s\n' "$infile" >&2
    return 1
  fi
  if [[ -z "$outdir" ]]; then
    printf 'error: OUTDIR must not be empty\n' >&2
    return 2
  fi
  # 10# forces base 10 so a value such as "020" is not parsed as octal.
  if [[ ! "$chunk" =~ ^[0-9]+$ ]] || (( 10#$chunk == 0 )); then
    printf 'error: CHUNK must be a positive integer, got: %s\n' "$chunk" >&2
    return 2
  fi

  dest="${outdir}/${chunk}/chain_added"
  mkdir -p -- "$dest" || return

  # Write straight into the destination instead of cd-ing there and reaching
  # back with ../../../: that only worked for a relative INFILE combined with
  # a single-component OUTDIR.
  split -l "$chunk" -d -- "$infile" "${dest}/snp_batch_" || return

  # Rename whatever split produced rather than assuming a fixed 00..56 range.
  # Files that already carry the suffix (from an earlier run) are left alone.
  for batch in "$dest"/snp_batch_*; do
    [[ -f "$batch" ]] || continue
    [[ "$batch" == *_chain.txt ]] && continue
    mv -- "$batch" "${batch}_chain.txt" || return
  done

  return 0
}

# Last command on purpose: its status becomes the script's exit status.
split_csv_chain "$@"