From d443ecea6b6471e4fcfcc31c771d4857dce58af0 Mon Sep 17 00:00:00 2001
From: Tanushree Tunstall <tanu@tunstall.in>
Date: Mon, 20 Sep 2021 16:13:15 +0100
Subject: [PATCH] added separate script for splitting csv after adding chain
 ID. saves lots of post processing

---
 dynamut/split_csv_chain.sh | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)
 create mode 100755 dynamut/split_csv_chain.sh
diff --git a/dynamut/split_csv_chain.sh b/dynamut/split_csv_chain.sh
new file mode 100755
index 0000000..2526b3f
--- /dev/null
+++ b/dynamut/split_csv_chain.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+# FIXME: This is written for expediency to kickstart running dynamut, mcsm-PPI2 (batch of 50) and mCSM-NA (batch of 20)
+
+# Usage: ~/git/LSHTM_analysis/dynamut/split_csv_chain.sh <input file> <output dir> <chunk size in lines>
+# Copy your snp file into the dynamut dir, use sed to add the chain ID to it, then split it here to avoid post processing.
+# Use case: ~/git/LSHTM_analysis/dynamut/split_csv_chain.sh katg_mcsm_formatted_snps.csv snp_batches 50     #Date: 20/09/2021
+
+INFILE=${1:?input file argument is required}
+OUTDIR=${2:?output dir argument is required}
+CHUNK=${3:?chunk size in lines argument is required}
+
+# Make a relative input path absolute now, so it still resolves after the cd below however deep OUTDIR is.
+[[ "${INFILE}" == /* ]] || INFILE="${PWD}/${INFILE}"
+
+# Stop if the output dir cannot be created or entered, so split never writes into the wrong directory.
+mkdir -p -- "${OUTDIR}/${CHUNK}/chain_added" || exit 1
+cd -- "${OUTDIR}/${CHUNK}/chain_added" || exit 1
+
+# Write CHUNK-line pieces with numeric suffixes (snp_batch_00, snp_batch_01, ...) into the current dir.
+split -l "${CHUNK}" -d -- "${INFILE}" snp_batch_
+# TODO: add .txt to the files (not done by this script)