Added a separate script for splitting the CSV after adding the chain ID; this saves a lot of post-processing.

This commit is contained in:
Tanushree Tunstall 2021-09-20 16:13:15 +01:00
parent daa3556ede
commit d443ecea6b

22
dynamut/split_csv_chain.sh Executable file
View file

@ -0,0 +1,22 @@
#!/bin/bash
# Split an SNP CSV file into fixed-size batches of lines.
#
# FIXME: This is written for expediency to kickstart running dynamut,
# mcsm-PPI2 (batch of 50) and mCSM-NA (batch of 20).
#
# Usage:
#   ~/git/LSHTM_analysis/dynamut/split_csv_chain.sh <input file> <output dir> <chunk size in lines>
#
# Workflow:
#   1. Copy your SNP file to split into the dynamut dir.
#   2. Use sed to add the chain ID to the SNP file, and only then split it
#      with this script, to avoid post-processing the batches.
#
# Output:
#   <output dir>/<chunk size>/chain_added/snp_batch_00, snp_batch_01, ...
#
# Use case (Date: 20/09/2021):
#   ~/git/LSHTM_analysis/dynamut/split_csv_chain.sh katg_mcsm_formatted_snps.csv snp_batches 50
#
# TODO: add .txt to the files
set -euo pipefail

# Print an error message to stderr and exit with the given status.
die() {
  local status=$1
  shift
  printf '%s\n' "$*" >&2
  exit "$status"
}

if (( $# != 3 )); then
  die 2 "Usage: ${0##*/} <input file> <output dir> <chunk size in lines>"
fi

INFILE=$1
OUTDIR=$2
CHUNK=$3

# Fail early with a clear message instead of leaving an empty directory tree
# behind when the input is missing or the chunk size is not usable.
if [[ ! -r "$INFILE" || -d "$INFILE" ]]; then
  die 1 "error: input file '$INFILE' does not exist or is not readable"
fi
if [[ ! "$CHUNK" =~ ^[0-9]+$ ]] || (( 10#$CHUNK == 0 )); then
  die 2 "error: chunk size must be a positive integer, got '$CHUNK'"
fi

BATCH_DIR="${OUTDIR}/${CHUNK}/chain_added"
mkdir -p -- "$BATCH_DIR"

# Write the batches straight into BATCH_DIR via the output prefix rather than
# cd-ing into it: the input path is then resolved from the caller's working
# directory, so absolute input paths and nested/absolute output dirs all work
# (the previous hard-coded ../../../ only worked for a single-level relative
# output dir and a relative input path).
split -l "$CHUNK" -d -- "$INFILE" "${BATCH_DIR}/snp_batch_"