41 lines
1.6 KiB
Bash
Executable file
41 lines
1.6 KiB
Bash
Executable file
#!/bin/bash
|
|
|
|
# FIXME: This was written for expediency to kick-start running dynamut, mCSM-PPI2 (batches of 50) and mCSM-NA (batches of 20).
|
|
|
|
# Usage: ~/git/LSHTM_analysis/dynamut/split_csv.sh <input file> <output dir> <chunk size in lines>
|
|
# copy your snp file to split into the dynamut dir
|
|
# use sed to add chain ID to snp file and then split to avoid post processing
|
|
|
|
# Positional arguments:
#   $1  INFILE  file to split (relative to the invoking directory, or absolute)
#   $2  OUTDIR  base output directory
#   $3  CHUNK   number of lines per batch; also used as a sub-directory name
#
# Batches are written as snp_batch_NN into ${OUTDIR}/${CHUNK}/chain_added,
# and the script's working directory is left there for the steps that follow.
if (( $# < 3 )); then
  printf 'Usage: %s <input file> <output dir> <chunk size in lines>\n' \
    "${0##*/}" >&2
  exit 2
fi

INFILE=$1
OUTDIR=$2
CHUNK=$3

if [[ -z "${OUTDIR}" || -z "${CHUNK}" ]]; then
  printf '%s: output dir and chunk size must not be empty\n' "${0##*/}" >&2
  exit 2
fi

if [[ ! -f "${INFILE}" ]]; then
  printf '%s: input file not found: %s\n' "${0##*/}" "${INFILE}" >&2
  exit 1
fi

# Resolve the input to an absolute path *before* changing directory, so the
# split below works regardless of how deep OUTDIR is or whether either path is
# absolute (a fixed "../../../" prefix is only right for a relative INFILE and
# a single-level relative OUTDIR).
case "${INFILE}" in
  /*) infile_abs=${INFILE} ;;
  *) infile_abs="${PWD}/${INFILE}" ;;
esac

batch_dir="${OUTDIR}/${CHUNK}/chain_added"

mkdir -p -- "${batch_dir}" || exit 1

# Abort if the directory cannot be entered; otherwise the batches would be
# written into whatever directory the script was started from.
cd -- "${batch_dir}" || exit 1

# -d: numeric suffixes (snp_batch_00, snp_batch_01, ...); options are given
# before the operands so the call does not rely on argument permutation.
split -l "${CHUNK}" -d -- "${infile_abs}" snp_batch_ || exit 1
|
|
|
|
########################################################################
|
|
# use cases
|
|
# Date: 20/09/2021
|
|
# sed -e 's/^/A /g' katg_mcsm_formatted_snps.csv > katg_mcsm_formatted_snps_chain.csv
|
|
#~/git/LSHTM_analysis/dynamut/split_csv_chain.sh katg_mcsm_formatted_snps_chain.csv snp_batches 50
|
|
|
|
# Date: 01/10/2021
|
|
# sed -e 's/^/A /g' rpob_mcsm_formatted_snps.csv > rpob_mcsm_formatted_snps_chain.csv
|
|
#~/git/LSHTM_analysis/dynamut/split_csv_chain.sh rpob_mcsm_formatted_snps_chain.csv snp_batches 50
|
|
|
|
# Date: 02/10/2021
|
|
# sed -e 's/^/A /g' alr_mcsm_formatted_snps.csv > alr_mcsm_formatted_snps_chain.csv
|
|
#~/git/LSHTM_analysis/dynamut/split_csv_chain.sh alr_mcsm_formatted_snps_chain.csv snp_batches 50
|
|
|
|
# Date: 05/10/2021
|
|
#~/git/LSHTM_analysis/dynamut/split_csv_chain.sh alr_mcsm_formatted_snps_chain.csv snp_batches 20
|
|
|
|
# Date: 30/11/2021
|
|
#~/git/LSHTM_analysis/dynamut/split_csv_chain.sh katg_mcsm_formatted_snps_chain.csv snp_batches 20
|
|
#######################################
# Add a ".txt" extension to every split output file (snp_batch_*) in the
# current working directory.
#
# Uses a glob instead of a fixed 00..40 range, so any number of batches is
# handled and no "mv" errors are emitted for indices that do not exist.
# Files that already end in ".txt" are left alone, so re-running is safe.
#
# Arguments: none
# Returns:   0 on success, 1 if a rename fails
#######################################
add_txt_suffix() {
  local batch_file
  for batch_file in snp_batch_*; do
    # With no matches the glob stays literal; -f filters that case out,
    # along with anything that is not a regular file.
    if [[ ! -f "${batch_file}" ]]; then
      continue
    fi
    # Already renamed (e.g. by a previous run).
    if [[ "${batch_file}" == *.txt ]]; then
      continue
    fi
    mv -- "${batch_file}" "${batch_file}.txt" || return 1
  done
  return 0
}

add_txt_suffix
|
|
########################################################################
|