added split_csv.sh

This commit is contained in:
Tanushree Tunstall 2021-02-11 13:42:14 +00:00
parent 2eab17cb9e
commit cfe9028a9c
2 changed files with 44 additions and 18 deletions

18
dynamut/split_csv.sh Executable file
View file

@ -0,0 +1,18 @@
#!/bin/bash
# FIXME: This is written for expediency to kickstart running dynamut and mcsm-NA
#
# Split a snp csv file into numbered batches of a fixed number of lines.
#
# Usage: ~/git/LSHTM_analysis/dynamut/split_csv.sh <input file> <output dir> <chunk size in lines>
#   <input file>  file to split; may be relative to the current directory or absolute
#   <output dir>  directory under which a sub-directory named <chunk size> is created
#   <chunk size>  number of lines per batch (positive integer)
#
# Batches are written to <output dir>/<chunk size>/ with the prefix "snp_batch_"
# followed by a numeric suffix (split -d).

# Stop on the first failing command, on unset variables and on pipeline errors,
# so that split never runs in the wrong directory after a failed mkdir/cd.
set -euo pipefail

if [ "$#" -ne 3 ]; then
	echo "Usage: $0 <input file> <output dir> <chunk size in lines>" >&2
	exit 1
fi

INFILE=$1
OUTDIR=$2
CHUNK=$3

if [ ! -f "${INFILE}" ]; then
	echo "ERROR: input file not found: ${INFILE}" >&2
	exit 1
fi

# The chunk size is used both as a directory name and as the split line count,
# so it must be a plain positive integer.
case "${CHUNK}" in
	''|*[!0-9]*)
		echo "ERROR: chunk size must be a positive integer: ${CHUNK}" >&2
		exit 1
		;;
esac
if [ "${CHUNK}" -le 0 ]; then
	echo "ERROR: chunk size must be a positive integer: ${CHUNK}" >&2
	exit 1
fi

# Resolve the input to an absolute path BEFORE changing directory, instead of
# assuming it lives exactly two levels above the batch directory.
INFILE_ABS="$(cd "$(dirname -- "${INFILE}")" && pwd)/$(basename -- "${INFILE}")"

mkdir -p -- "${OUTDIR}/${CHUNK}"
cd -- "${OUTDIR}/${CHUNK}"
split -l "${CHUNK}" -d -- "${INFILE_ABS}" snp_batch_
# use case
#~/git/LSHTM_analysis/dynamut/split_csv.sh gid_mcsm_formatted_snps.csv snp_batches 50

View file

@ -16,17 +16,34 @@ from bs4 import BeautifulSoup
import pandas as pd import pandas as pd
from pandas.api.types import is_string_dtype from pandas.api.types import is_string_dtype
from pandas.api.types import is_numeric_dtype from pandas.api.types import is_numeric_dtype
#%% homedir
homedir = os.path.expanduser('~')
print('My homedir is:', homedir)
#%% #%%
host = 'http://biosig.unimelb.edu.au' host = 'http://biosig.unimelb.edu.au'
prediction_url = f"{host}/dynamut/prediction_list" prediction_url = f"{host}/dynamut/prediction_list"
print(prediction_url) print(prediction_url)
#%% #%% example params
#def format_data(data_file): gene_name = 'gid'
drug = 'streptomycin'
datadir = homedir + '/git/Data'
indir = datadir + '/' + drug + '/input'
#outdir = datadir + '/' + drug + '/output'
outdir = homedir + '/git/LSHTM_analysis/dynamut' # for example
dynamut_temp_dir = outdir + '/dynamut_temp'
if not os.path.exists(dynamut_temp_dir):
print('Creating dynamut_temp in outdir', outdir )
os.makedirs(dynamut_temp_dir)
batch_no = 1
out_url_file = dynamut_temp_dir + '/dynamut_result_url_batch_' + str(batch_no) + '.txt'
#%% request calculation (no def) #%% request calculation (no def)
output_dir = "/home/tanu/git/LSHTM_analysis/dynamut"
gene_name = 'gid'
with open("/home/tanu/git/Data/streptomycin/input/gid_complex.pdb", "rb") as pdb_file, open ("/home/tanu/git/LSHTM_analysis/dynamut/snp_test2.csv") as mutation_list: with open("/home/tanu/git/Data/streptomycin/input/gid_complex.pdb", "rb") as pdb_file, open ("/home/tanu/git/LSHTM_analysis/dynamut/snp_test2.csv") as mutation_list:
files = {"wild": pdb_file files = {"wild": pdb_file
, "mutation_list": mutation_list} , "mutation_list": mutation_list}
@ -43,21 +60,11 @@ with open("/home/tanu/git/Data/streptomycin/input/gid_complex.pdb", "rb") as pdb
#=============== #===============
# writing file: result urls # writing file: result urls
#=============== #===============
out_url_file = output_dir + '/' + gene_name.lower() + '_snp_batch' + '_result_url.txt' out_url_file = dynamut_temp_dir + '/dynamut_result_url_batch_' + str(batch_no) + '.txt'
print(out_url_file) print('Writing output url file:', out_url_file)
myfile = open(out_url_file, 'a') myfile = open(out_url_file, 'a')
myfile.write(url + '\n') myfile.write(url)
myfile.close() myfile.close()
else:
print('ERROR: invalid mutation! Wild-type residue doesn\'t match pdb file.'
, '\nSkipping to the next mutation in file...')
#===============
# writing file: invalid mutations
#===============
out_error_file = output_dir + '/' + gene_name.lower() + '_errors.txt'
failed_muts = open(out_error_file, 'a')
failed_muts.write(mutation_list + '\n')
failed_muts.close()
#%% #%%
def request_calculation(pdb_file, mutation_list def request_calculation(pdb_file, mutation_list
@ -66,7 +73,8 @@ def request_calculation(pdb_file, mutation_list
, prediction_url , prediction_url
, output_dir , output_dir
, gene_name , gene_name
, url_file): , batch_no
, out_url_file):
""" """
Makes a POST request for a ligand affinity prediction. Makes a POST request for a ligand affinity prediction.