diff --git a/dynamut/split_csv.sh b/dynamut/split_csv.sh new file mode 100755 index 0000000..5e7bfd9 --- /dev/null +++ b/dynamut/split_csv.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +# FIXME: This is written for expediency to kickstart running dynamut and mcsm-NA + +# Usage: ~/git/LSHTM_analysis/dynamut/split_csv.sh +# copy your snp file to split into the dynamut dir + +INFILE=$1 +OUTDIR=$2 +CHUNK=$3 + +mkdir -p ${OUTDIR}/${CHUNK} +cd ${OUTDIR}/${CHUNK} + +split ../../${INFILE} -l ${CHUNK} -d snp_batch_ + +# use case +#~/git/LSHTM_analysis/dynamut/split_csv.sh gid_mcsm_formatted_snps.csv snp_batches 50 diff --git a/dynamut/submit.py b/dynamut/submit.py index 7b8f0d1..3343dec 100755 --- a/dynamut/submit.py +++ b/dynamut/submit.py @@ -16,17 +16,34 @@ from bs4 import BeautifulSoup import pandas as pd from pandas.api.types import is_string_dtype from pandas.api.types import is_numeric_dtype +#%% homedir +homedir = os.path.expanduser('~') +print('My homedir is:', homedir) + #%% host = 'http://biosig.unimelb.edu.au' prediction_url = f"{host}/dynamut/prediction_list" print(prediction_url) -#%% -#def format_data(data_file): +#%% example params +gene_name = 'gid' +drug = 'streptomycin' +datadir = homedir + '/git/Data' +indir = datadir + '/' + drug + '/input' +#outdir = datadir + '/' + drug + '/output' +outdir = homedir + '/git/LSHTM_analysis/dynamut' # for example + +dynamut_temp_dir = outdir + '/dynamut_temp' + +if not os.path.exists(dynamut_temp_dir): + print('Creating dynamut_temp in outdir', outdir ) + os.makedirs(dynamut_temp_dir) + +batch_no = 1 +out_url_file = dynamut_temp_dir + '/dynamut_result_url_batch_' + str(batch_no) + '.txt' + #%% request calculation (no def) -output_dir = "/home/tanu/git/LSHTM_analysis/dynamut" -gene_name = 'gid' with open("/home/tanu/git/Data/streptomycin/input/gid_complex.pdb", "rb") as pdb_file, open ("/home/tanu/git/LSHTM_analysis/dynamut/snp_test2.csv") as mutation_list: files = {"wild": pdb_file , "mutation_list": mutation_list} @@ -43,21 +60,11 @@ with open("/home/tanu/git/Data/streptomycin/input/gid_complex.pdb", "rb") as pdb #=============== # writing file: result urls #=============== - out_url_file = output_dir + '/' + gene_name.lower() + '_snp_batch' + '_result_url.txt' - print(out_url_file) + out_url_file = dynamut_temp_dir + '/dynamut_result_url_batch_' + str(batch_no) + '.txt' + print('Writing output url file:', out_url_file) myfile = open(out_url_file, 'a') - myfile.write(url + '\n') + myfile.write(url) myfile.close() - else: - print('ERROR: invalid mutation! Wild-type residue doesn\'t match pdb file.' - , '\nSkipping to the next mutation in file...') - #=============== - # writing file: invalid mutations - #=============== - out_error_file = output_dir + '/' + gene_name.lower() + '_errors.txt' - failed_muts = open(out_error_file, 'a') - failed_muts.write(mutation_list + '\n') - failed_muts.close() #%% def request_calculation(pdb_file, mutation_list @@ -66,7 +73,8 @@ def request_calculation(pdb_file, mutation_list , prediction_url , output_dir , gene_name - , url_file): + , batch_no + , out_url_file): """ Makes a POST request for a ligand affinity prediction.