diff --git a/dynamut/get_results_def.py b/dynamut/get_results_def.py index 43fa7e0..457a947 100644 --- a/dynamut/get_results_def.py +++ b/dynamut/get_results_def.py @@ -17,13 +17,8 @@ import pandas as pd from pandas.api.types import is_string_dtype from pandas.api.types import is_numeric_dtype #%%============================================================================ -homedir = os.path.expanduser('~') -#print(homedir) -host = 'http://biosig.unimelb.edu.au' -# Needed if things try to block the 'requests' user agent -#headers = {"User-Agent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36"} #%% -def get_results(url_file): +def get_results(url_file, host_url, outdir, outfile_suffix): # initilialise empty df dynamut_results_out_df = pd.DataFrame() with open(url_file, 'r') as f: @@ -38,7 +33,7 @@ def get_results(url_file): #dynamut_results_df = pd.DataFrame() for a in batch_soup.find_all('a', href=True, attrs = {'class':'btn btn-default btn-sm'}): print ("Found the URL:", a['href']) - single_result_url = host + a['href'] + single_result_url = host_url + a['href'] snp = re.search(r'([A-Z]+[0-9]+[A-Z]+$)', single_result_url).group(0) print(snp) print('\nGetting results from:', single_result_url) @@ -68,24 +63,47 @@ def get_results(url_file): print('Result DF:', results_df, 'for URL:', line) #dynamut_results_df = dynamut_results_df.append(results_df)#!1 too many!:-) dynamut_results_out_df = dynamut_results_out_df.append(results_df) - - #print(dynamut_results_out_df) + #print(dynamut_results_out_df) + #============================ + # Writing results file: csv + #============================ + dynamut_results_dir = outdir + '/dynamut_results' + if not os.path.exists(dynamut_results_dir): + print('\nCreating dir: dynamut_results within:', outdir ) + os.makedirs(dynamut_results_dir) print('\nWriting dynamut results df') print('\nResults File:' , '\nNo. 
of rows:', dynamut_results_out_df.shape[0] , '\nNo. of cols:', dynamut_results_out_df.shape[1]) print(dynamut_results_out_df) - dynamut_results_out_df.to_csv('/tmp/test_dynamut.csv', index = False) + #dynamut_results_out_df.to_csv('/tmp/test_dynamut.csv', index = False) + + # build out filename + out_filename = dynamut_results_dir + '/dynamut_output_' + outfile_suffix + '.csv' + dynamut_results_out_df.to_csv(out_filename, index = False) +#%% EXAMPLE RUN +# globals +#homedir = os.path.expanduser('~') +#my_host = 'http://biosig.unimelb.edu.au' +#my_outdir = homedir + '/git/LSHTM_analysis/dynamut' -#%% +#============================================= # example 1: multiple urls in a single file -my_url_file_multiple = homedir + '/git/LSHTM_analysis/dynamut/dynamut_temp/dynamut_result_url_batch_multiple.txt' -print(my_url_file_multiple) -get_results(my_url_file_multiple) +#============================================= +#my_url_file_multiple = homedir + '/git/LSHTM_analysis/dynamut/dynamut_temp/dynamut_result_url_batch_multiple.txt' +#print(my_url_file_multiple) +#get_results(url_file = my_url_file_multiple +# , host_url = my_host +# , outdir = my_outdir +# , outfile_suffix='multiple') +#============================================= # example 2: single url in a file -my_url_file_single = homedir + '/git/LSHTM_analysis/dynamut/dynamut_temp/dynamut_result_url_batch_single.txt' -print(my_url_file_multiple) -get_results(my_url_file_single) +#============================================= +#my_url_file_single = homedir + '/git/LSHTM_analysis/dynamut/dynamut_temp/dynamut_result_url_batch_single.txt' +#print(my_url_file_single) +#get_results(my_url_file_single +# , host_url = my_host +# , outdir = my_outdir +# , outfile_suffix = 'single') #%% - diff --git a/dynamut/submit_def.py b/dynamut/submit_def.py index e2add8e..1412ae2 100644 --- a/dynamut/submit_def.py +++ b/dynamut/submit_def.py @@ -31,7 +31,7 @@ def request_calculation(pdb_file #, out_url_file ): """ - Makes a POST 
request for a ligand affinity prediction. + Makes a POST request for dynamut predictions. @param pdb_file: valid path to pdb structure @type string @@ -100,12 +100,17 @@ indir = datadir + '/' + drug + '/input' outdir = datadir + '/' + drug + '/output' #outdir = homedir + '/git/LSHTM_analysis/dynamut' # for example - -my_pdb_file = homedir + '/git/Data/streptomycin/input/gid_complex.pdb' -my_mutation_list = homedir + '/git/Data/streptomycin/output/snp_batches/50/snp_batch_00.txt' my_chain = 'A' -my_batch = 1 -#my_outfile = dynamut_temp_dir + '/dynamut_result_url_batch_' + str(batch_no) + '.txt' +my_pdb_file = homedir + '/git/Data/streptomycin/input/gid_complex.pdb' + +# batch 1: 00.txt +#my_mutation_list = homedir + '/git/Data/streptomycin/output/snp_batches/50/snp_batch_00.txt' +#my_batch = 1 + +# batch 2: 01.txt +my_mutation_list = outdir + '/snp_batches/50/snp_batch_01.txt' +my_batch = 2 + # %% call this function request_calculation (pdb_file = my_pdb_file , mutation_list = my_mutation_list