handle not ready (refresh) url

Tanushree Tunstall 2020-04-21 17:12:18 +01:00
parent a405aa17c3
commit b28d866237
2 changed files with 60 additions and 48 deletions

@@ -119,16 +119,24 @@ def scrape_results(result_url):
     # if results_response is not None:
     #     page = results_page.text
     if result_response.status_code == 200:
-        print('SUCCESS: Fetching results')
+        print('Fetching results')
+        # extract results using the html parser
+        soup = BeautifulSoup(result_response.text, features = 'html.parser')
+        # print(soup)
+        web_result_raw = soup.find(class_ = 'span4').get_text()
+        #metatags = soup.find_all('meta')
+        metatags = soup.find_all('meta', attrs={'http-equiv':'refresh'})
+        #print('meta tags:', metatags)
+        if metatags:
+            print('WARNING: Submission not ready for URL:', result_url)
+            # TODO: Add logging
+            #if debug:
+            #    debug.warning('submission not ready for URL:', result_url)
+        else:
+            return web_result_raw
     else:
         print('FAIL: Could not fetch results'
               , '\nCheck if url is valid')
-    # extract results using the html parser
-    soup = BeautifulSoup(result_response.text, features = 'html.parser')
-    # print(soup)
-    web_result_raw = soup.find(class_ = 'span4').get_text()
-    return web_result_raw
 def build_result_dict(web_result_raw):
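
The heart of this change is the refresh check: while an mCSM job is still running, the result URL serves a holding page that reloads itself via a <meta http-equiv="refresh"> tag, and that tag disappears once real results are up. A minimal self-contained sketch of the same test (requests and BeautifulSoup as in the script; the function name is illustrative, and 'span4' is the results class the scraper already targets):

# Sketch of the "not ready" test added above: a <meta http-equiv="refresh">
# tag means the holding page is still auto-reloading, i.e. the mCSM job
# has not finished. Function name and flow are illustrative.
import requests
from bs4 import BeautifulSoup

def fetch_result_if_ready(result_url):
    response = requests.get(result_url)
    if response.status_code != 200:
        print('FAIL: Could not fetch results', '\nCheck if url is valid')
        return None
    soup = BeautifulSoup(response.text, features='html.parser')
    if soup.find_all('meta', attrs={'http-equiv': 'refresh'}):
        print('WARNING: Submission not ready for URL:', result_url)
        return None  # caller can retry this URL later
    return soup.find(class_='span4').get_text()

Returning None for both failure modes lets the caller tell "retry later" apart from a parsed result, which is exactly what the updated get_results() in the next file relies on.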

@@ -62,53 +62,57 @@ out_filename_format = gene.lower() + '_mcsm_processed.csv'
 outfile_format = outdir + '/' + out_filename_format
 #%%=====================================================================
 def submit_mcsm():
     my_chain = 'A'
     # my_ligand_id = 'DCS' # FIXME
     my_ligand_id = 'RMP' # FIXME
     my_affinity = 10
     print('Result urls and error file (if any) will be written in: ', outdir)
     # call function to format data to remove duplicate snps before submitting job
     mcsm_muts = format_data(infile_snps)
     mut_count = 1 # HURR DURR COUNT STARTEDS AT ONE1`!1
     infile_snps_len = os.popen('wc -l < %s' % infile_snps).read() # quicker than using Python :-)
     print('Total SNPs for', gene, ':', infile_snps_len)
     for mcsm_mut in mcsm_muts:
         print('Processing mutation: %s of %s' % (mut_count, infile_snps_len), mcsm_mut)
         print('Parameters for mcsm_lig:', in_filename_pdb, mcsm_mut, my_chain, my_ligand_id, my_affinity, prediction_url, outdir, gene)
         # function call: to request mcsm prediction
         # which writes file containing url for valid submissions and invalid muts to respective files
         holding_page = request_calculation(infile_pdb, mcsm_mut, my_chain, my_ligand_id, my_affinity, prediction_url, outdir, gene, host)
         time.sleep(1)
         mut_count += 1
         # result_url = write_result_url(holding_page, result_urls, host)
     print('Request submitted'
           , '\nCAUTION: Processing will take at least ten'
           , 'minutes, but will be longer for more mutations.')
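
Both submit_mcsm() and get_results() count input lines by shelling out to wc -l (per the "quicker than using Python :-)" comment). Note that os.popen(...).read() returns the count as a string such as '58\n'. A dependency-free equivalent that yields an int directly, with a placeholder path, might look like:

# Sketch: counting input lines without shelling out to wc -l.
def count_lines(path):
    # stream the file so large inputs are never loaded into memory
    with open(path) as f:
        return sum(1 for _ in f)

# os.popen('wc -l < %s' % path).read() gives a string like '58\n';
# count_lines(path) returns an int, so no stripping or casting is needed.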
 #%%=====================================================================
 def get_results():
     output_df = pd.DataFrame()
     url_counter = 1 # HURR DURR COUNT STARTEDS AT ONE1`!1
+    success_counter = 1
     infile_len = os.popen('wc -l < %s' % result_urls).read() # quicker than using Python :-) #FIXME filenme (infile_urls)
     print('Total URLs:', infile_len)
     with open(result_urls, 'r') as urlfile:
         for line in urlfile:
             url_line = line.strip()
             # call functions
             results_interim = scrape_results(url_line)
-            result_dict = build_result_dict(results_interim)
-            print('Processing URL: %s of %s' % (url_counter, infile_len))
-            df = pd.DataFrame(result_dict, index=[url_counter])
-            url_counter += 1
-            output_df = output_df.append(df)
+            if results_interim is not None:
+                print('Processing URL: %s of %s' % (url_counter, infile_len))
+                result_dict = build_result_dict(results_interim)
+                df = pd.DataFrame(result_dict, index=[url_counter])
+                output_df = output_df.append(df)
+                success_counter += 1
+            url_counter += 1
+    print('Total URLs: %s Successful: %s Failed: %s' % (url_counter-1, success_counter-1, (url_counter - success_counter)))
     output_df.to_csv(mcsm_output, index = None, header = True)
 #%%=====================================================================
 def format_results():
     print('Input file:', mcsm_output
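
A forward-compatibility note on the row accumulation in get_results() above: DataFrame.append was deprecated in pandas 1.4 and removed in 2.0. A sketch of the same pattern against current pandas, with placeholder data standing in for the scraped result dicts:

import pandas as pd

# Sketch: collect one single-row frame per successful URL and build the
# output once at the end. pd.concat replaces the removed DataFrame.append.
result_dicts = [{'mutation': 'A10V', 'pred_affinity': -1.2}]  # placeholder sample data

frames = []
for i, result_dict in enumerate(result_dicts, start=1):
    frames.append(pd.DataFrame(result_dict, index=[i]))
output_df = pd.concat(frames) if frames else pd.DataFrame()
output_df.to_csv('mcsm_output.csv', index=False, header=True)

Building the frame once avoids the quadratic copying that repeated appends incur on large URL lists.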