Revert browser.py and pull the page status from browser.websock_thread.page_status instead.

This commit is contained in:
Adam Miller 2024-11-12 13:06:09 -08:00
parent 0dca9f3861
commit 0f100e6a71
2 changed files with 6 additions and 5 deletions

View File

@@ -596,7 +596,7 @@ class Browser:
outlinks = self.extract_outlinks(timeout=extract_outlinks_timeout)
if run_behaviors and not skip_visit_hashtags:
self.visit_hashtags(final_page_url, hashtags, outlinks)
return final_page_url, outlinks, self.websock_thread.page_status
return final_page_url, outlinks
except brozzler.ReachedLimit:
# websock_thread has stashed the ReachedLimit exception with
# more information, raise that one

View File

@@ -255,17 +255,18 @@ class BrozzlerWorker:
else:
self.logger.info("needs browsing: %s", page)
try:
browser_outlinks, status_code = self._browse_page(
browser_outlinks = self._browse_page(
browser, site, page, on_screenshot, on_request
)
outlinks.update(browser_outlinks)
status_code = browser.websock_thread.page_status
if status_code in [502, 504]:
raise brozzler.PageConnectionError()
except brozzler.PageInterstitialShown:
self.logger.info("page interstitial shown (http auth): %s", page)
if enable_youtube_dl and ydl.should_ytdlp(
site, page, browser.websock_thread.page_status, self._skip_av_seeds
site, page, status_code, self._skip_av_seeds
):
try:
ydl_outlinks = ydl.do_youtube_dl(self, site, page)
@@ -394,7 +395,7 @@ class BrozzlerWorker:
window_height=self._window_height,
window_width=self._window_width,
)
final_page_url, outlinks, status_code = browser.browse_page(
final_page_url, outlinks = browser.browse_page(
page.url,
extra_headers=site.extra_headers(page),
behavior_parameters=site.get("behavior_parameters"),
@@ -419,7 +420,7 @@ class BrozzlerWorker:
)
if final_page_url != page.url:
page.note_redirect(final_page_url)
return outlinks, status_code
return outlinks
def _fetch_url(self, site, url=None, page=None):
proxies = None