mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-23 08:09:48 -05:00
fix: handle exceptions when requesting page headers for content-type sampling.
This commit is contained in:
parent
43a37f02ce
commit
f40bbd13d9
@@ -295,9 +295,13 @@ class BrozzlerWorker:
|
||||
def _get_page_headers(self, page):
|
||||
# bypassing warcprox, requests' stream=True defers downloading the body of the response
|
||||
# see https://docs.python-requests.org/en/latest/user/advanced/#body-content-workflow
|
||||
with requests.get(page.url, stream=True) as r:
|
||||
page_headers = r.headers
|
||||
return page_headers
|
||||
try:
|
||||
with requests.get(page.url, stream=True, verify=False) as r:
|
||||
page_headers = r.headers
|
||||
return page_headers
|
||||
except requests.exceptions.RequestException as e:
|
||||
self.logger.warning("Failed to get headers for %s: %s", page.url, e)
|
||||
return {}
|
||||
|
||||
def _needs_browsing(self, page_headers):
|
||||
if (
|
||||
|
Loading…
x
Reference in New Issue
Block a user