diff --git a/brozzler/worker.py b/brozzler/worker.py
index 2ad4eea..eb1e5b6 100644
--- a/brozzler/worker.py
+++ b/brozzler/worker.py
@@ -296,9 +296,15 @@ class BrozzlerWorker:
     def _get_page_headers(self, page):
         # bypassing warcprox, requests' stream=True defers downloading the body of the response
         # see https://docs.python-requests.org/en/latest/user/advanced/#body-content-workflow
-        with requests.get(page.url, stream=True) as r:
-            page_headers = r.headers
-            return page_headers
+        try:
+            # without a timeout requests waits indefinitely on a stalled server;
+            # Timeout is a RequestException, so it is logged and handled below
+            with requests.get(page.url, stream=True, verify=False, timeout=30) as r:
+                page_headers = r.headers
+                return page_headers
+        except requests.exceptions.RequestException as e:
+            self.logger.warning("Failed to get headers for %s: %s", page.url, e)
+            return {}
 
     def _needs_browsing(self, page_headers):
         if (
diff --git a/setup.py b/setup.py
index 9b753a1..18a4e0c 100644
--- a/setup.py
+++ b/setup.py
@@ -34,7 +34,7 @@ def find_package_data(package):
 
 setuptools.setup(
     name="brozzler",
-    version="1.5.53a1",
+    version="1.5.53a2",
     description="Distributed web crawling with browsers",
     url="https://github.com/internetarchive/brozzler",
     author="Noah Levitt",