Merge branch 'prom_headers_update' into qa

This commit is contained in:
Barbara Miller 2024-10-08 17:18:23 -07:00
commit c3c54dab7a

View File

@ -332,13 +332,13 @@ class BrozzlerWorker:
def _get_page_headers(self, page):
    """Fetch the HTTP response headers for ``page.url``.

    Args:
        page: object exposing a ``url`` attribute to request.

    Returns:
        The ``headers`` mapping of the response, or an empty dict when the
        request raises ``requests.exceptions.RequestException`` (the
        failure is logged as a warning rather than propagated).
    """
    # bypassing warcprox, requests' stream=True defers downloading the body of the response
    # see https://docs.python-requests.org/en/latest/user/advanced/#body-content-workflow
    # NOTE(review): no timeout is passed to requests.get, so an unresponsive
    # server may block this call indefinitely -- consider adding one.
    try:
        # The context manager closes the connection on exit; the headers
        # were already read when the response object was created.
        with requests.get(page.url, stream=True, verify=False) as r:
            return r.headers
    except requests.exceptions.RequestException as e:
        self.logger.warning("Failed to get headers for %s: %s", page.url, e)
        return {}
def _needs_browsing(self, page_headers) -> bool:
return not bool(