mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-23 16:19:49 -05:00
timeout for fetch_url
This commit is contained in:
parent
7ededbc521
commit
65de0d2a5f
@@ -52,6 +52,7 @@ class BrozzlerWorker:
|
|||||||
HEARTBEAT_INTERVAL = 200.0
|
HEARTBEAT_INTERVAL = 200.0
|
||||||
SITE_SESSION_MINUTES = 15
|
SITE_SESSION_MINUTES = 15
|
||||||
HEADER_REQUEST_TIMEOUT = 30
|
HEADER_REQUEST_TIMEOUT = 30
|
||||||
|
FETCH_URL_TIMEOUT = 60
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
@@ -334,6 +335,7 @@ class BrozzlerWorker:
|
|||||||
# bypassing warcprox, requests' stream=True defers downloading the body of the response
|
# bypassing warcprox, requests' stream=True defers downloading the body of the response
|
||||||
# see https://docs.python-requests.org/en/latest/user/advanced/#body-content-workflow
|
# see https://docs.python-requests.org/en/latest/user/advanced/#body-content-workflow
|
||||||
try:
|
try:
|
||||||
|
self.logger.info("getting page headers for %s", page.url)
|
||||||
with requests.get(
|
with requests.get(
|
||||||
page.url, stream=True, verify=False, timeout=self.HEADER_REQUEST_TIMEOUT
|
page.url, stream=True, verify=False, timeout=self.HEADER_REQUEST_TIMEOUT
|
||||||
) as r:
|
) as r:
|
||||||
@@ -485,8 +487,14 @@ class BrozzlerWorker:
|
|||||||
try:
|
try:
|
||||||
# response is ignored
|
# response is ignored
|
||||||
requests.get(
|
requests.get(
|
||||||
url, proxies=proxies, headers=site.extra_headers(page), verify=False
|
url,
|
||||||
|
proxies=proxies,
|
||||||
|
headers=site.extra_headers(page),
|
||||||
|
verify=False,
|
||||||
|
timeout=self.FETCH_URL_TIMEOUT,
|
||||||
)
|
)
|
||||||
|
except requests.exceptions.Timeout as e:
|
||||||
|
self.logger.warning("Timed out fetching %s: %s", page.url, e)
|
||||||
except requests.exceptions.ProxyError as e:
|
except requests.exceptions.ProxyError as e:
|
||||||
raise brozzler.ProxyError("proxy error fetching %s" % url) from e
|
raise brozzler.ProxyError("proxy error fetching %s" % url) from e
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user