mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-24 08:39:59 -05:00
Handle the case where the websocket connection is unexpectedly closed during the post-behavior phase
This commit is contained in:
parent
3bf3c80720
commit
c6e6b34e82
@ -238,25 +238,41 @@ class Browser:
|
|||||||
if self._post_behavior_interval_func():
|
if self._post_behavior_interval_func():
|
||||||
return self._outlinks
|
return self._outlinks
|
||||||
finally:
|
finally:
|
||||||
if self._websock and self._websock.sock and self._websock.sock.connected:
|
if (self._websock and self._websock.sock
|
||||||
|
and self._websock.sock.connected):
|
||||||
try:
|
try:
|
||||||
self._websock.close()
|
self._websock.close()
|
||||||
except BaseException as e:
|
except BaseException as e:
|
||||||
self.logger.error("exception closing websocket {} - {}".format(self._websock, e))
|
self.logger.error(
|
||||||
|
"exception closing websocket %s - %s" % (
|
||||||
|
self._websock, e))
|
||||||
|
|
||||||
websock_thread.join(timeout=30)
|
websock_thread.join(timeout=30)
|
||||||
if websock_thread.is_alive():
|
if websock_thread.is_alive():
|
||||||
self.logger.error("{} still alive 30 seconds after closing {}, will forcefully nudge it again".format(websock_thread, self._websock))
|
self.logger.error(
|
||||||
|
"%s still alive 30 seconds after closing %s, will "
|
||||||
|
"forcefully nudge it again" % (
|
||||||
|
websock_thread, self._websock))
|
||||||
self._websock.keep_running = False
|
self._websock.keep_running = False
|
||||||
websock_thread.join(timeout=30)
|
websock_thread.join(timeout=30)
|
||||||
if websock_thread.is_alive():
|
if websock_thread.is_alive():
|
||||||
self.logger.critical("{} still alive 60 seconds after closing {}".format(websock_thread, self._websock))
|
self.logger.critical(
|
||||||
|
"%s still alive 60 seconds after closing %s" % (
|
||||||
|
websock_thread, self._websock))
|
||||||
|
|
||||||
self._behavior = None
|
self._behavior = None
|
||||||
|
|
||||||
def _post_behavior_interval_func(self):
|
def _post_behavior_interval_func(self):
|
||||||
"""Called periodically after behavior is finished on the page. Returns
|
"""
|
||||||
true when post-behavior tasks are finished."""
|
Called periodically after behavior is finished on the page. Returns
|
||||||
|
true when post-behavior tasks are finished.
|
||||||
|
"""
|
||||||
|
if (not self._websock or not self._websock.sock
|
||||||
|
or not self._websock.sock.connected):
|
||||||
|
raise BrowsingException(
|
||||||
|
"websocket closed, did chrome die? {}".format(
|
||||||
|
self._websocket_url))
|
||||||
|
|
||||||
if (not self._has_screenshot
|
if (not self._has_screenshot
|
||||||
and not self._waiting_on_scroll_to_top_msg_id
|
and not self._waiting_on_scroll_to_top_msg_id
|
||||||
and not self._waiting_on_screenshot_msg_id):
|
and not self._waiting_on_screenshot_msg_id):
|
||||||
@ -501,15 +517,19 @@ class Chrome:
|
|||||||
self.ignore_cert_errors = ignore_cert_errors
|
self.ignore_cert_errors = ignore_cert_errors
|
||||||
self._shutdown = threading.Event()
|
self._shutdown = threading.Event()
|
||||||
|
|
||||||
# returns websocket url to chrome window with about:blank loaded
|
|
||||||
def __enter__(self):
|
def __enter__(self):
|
||||||
|
'''
|
||||||
|
Returns websocket url to chrome window with about:blank loaded.
|
||||||
|
'''
|
||||||
return self.start()
|
return self.start()
|
||||||
|
|
||||||
def __exit__(self, *args):
|
def __exit__(self, *args):
|
||||||
self.stop()
|
self.stop()
|
||||||
|
|
||||||
# returns websocket url to chrome window with about:blank loaded
|
|
||||||
def start(self):
|
def start(self):
|
||||||
|
'''
|
||||||
|
Returns websocket url to chrome window with about:blank loaded.
|
||||||
|
'''
|
||||||
timeout_sec = 600
|
timeout_sec = 600
|
||||||
new_env = os.environ.copy()
|
new_env = os.environ.copy()
|
||||||
new_env["HOME"] = self.user_home_dir
|
new_env["HOME"] = self.user_home_dir
|
||||||
@ -615,7 +635,7 @@ class Chrome:
|
|||||||
|
|
||||||
timeout_sec = 300
|
timeout_sec = 300
|
||||||
self._shutdown.set()
|
self._shutdown.set()
|
||||||
self.logger.info("terminating chrome pid %s" % self.chrome_process.pid)
|
self.logger.info("terminating chrome pgid %s" % self.chrome_process.pid)
|
||||||
|
|
||||||
os.killpg(self.chrome_process.pid, signal.SIGTERM)
|
os.killpg(self.chrome_process.pid, signal.SIGTERM)
|
||||||
first_sigterm = time.time()
|
first_sigterm = time.time()
|
||||||
|
2
setup.py
2
setup.py
@ -21,7 +21,7 @@ import setuptools
|
|||||||
|
|
||||||
setuptools.setup(
|
setuptools.setup(
|
||||||
name='brozzler',
|
name='brozzler',
|
||||||
version='1.1b3.dev49',
|
version='1.1b3.dev50',
|
||||||
description='Distributed web crawling with browsers',
|
description='Distributed web crawling with browsers',
|
||||||
url='https://github.com/internetarchive/brozzler',
|
url='https://github.com/internetarchive/brozzler',
|
||||||
author='Noah Levitt',
|
author='Noah Levitt',
|
||||||
|
Loading…
x
Reference in New Issue
Block a user