mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-04-20 23:56:34 -04:00
more shutdown tweaks
This commit is contained in:
parent
5fa96b6438
commit
4bdad4729a
@ -77,7 +77,7 @@ class BrowserPool:
|
||||
try:
|
||||
browser = self._available.pop()
|
||||
except KeyError:
|
||||
raise NoBrowsersAvailable()
|
||||
raise NoBrowsersAvailable
|
||||
self._in_use.add(browser)
|
||||
return browser
|
||||
|
||||
@ -87,8 +87,12 @@ class BrowserPool:
|
||||
self._in_use.remove(browser)
|
||||
|
||||
def shutdown_now(self):
|
||||
self.logger.info('shutting down browser pool')
|
||||
self.logger.info(
|
||||
'shutting down browser pool (%s browsers in use)',
|
||||
len(self._in_use))
|
||||
with self._lock:
|
||||
for browser in self._available:
|
||||
browser.stop()
|
||||
for browser in self._in_use:
|
||||
browser.stop()
|
||||
|
||||
@ -114,6 +118,7 @@ class Browser:
|
||||
self.chrome = Chrome(**kwargs)
|
||||
self.websocket_url = None
|
||||
self.is_browsing = False
|
||||
self._browser_controller = None
|
||||
|
||||
def __enter__(self):
|
||||
self.start()
|
||||
@ -139,10 +144,10 @@ class Browser:
|
||||
Stops chrome if it's running.
|
||||
'''
|
||||
try:
|
||||
if self.is_running():
|
||||
if self._browser_controller:
|
||||
self._browser_controller.stop()
|
||||
self.websocket_url = None
|
||||
self.chrome.stop()
|
||||
self.websocket_url = None
|
||||
self.chrome.stop()
|
||||
except:
|
||||
self.logger.error('problem stopping', exc_info=True)
|
||||
|
||||
@ -215,19 +220,9 @@ class Browser:
|
||||
## outlinks += retrieve_outlinks (60 sec)
|
||||
final_page_url = self._browser_controller.url()
|
||||
return final_page_url, outlinks
|
||||
except brozzler.ShutdownRequested:
|
||||
self.logger.info('shutdown requested')
|
||||
raise
|
||||
except websocket.WebSocketConnectionClosedException as e:
|
||||
# import pdb; pdb.set_trace()
|
||||
self.logger.error('websocket closed, did chrome die?')
|
||||
raise BrowsingException(e)
|
||||
# if not self.is_running():
|
||||
# logging.info('appears shutdown was requested')
|
||||
# return None, None
|
||||
# else:
|
||||
# raise BrowsingException(
|
||||
# "websocket closed, did chrome die? %s" % (
|
||||
# self.websocket_url))
|
||||
finally:
|
||||
self.is_browsing = False
|
||||
|
||||
@ -287,8 +282,12 @@ class BrowserController:
|
||||
'''
|
||||
Raises BrowsingException in the thread that called start()
|
||||
'''
|
||||
self.logger.error(
|
||||
'exception from websocket receiver thread', exc_info=1)
|
||||
if isinstance(e, websocket.WebSocketConnectionClosedException):
|
||||
self.logger.error('websocket closed, did chrome die?')
|
||||
else:
|
||||
self.logger.error(
|
||||
'exception from websocket receiver thread',
|
||||
exc_info=1)
|
||||
brozzler.thread_raise(calling_thread, BrowsingException)
|
||||
|
||||
# open websocket, start thread that receives messages
|
||||
|
@ -50,6 +50,7 @@ class Chrome:
|
||||
self.chrome_exe = chrome_exe
|
||||
self.ignore_cert_errors = ignore_cert_errors
|
||||
self._shutdown = threading.Event()
|
||||
self.chrome_process = None
|
||||
|
||||
def __enter__(self):
|
||||
'''
|
||||
@ -188,6 +189,8 @@ class Chrome:
|
||||
'got chrome window websocket debug url %s from %s',
|
||||
url, json_url)
|
||||
return url
|
||||
except brozzler.ShutdownRequested:
|
||||
raise
|
||||
except BaseException as e:
|
||||
if int(time.time() - self._start) % 10 == 5:
|
||||
self.logger.warn(
|
||||
@ -253,18 +256,18 @@ class Chrome:
|
||||
def stop(self):
|
||||
if not self.chrome_process or self._shutdown.is_set():
|
||||
return
|
||||
self._shutdown.set()
|
||||
|
||||
timeout_sec = 300
|
||||
self._shutdown.set()
|
||||
self.logger.info('terminating chrome pgid %s' % self.chrome_process.pid)
|
||||
if self.chrome_process.poll() is None:
|
||||
self.logger.info(
|
||||
'terminating chrome pgid %s', self.chrome_process.pid)
|
||||
|
||||
os.killpg(self.chrome_process.pid, signal.SIGTERM)
|
||||
first_sigterm = time.time()
|
||||
os.killpg(self.chrome_process.pid, signal.SIGTERM)
|
||||
t0 = time.time()
|
||||
|
||||
try:
|
||||
while time.time() - first_sigterm < timeout_sec:
|
||||
time.sleep(0.5)
|
||||
|
||||
while time.time() - t0 < timeout_sec:
|
||||
status = self.chrome_process.poll()
|
||||
if status is not None:
|
||||
if status == 0:
|
||||
@ -281,11 +284,12 @@ class Chrome:
|
||||
# around, but there's a chance I suppose that some other
|
||||
# process could have started with the same pgid
|
||||
return
|
||||
time.sleep(0.5)
|
||||
|
||||
self.logger.warn(
|
||||
'chrome pid %s still alive %.1f seconds after sending '
|
||||
'SIGTERM, sending SIGKILL', self.chrome_process.pid,
|
||||
time.time() - first_sigterm)
|
||||
time.time() - t0)
|
||||
os.killpg(self.chrome_process.pid, signal.SIGKILL)
|
||||
status = self.chrome_process.wait()
|
||||
self.logger.warn(
|
||||
@ -301,3 +305,4 @@ class Chrome:
|
||||
finally:
|
||||
self._out_reader_thread.join()
|
||||
self.chrome_process = None
|
||||
|
||||
|
@ -322,9 +322,9 @@ class BrozzlerWorker:
|
||||
return False
|
||||
|
||||
def _brozzle_site(self, browser, site):
|
||||
start = time.time()
|
||||
page = None
|
||||
try:
|
||||
start = time.time()
|
||||
while time.time() - start < 7 * 60:
|
||||
self._frontier.honor_stop_request(site.job_id)
|
||||
page = self._frontier.claim_page(site, "%s:%s" % (
|
||||
|
Loading…
x
Reference in New Issue
Block a user