mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-07-26 08:15:43 -04:00
more shutdown tweaks
This commit is contained in:
parent
5fa96b6438
commit
4bdad4729a
3 changed files with 31 additions and 27 deletions
|
@@ -77,7 +77,7 @@ class BrowserPool:
|
||||||
try:
|
try:
|
||||||
browser = self._available.pop()
|
browser = self._available.pop()
|
||||||
except KeyError:
|
except KeyError:
|
||||||
raise NoBrowsersAvailable()
|
raise NoBrowsersAvailable
|
||||||
self._in_use.add(browser)
|
self._in_use.add(browser)
|
||||||
return browser
|
return browser
|
||||||
|
|
||||||
|
@@ -87,8 +87,12 @@ class BrowserPool:
|
||||||
self._in_use.remove(browser)
|
self._in_use.remove(browser)
|
||||||
|
|
||||||
def shutdown_now(self):
|
def shutdown_now(self):
|
||||||
self.logger.info('shutting down browser pool')
|
self.logger.info(
|
||||||
|
'shutting down browser pool (%s browsers in use)',
|
||||||
|
len(self._in_use))
|
||||||
with self._lock:
|
with self._lock:
|
||||||
|
for browser in self._available:
|
||||||
|
browser.stop()
|
||||||
for browser in self._in_use:
|
for browser in self._in_use:
|
||||||
browser.stop()
|
browser.stop()
|
||||||
|
|
||||||
|
@@ -114,6 +118,7 @@ class Browser:
|
||||||
self.chrome = Chrome(**kwargs)
|
self.chrome = Chrome(**kwargs)
|
||||||
self.websocket_url = None
|
self.websocket_url = None
|
||||||
self.is_browsing = False
|
self.is_browsing = False
|
||||||
|
self._browser_controller = None
|
||||||
|
|
||||||
def __enter__(self):
|
def __enter__(self):
|
||||||
self.start()
|
self.start()
|
||||||
|
@@ -139,7 +144,7 @@ class Browser:
|
||||||
Stops chrome if it's running.
|
Stops chrome if it's running.
|
||||||
'''
|
'''
|
||||||
try:
|
try:
|
||||||
if self.is_running():
|
if self._browser_controller:
|
||||||
self._browser_controller.stop()
|
self._browser_controller.stop()
|
||||||
self.websocket_url = None
|
self.websocket_url = None
|
||||||
self.chrome.stop()
|
self.chrome.stop()
|
||||||
|
@@ -215,19 +220,9 @@ class Browser:
|
||||||
## outlinks += retrieve_outlinks (60 sec)
|
## outlinks += retrieve_outlinks (60 sec)
|
||||||
final_page_url = self._browser_controller.url()
|
final_page_url = self._browser_controller.url()
|
||||||
return final_page_url, outlinks
|
return final_page_url, outlinks
|
||||||
except brozzler.ShutdownRequested:
|
|
||||||
self.logger.info('shutdown requested')
|
|
||||||
raise
|
|
||||||
except websocket.WebSocketConnectionClosedException as e:
|
except websocket.WebSocketConnectionClosedException as e:
|
||||||
# import pdb; pdb.set_trace()
|
self.logger.error('websocket closed, did chrome die?')
|
||||||
raise BrowsingException(e)
|
raise BrowsingException(e)
|
||||||
# if not self.is_running():
|
|
||||||
# logging.info('appears shutdown was requested')
|
|
||||||
# return None, None
|
|
||||||
# else:
|
|
||||||
# raise BrowsingException(
|
|
||||||
# "websocket closed, did chrome die? %s" % (
|
|
||||||
# self.websocket_url))
|
|
||||||
finally:
|
finally:
|
||||||
self.is_browsing = False
|
self.is_browsing = False
|
||||||
|
|
||||||
|
@@ -287,8 +282,12 @@ class BrowserController:
|
||||||
'''
|
'''
|
||||||
Raises BrowsingException in the thread that called start()
|
Raises BrowsingException in the thread that called start()
|
||||||
'''
|
'''
|
||||||
|
if isinstance(e, websocket.WebSocketConnectionClosedException):
|
||||||
|
self.logger.error('websocket closed, did chrome die?')
|
||||||
|
else:
|
||||||
self.logger.error(
|
self.logger.error(
|
||||||
'exception from websocket receiver thread', exc_info=1)
|
'exception from websocket receiver thread',
|
||||||
|
exc_info=1)
|
||||||
brozzler.thread_raise(calling_thread, BrowsingException)
|
brozzler.thread_raise(calling_thread, BrowsingException)
|
||||||
|
|
||||||
# open websocket, start thread that receives messages
|
# open websocket, start thread that receives messages
|
||||||
|
|
|
@@ -50,6 +50,7 @@ class Chrome:
|
||||||
self.chrome_exe = chrome_exe
|
self.chrome_exe = chrome_exe
|
||||||
self.ignore_cert_errors = ignore_cert_errors
|
self.ignore_cert_errors = ignore_cert_errors
|
||||||
self._shutdown = threading.Event()
|
self._shutdown = threading.Event()
|
||||||
|
self.chrome_process = None
|
||||||
|
|
||||||
def __enter__(self):
|
def __enter__(self):
|
||||||
'''
|
'''
|
||||||
|
@@ -188,6 +189,8 @@ class Chrome:
|
||||||
'got chrome window websocket debug url %s from %s',
|
'got chrome window websocket debug url %s from %s',
|
||||||
url, json_url)
|
url, json_url)
|
||||||
return url
|
return url
|
||||||
|
except brozzler.ShutdownRequested:
|
||||||
|
raise
|
||||||
except BaseException as e:
|
except BaseException as e:
|
||||||
if int(time.time() - self._start) % 10 == 5:
|
if int(time.time() - self._start) % 10 == 5:
|
||||||
self.logger.warn(
|
self.logger.warn(
|
||||||
|
@@ -253,18 +256,18 @@ class Chrome:
|
||||||
def stop(self):
|
def stop(self):
|
||||||
if not self.chrome_process or self._shutdown.is_set():
|
if not self.chrome_process or self._shutdown.is_set():
|
||||||
return
|
return
|
||||||
|
self._shutdown.set()
|
||||||
|
|
||||||
timeout_sec = 300
|
timeout_sec = 300
|
||||||
self._shutdown.set()
|
if self.chrome_process.poll() is None:
|
||||||
self.logger.info('terminating chrome pgid %s' % self.chrome_process.pid)
|
self.logger.info(
|
||||||
|
'terminating chrome pgid %s', self.chrome_process.pid)
|
||||||
|
|
||||||
os.killpg(self.chrome_process.pid, signal.SIGTERM)
|
os.killpg(self.chrome_process.pid, signal.SIGTERM)
|
||||||
first_sigterm = time.time()
|
t0 = time.time()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
while time.time() - first_sigterm < timeout_sec:
|
while time.time() - t0 < timeout_sec:
|
||||||
time.sleep(0.5)
|
|
||||||
|
|
||||||
status = self.chrome_process.poll()
|
status = self.chrome_process.poll()
|
||||||
if status is not None:
|
if status is not None:
|
||||||
if status == 0:
|
if status == 0:
|
||||||
|
@@ -281,11 +284,12 @@ class Chrome:
|
||||||
# around, but there's a chance I suppose that some other
|
# around, but there's a chance I suppose that some other
|
||||||
# process could have started with the same pgid
|
# process could have started with the same pgid
|
||||||
return
|
return
|
||||||
|
time.sleep(0.5)
|
||||||
|
|
||||||
self.logger.warn(
|
self.logger.warn(
|
||||||
'chrome pid %s still alive %.1f seconds after sending '
|
'chrome pid %s still alive %.1f seconds after sending '
|
||||||
'SIGTERM, sending SIGKILL', self.chrome_process.pid,
|
'SIGTERM, sending SIGKILL', self.chrome_process.pid,
|
||||||
time.time() - first_sigterm)
|
time.time() - t0)
|
||||||
os.killpg(self.chrome_process.pid, signal.SIGKILL)
|
os.killpg(self.chrome_process.pid, signal.SIGKILL)
|
||||||
status = self.chrome_process.wait()
|
status = self.chrome_process.wait()
|
||||||
self.logger.warn(
|
self.logger.warn(
|
||||||
|
@@ -301,3 +305,4 @@ class Chrome:
|
||||||
finally:
|
finally:
|
||||||
self._out_reader_thread.join()
|
self._out_reader_thread.join()
|
||||||
self.chrome_process = None
|
self.chrome_process = None
|
||||||
|
|
||||||
|
|
|
@@ -322,9 +322,9 @@ class BrozzlerWorker:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def _brozzle_site(self, browser, site):
|
def _brozzle_site(self, browser, site):
|
||||||
start = time.time()
|
|
||||||
page = None
|
page = None
|
||||||
try:
|
try:
|
||||||
|
start = time.time()
|
||||||
while time.time() - start < 7 * 60:
|
while time.time() - start < 7 * 60:
|
||||||
self._frontier.honor_stop_request(site.job_id)
|
self._frontier.honor_stop_request(site.job_id)
|
||||||
page = self._frontier.claim_page(site, "%s:%s" % (
|
page = self._frontier.claim_page(site, "%s:%s" % (
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue