mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-04-19 23:35:54 -04:00
sigterm and sigint both request shutdown, which stops consuming urls and waits for active browsers to finish; a second sigint/sigterm immediately shuts down active browsers
This commit is contained in:
parent
b67d9fadf0
commit
9c08be2699
28
bin/umbra
28
bin/umbra
@ -9,6 +9,8 @@ import sys
|
||||
import signal
|
||||
import os
|
||||
import umbra
|
||||
import signal
|
||||
import threading
|
||||
|
||||
if __name__=="__main__":
|
||||
import faulthandler
|
||||
@ -43,14 +45,34 @@ if __name__=="__main__":
|
||||
exchange_name=args.amqp_exchange, queue_name=args.amqp_queue,
|
||||
routing_key=args.amqp_routing_key)
|
||||
|
||||
class ShutdownRequested(Exception):
|
||||
pass
|
||||
|
||||
def sigterm(signum, frame):
|
||||
raise ShutdownRequested('shutdown requested (caught SIGTERM)')
|
||||
def sigint(signum, frame):
|
||||
raise ShutdownRequested('shutdown requested (caught SIGINT)')
|
||||
|
||||
signal.signal(signal.SIGTERM, sigterm)
|
||||
signal.signal(signal.SIGINT, sigint)
|
||||
|
||||
umbra.start()
|
||||
|
||||
try:
|
||||
while True:
|
||||
time.sleep(0.5)
|
||||
except:
|
||||
pass
|
||||
except ShutdownRequested as e:
|
||||
logging.info(e)
|
||||
except BaseException as e:
|
||||
logging.fatal(e)
|
||||
finally:
|
||||
umbra.shutdown()
|
||||
try:
|
||||
umbra.shutdown()
|
||||
for th in threading.enumerate():
|
||||
if th != threading.current_thread():
|
||||
th.join()
|
||||
except BaseException as e:
|
||||
logging.warn("caught {}".format(e))
|
||||
umbra.shutdown_now()
|
||||
|
||||
|
||||
|
@ -20,6 +20,7 @@ class BrowserPool:
|
||||
|
||||
def __init__(self, size=3, chrome_exe='chromium-browser', chrome_wait=60):
|
||||
self._available = set()
|
||||
self._in_use = set()
|
||||
|
||||
for i in range(0, size):
|
||||
port_holder = self._grab_random_port()
|
||||
@ -47,12 +48,19 @@ class BrowserPool:
|
||||
with self._lock:
|
||||
(browser, port_holder) = self._available.pop()
|
||||
port_holder.close()
|
||||
self._in_use.add(browser)
|
||||
return browser
|
||||
|
||||
def release(self, browser):
|
||||
with self._lock:
|
||||
port_holder = self._hold_port(browser.chrome_port)
|
||||
self._available.add((browser, port_holder))
|
||||
self._in_use.remove(browser)
|
||||
|
||||
def shutdown_now(self):
|
||||
for browser in self._in_use:
|
||||
browser.shutdown_now()
|
||||
|
||||
|
||||
class Browser:
|
||||
"""Runs chrome/chromium to synchronously browse one page at a time using
|
||||
@ -72,6 +80,10 @@ class Browser:
|
||||
self.chrome_wait = chrome_wait
|
||||
self._behavior = None
|
||||
self.websock = None
|
||||
self._shutdown_now = False
|
||||
|
||||
def shutdown_now(self):
|
||||
self._shutdown_now = True
|
||||
|
||||
def browse_page(self, url, on_request=None):
|
||||
"""Synchronously browses a page and runs behaviors. First blocks to
|
||||
@ -103,6 +115,9 @@ class Browser:
|
||||
elif self._behavior != None and self._behavior.is_finished():
|
||||
self.logger.info("finished browsing page according to behavior url={}".format(self.url))
|
||||
break
|
||||
elif self._shutdown_now:
|
||||
self.logger.warn("immediate shutdown requested")
|
||||
break
|
||||
|
||||
try:
|
||||
self.websock.close()
|
||||
|
@ -73,9 +73,12 @@ class AmqpBrowserController:
|
||||
self.logger.info("shutting down amqp consumer {}".format(self.amqp_url))
|
||||
self._amqp_stop.set()
|
||||
self._amqp_thread.join()
|
||||
with self._producer_lock:
|
||||
self._producer_conn.close()
|
||||
self._producer_conn = None
|
||||
# with self._producer_lock:
|
||||
# self._producer_conn.close()
|
||||
# self._producer_conn = None
|
||||
|
||||
def shutdown_now(self):
|
||||
self._browser_pool.shutdown_now()
|
||||
|
||||
def _consume_amqp(self):
|
||||
# XXX https://webarchive.jira.com/browse/ARI-3811
|
||||
@ -86,11 +89,11 @@ class AmqpBrowserController:
|
||||
# reopen the connection every 15 minutes
|
||||
RECONNECT_AFTER_SECONDS = 15 * 60
|
||||
|
||||
browser = None
|
||||
url_queue = kombu.Queue(self.queue_name, routing_key=self.routing_key,
|
||||
exchange=self._exchange)
|
||||
|
||||
while not self._amqp_stop.is_set():
|
||||
try:
|
||||
url_queue = kombu.Queue(self.queue_name, routing_key=self.routing_key, exchange=self._exchange)
|
||||
self.logger.info("connecting to amqp exchange={} at {}".format(self._exchange.name, self.amqp_url))
|
||||
with kombu.Connection(self.amqp_url) as conn:
|
||||
conn_opened = time.time()
|
||||
@ -99,13 +102,13 @@ class AmqpBrowserController:
|
||||
while (not self._amqp_stop.is_set() and time.time() - conn_opened < RECONNECT_AFTER_SECONDS):
|
||||
import socket
|
||||
try:
|
||||
browser = self._browser_pool.acquire()
|
||||
browser = self._browser_pool.acquire() # raises KeyError if none available
|
||||
consumer.callbacks = [self._make_callback(browser)]
|
||||
conn.drain_events(timeout=0.5)
|
||||
consumer.callbacks = None
|
||||
except KeyError:
|
||||
# no browsers available
|
||||
pass
|
||||
time.sleep(0.5)
|
||||
except socket.timeout:
|
||||
# no urls in the queue
|
||||
self._browser_pool.release(browser)
|
||||
@ -122,9 +125,6 @@ class AmqpBrowserController:
|
||||
return callback
|
||||
|
||||
def _browse_page(self, browser, client_id, url, parent_url_metadata):
|
||||
"""Kombu Consumer callback. Provisions a Browser and
|
||||
asynchronously asks it to browse the requested url."""
|
||||
|
||||
def on_request(chrome_msg):
|
||||
payload = chrome_msg['params']['request']
|
||||
payload['parentUrl'] = url
|
||||
@ -136,8 +136,10 @@ class AmqpBrowserController:
|
||||
|
||||
def browse_page_async():
|
||||
self.logger.info('browser={} client_id={} url={}'.format(browser, client_id, url))
|
||||
browser.browse_page(url, on_request=on_request)
|
||||
self._browser_pool.release(browser)
|
||||
try:
|
||||
browser.browse_page(url, on_request=on_request)
|
||||
finally:
|
||||
self._browser_pool.release(browser)
|
||||
|
||||
import random
|
||||
threadName = "BrowsingThread{}-{}".format(browser.chrome_port,
|
||||
|
Loading…
x
Reference in New Issue
Block a user