sigterm and sigint both request shutdown, which stops consuming urls and waits for active browsers to finish; a second sigint/sigterm immediately shuts down active browsers

This commit is contained in:
Noah Levitt 2014-05-24 01:52:22 -07:00
parent b67d9fadf0
commit 9c08be2699
3 changed files with 54 additions and 15 deletions

View file

@ -9,6 +9,8 @@ import sys
import signal import signal
import os import os
import umbra import umbra
import signal
import threading
if __name__=="__main__": if __name__=="__main__":
import faulthandler import faulthandler
@ -43,14 +45,34 @@ if __name__=="__main__":
exchange_name=args.amqp_exchange, queue_name=args.amqp_queue, exchange_name=args.amqp_exchange, queue_name=args.amqp_queue,
routing_key=args.amqp_routing_key) routing_key=args.amqp_routing_key)
class ShutdownRequested(Exception):
pass
def sigterm(signum, frame):
raise ShutdownRequested('shutdown requested (caught SIGTERM)')
def sigint(signum, frame):
raise ShutdownRequested('shutdown requested (caught SIGINT)')
signal.signal(signal.SIGTERM, sigterm)
signal.signal(signal.SIGINT, sigint)
umbra.start() umbra.start()
try: try:
while True: while True:
time.sleep(0.5) time.sleep(0.5)
except: except ShutdownRequested as e:
pass logging.info(e)
except BaseException as e:
logging.fatal(e)
finally: finally:
try:
umbra.shutdown() umbra.shutdown()
for th in threading.enumerate():
if th != threading.current_thread():
th.join()
except BaseException as e:
logging.warn("caught {}".format(e))
umbra.shutdown_now()

View file

@ -20,6 +20,7 @@ class BrowserPool:
def __init__(self, size=3, chrome_exe='chromium-browser', chrome_wait=60): def __init__(self, size=3, chrome_exe='chromium-browser', chrome_wait=60):
self._available = set() self._available = set()
self._in_use = set()
for i in range(0, size): for i in range(0, size):
port_holder = self._grab_random_port() port_holder = self._grab_random_port()
@ -47,12 +48,19 @@ class BrowserPool:
with self._lock: with self._lock:
(browser, port_holder) = self._available.pop() (browser, port_holder) = self._available.pop()
port_holder.close() port_holder.close()
self._in_use.add(browser)
return browser return browser
def release(self, browser): def release(self, browser):
with self._lock: with self._lock:
port_holder = self._hold_port(browser.chrome_port) port_holder = self._hold_port(browser.chrome_port)
self._available.add((browser, port_holder)) self._available.add((browser, port_holder))
self._in_use.remove(browser)
def shutdown_now(self):
for browser in self._in_use:
browser.shutdown_now()
class Browser: class Browser:
"""Runs chrome/chromium to synchronously browse one page at a time using """Runs chrome/chromium to synchronously browse one page at a time using
@ -72,6 +80,10 @@ class Browser:
self.chrome_wait = chrome_wait self.chrome_wait = chrome_wait
self._behavior = None self._behavior = None
self.websock = None self.websock = None
self._shutdown_now = False
def shutdown_now(self):
self._shutdown_now = True
def browse_page(self, url, on_request=None): def browse_page(self, url, on_request=None):
"""Synchronously browses a page and runs behaviors. First blocks to """Synchronously browses a page and runs behaviors. First blocks to
@ -103,6 +115,9 @@ class Browser:
elif self._behavior != None and self._behavior.is_finished(): elif self._behavior != None and self._behavior.is_finished():
self.logger.info("finished browsing page according to behavior url={}".format(self.url)) self.logger.info("finished browsing page according to behavior url={}".format(self.url))
break break
elif self._shutdown_now:
self.logger.warn("immediate shutdown requested")
break
try: try:
self.websock.close() self.websock.close()

View file

@ -73,9 +73,12 @@ class AmqpBrowserController:
self.logger.info("shutting down amqp consumer {}".format(self.amqp_url)) self.logger.info("shutting down amqp consumer {}".format(self.amqp_url))
self._amqp_stop.set() self._amqp_stop.set()
self._amqp_thread.join() self._amqp_thread.join()
with self._producer_lock: # with self._producer_lock:
self._producer_conn.close() # self._producer_conn.close()
self._producer_conn = None # self._producer_conn = None
def shutdown_now(self):
self._browser_pool.shutdown_now()
def _consume_amqp(self): def _consume_amqp(self):
# XXX https://webarchive.jira.com/browse/ARI-3811 # XXX https://webarchive.jira.com/browse/ARI-3811
@ -86,11 +89,11 @@ class AmqpBrowserController:
# reopen the connection every 15 minutes # reopen the connection every 15 minutes
RECONNECT_AFTER_SECONDS = 15 * 60 RECONNECT_AFTER_SECONDS = 15 * 60
browser = None url_queue = kombu.Queue(self.queue_name, routing_key=self.routing_key,
exchange=self._exchange)
while not self._amqp_stop.is_set(): while not self._amqp_stop.is_set():
try: try:
url_queue = kombu.Queue(self.queue_name, routing_key=self.routing_key, exchange=self._exchange)
self.logger.info("connecting to amqp exchange={} at {}".format(self._exchange.name, self.amqp_url)) self.logger.info("connecting to amqp exchange={} at {}".format(self._exchange.name, self.amqp_url))
with kombu.Connection(self.amqp_url) as conn: with kombu.Connection(self.amqp_url) as conn:
conn_opened = time.time() conn_opened = time.time()
@ -99,13 +102,13 @@ class AmqpBrowserController:
while (not self._amqp_stop.is_set() and time.time() - conn_opened < RECONNECT_AFTER_SECONDS): while (not self._amqp_stop.is_set() and time.time() - conn_opened < RECONNECT_AFTER_SECONDS):
import socket import socket
try: try:
browser = self._browser_pool.acquire() browser = self._browser_pool.acquire() # raises KeyError if none available
consumer.callbacks = [self._make_callback(browser)] consumer.callbacks = [self._make_callback(browser)]
conn.drain_events(timeout=0.5) conn.drain_events(timeout=0.5)
consumer.callbacks = None consumer.callbacks = None
except KeyError: except KeyError:
# no browsers available # no browsers available
pass time.sleep(0.5)
except socket.timeout: except socket.timeout:
# no urls in the queue # no urls in the queue
self._browser_pool.release(browser) self._browser_pool.release(browser)
@ -122,9 +125,6 @@ class AmqpBrowserController:
return callback return callback
def _browse_page(self, browser, client_id, url, parent_url_metadata): def _browse_page(self, browser, client_id, url, parent_url_metadata):
"""Kombu Consumer callback. Provisions a Browser and
asynchronously asks it to browse the requested url."""
def on_request(chrome_msg): def on_request(chrome_msg):
payload = chrome_msg['params']['request'] payload = chrome_msg['params']['request']
payload['parentUrl'] = url payload['parentUrl'] = url
@ -136,7 +136,9 @@ class AmqpBrowserController:
def browse_page_async(): def browse_page_async():
self.logger.info('browser={} client_id={} url={}'.format(browser, client_id, url)) self.logger.info('browser={} client_id={} url={}'.format(browser, client_id, url))
try:
browser.browse_page(url, on_request=on_request) browser.browse_page(url, on_request=on_request)
finally:
self._browser_pool.release(browser) self._browser_pool.release(browser)
import random import random