sigterm and sigint both request shutdown, which stops consuming urls and waits for active browsers to finish; a second sigint/sigterm immediately shuts down active browsers

This commit is contained in:
Noah Levitt 2014-05-24 01:52:22 -07:00
parent b67d9fadf0
commit 9c08be2699
3 changed files with 54 additions and 15 deletions

View File

@ -9,6 +9,8 @@ import sys
import signal
import os
import umbra
import signal
import threading
if __name__=="__main__":
import faulthandler
@ -43,14 +45,34 @@ if __name__=="__main__":
exchange_name=args.amqp_exchange, queue_name=args.amqp_queue,
routing_key=args.amqp_routing_key)
# Raised by the signal handlers below to interrupt the main wait loop; the
# loop's `except ShutdownRequested` clause logs it before shutdown begins.
class ShutdownRequested(Exception):
pass
# SIGTERM handler: turns the signal into a ShutdownRequested exception.
# signum and frame are the standard handler arguments and are unused.
def sigterm(signum, frame):
raise ShutdownRequested('shutdown requested (caught SIGTERM)')
# SIGINT handler: same as sigterm, with a message naming SIGINT.
def sigint(signum, frame):
raise ShutdownRequested('shutdown requested (caught SIGINT)')
# Install the handlers. Registering a SIGINT handler replaces Python's
# default behavior of raising KeyboardInterrupt.
signal.signal(signal.SIGTERM, sigterm)
signal.signal(signal.SIGINT, sigint)
umbra.start()
try:
while True:
time.sleep(0.5)
except:
pass
except ShutdownRequested as e:
logging.info(e)
except BaseException as e:
logging.fatal(e)
finally:
umbra.shutdown()
try:
umbra.shutdown()
for th in threading.enumerate():
if th != threading.current_thread():
th.join()
except BaseException as e:
logging.warn("caught {}".format(e))
umbra.shutdown_now()

View File

@ -20,6 +20,7 @@ class BrowserPool:
def __init__(self, size=3, chrome_exe='chromium-browser', chrome_wait=60):
self._available = set()
self._in_use = set()
for i in range(0, size):
port_holder = self._grab_random_port()
@ -47,12 +48,19 @@ class BrowserPool:
with self._lock:
(browser, port_holder) = self._available.pop()
port_holder.close()
self._in_use.add(browser)
return browser
def release(self, browser):
    """Return a browser to the pool.

    Under the pool lock, re-holds the browser's chrome port, records the
    (browser, port holder) pair as available, and then removes the browser
    from the in-use set. Raises KeyError if the browser was not in use.
    """
    with self._lock:
        available_entry = (browser, self._hold_port(browser.chrome_port))
        self._available.add(available_entry)
        self._in_use.remove(browser)
def shutdown_now(self):
    """Tell every browser that is currently checked out to stop immediately.

    The in-use set is copied while holding the pool lock, and the browsers
    are signalled from that copy. Browsing threads call release() (which
    removes from the in-use set) as soon as they stop, so iterating the live
    set here could fail with "Set changed size during iteration".
    """
    with self._lock:
        in_use_snapshot = list(self._in_use)
    # Signal outside the lock so release() in other threads is never blocked
    # behind this loop.
    for browser in in_use_snapshot:
        browser.shutdown_now()
class Browser:
"""Runs chrome/chromium to synchronously browse one page at a time using
@ -72,6 +80,10 @@ class Browser:
self.chrome_wait = chrome_wait
self._behavior = None
self.websock = None
self._shutdown_now = False
# Request an immediate stop. This only sets a flag; the browsing loop in
# browse_page checks self._shutdown_now and breaks out when it is set.
def shutdown_now(self):
self._shutdown_now = True
def browse_page(self, url, on_request=None):
"""Synchronously browses a page and runs behaviors. First blocks to
@ -103,6 +115,9 @@ class Browser:
elif self._behavior != None and self._behavior.is_finished():
self.logger.info("finished browsing page according to behavior url={}".format(self.url))
break
elif self._shutdown_now:
self.logger.warn("immediate shutdown requested")
break
try:
self.websock.close()

View File

@ -73,9 +73,12 @@ class AmqpBrowserController:
self.logger.info("shutting down amqp consumer {}".format(self.amqp_url))
self._amqp_stop.set()
self._amqp_thread.join()
with self._producer_lock:
self._producer_conn.close()
self._producer_conn = None
# with self._producer_lock:
# self._producer_conn.close()
# self._producer_conn = None
# Immediate shutdown: delegates to the browser pool's shutdown_now().
# NOTE(review): presumably self._browser_pool is a BrowserPool, whose
# shutdown_now() signals each in-use browser -- confirm.
def shutdown_now(self):
self._browser_pool.shutdown_now()
def _consume_amqp(self):
# XXX https://webarchive.jira.com/browse/ARI-3811
@ -86,11 +89,11 @@ class AmqpBrowserController:
# reopen the connection every 15 minutes
RECONNECT_AFTER_SECONDS = 15 * 60
browser = None
url_queue = kombu.Queue(self.queue_name, routing_key=self.routing_key,
exchange=self._exchange)
while not self._amqp_stop.is_set():
try:
url_queue = kombu.Queue(self.queue_name, routing_key=self.routing_key, exchange=self._exchange)
self.logger.info("connecting to amqp exchange={} at {}".format(self._exchange.name, self.amqp_url))
with kombu.Connection(self.amqp_url) as conn:
conn_opened = time.time()
@ -99,13 +102,13 @@ class AmqpBrowserController:
while (not self._amqp_stop.is_set() and time.time() - conn_opened < RECONNECT_AFTER_SECONDS):
import socket
try:
browser = self._browser_pool.acquire()
browser = self._browser_pool.acquire() # raises KeyError if none available
consumer.callbacks = [self._make_callback(browser)]
conn.drain_events(timeout=0.5)
consumer.callbacks = None
except KeyError:
# no browsers available
pass
time.sleep(0.5)
except socket.timeout:
# no urls in the queue
self._browser_pool.release(browser)
@ -122,9 +125,6 @@ class AmqpBrowserController:
return callback
def _browse_page(self, browser, client_id, url, parent_url_metadata):
"""Kombu Consumer callback. Provisions a Browser and
asynchronously asks it to browse the requested url."""
def on_request(chrome_msg):
payload = chrome_msg['params']['request']
payload['parentUrl'] = url
@ -136,8 +136,10 @@ class AmqpBrowserController:
def browse_page_async():
self.logger.info('browser={} client_id={} url={}'.format(browser, client_id, url))
browser.browse_page(url, on_request=on_request)
self._browser_pool.release(browser)
try:
browser.browse_page(url, on_request=on_request)
finally:
self._browser_pool.release(browser)
import random
threadName = "BrowsingThread{}-{}".format(browser.chrome_port,