make umbra amenable to clustering by using a pool of n browsers and removing the browser-clientId affinity (not useful currently since we start a fresh browser instance for each page browsed), and set prefetch_count=1 on amqp consumers to round-robin incoming urls among umbra instances

This commit is contained in:
Noah Levitt 2014-05-23 21:59:34 -07:00
parent 8d269f4c56
commit 2c4ba005b5
4 changed files with 94 additions and 70 deletions

View file

@ -12,8 +12,44 @@ import subprocess
import signal
import tempfile
import os
import socket
from umbra.behaviors import Behavior
class BrowserPool:
    """Fixed-size pool of Browser objects, each tied to its own local port.

    While a browser sits idle in the pool, the pool keeps a socket bound to
    that browser's port on 127.0.0.1. The socket is closed when the browser
    is handed out by acquire() and a fresh one is bound in release().
    """

    def __init__(self, size=3, chrome_exe='chromium-browser', chrome_wait=60):
        """Create `size` browsers, one per port obtained by binding port 0.

        chrome_exe and chrome_wait are passed through unchanged to Browser().
        """
        self._available = set()
        for _ in range(size):
            port_holder = self._grab_random_port()
            browser = Browser(port_holder.getsockname()[1], chrome_exe, chrome_wait)
            self._available.add((browser, port_holder))
        self._lock = threading.Lock()

    def _grab_random_port(self):
        """Returns socket bound to some port."""
        # Port 0 asks bind() to choose the port; callers read the result
        # back with getsockname().
        return self._hold_port(0)

    def _hold_port(self, port):
        """Returns socket bound to supplied port.

        Raises whatever bind() raises (e.g. OSError when the port is taken);
        in that case the socket is closed before the exception propagates.
        """
        sock = socket.socket()
        try:
            sock.bind(('127.0.0.1', port))
        except Exception:
            # Do not leak the file descriptor when the bind fails.
            sock.close()
            raise
        return sock

    def acquire(self):
        """Returns browser from pool if available, raises KeyError otherwise."""
        with self._lock:
            (browser, port_holder) = self._available.pop()
            # Stop holding the port; the caller's browser is now the one
            # associated with it.
            port_holder.close()
            return browser

    def release(self, browser):
        """Put `browser` back in the pool, re-binding its chrome_port.

        NOTE(review): if the bind fails the exception propagates and the
        browser is not added back to the pool.
        """
        with self._lock:
            port_holder = self._hold_port(browser.chrome_port)
            self._available.add((browser, port_holder))
class Browser:
"""Runs chrome/chromium to synchronously browse one page at a time using
worker.browse_page(). Currently the implementation starts up a new instance
@ -143,11 +179,6 @@ class Chrome:
self.browser_wait = browser_wait
self.user_data_dir = user_data_dir
def fetch_debugging_json():
    """Fetch http://localhost:<self.port>/json and return the parsed JSON.

    NOTE(review): this reads `self` but does not take it as a parameter, so
    it only works where `self` is resolvable from an enclosing scope --
    confirm how (and whether) it is meant to be called.
    """
    raw_json = urllib.request.urlopen("http://localhost:%s/json" % self.port).read()
    # Keep the decoded text in its own name: rebinding `json` here would
    # shadow the json module and make json.loads() fail on a str.
    json_text = raw_json.decode('utf-8')
    return json.loads(json_text)
# returns websocket url to chrome window with about:blank loaded
def __enter__(self):
chrome_args = [self.executable,