mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-08-02 19:46:36 -04:00
refactor to simplify run()
This commit is contained in:
parent
5c701abb36
commit
8366bd2d66
1 changed files with 38 additions and 25 deletions
|
@ -149,24 +149,35 @@ class BrozzlerWorker:
|
||||||
self._disclaim_site(site, page)
|
self._disclaim_site(site, page)
|
||||||
self._browser_pool.release(browser)
|
self._browser_pool.release(browser)
|
||||||
|
|
||||||
def run(self):
|
def _claim_site(self, q):
|
||||||
latest_state = None
|
"""Reads message from SimpleQueue q, fires off thread to brozzle the
|
||||||
while not self._shutdown_requested.is_set():
|
site. Raises KeyError if no browsers are available, kombu.simple.Empty
|
||||||
with kombu.Connection(self._amqp_url) as conn:
|
if queue is empty."""
|
||||||
q = conn.SimpleQueue("brozzler.sites.unclaimed")
|
|
||||||
q_empty = False
|
|
||||||
if len(q) > 0:
|
|
||||||
try:
|
|
||||||
browser = self._browser_pool.acquire()
|
browser = self._browser_pool.acquire()
|
||||||
try:
|
try:
|
||||||
msg = q.get(block=True, timeout=0.5)
|
msg = q.get(block=True, timeout=0.5)
|
||||||
site = brozzler.Site(**msg.payload)
|
site = brozzler.Site(**msg.payload)
|
||||||
msg.ack() # XXX ack only after browsing finished? kinda complicated
|
msg.ack()
|
||||||
self.logger.info("brozzling site {}".format(site))
|
self.logger.info("brozzling site {}".format(site))
|
||||||
ydl = self._youtube_dl(site)
|
ydl = self._youtube_dl(site)
|
||||||
th = threading.Thread(target=lambda: self._brozzle_site(browser, ydl, site),
|
th = threading.Thread(target=lambda: self._brozzle_site(browser, ydl, site),
|
||||||
name="BrowsingThread-{}".format(site.scope_surt))
|
name="BrowsingThread-{}".format(site.scope_surt))
|
||||||
th.start()
|
th.start()
|
||||||
|
except:
|
||||||
|
self._browser_pool.release(browser)
|
||||||
|
raise
|
||||||
|
|
||||||
|
def run(self):
|
||||||
|
latest_state = None
|
||||||
|
while not self._shutdown_requested.is_set():
|
||||||
|
try:
|
||||||
|
# XXX too much connecting and disconnecting from rabbitmq
|
||||||
|
with kombu.Connection(self._amqp_url) as conn:
|
||||||
|
q = conn.SimpleQueue("brozzler.sites.unclaimed")
|
||||||
|
q_empty = False
|
||||||
|
if len(q) > 0:
|
||||||
|
try:
|
||||||
|
self._claim_site(q)
|
||||||
except kombu.simple.Empty:
|
except kombu.simple.Empty:
|
||||||
q_empty = True
|
q_empty = True
|
||||||
except KeyError:
|
except KeyError:
|
||||||
|
@ -181,6 +192,8 @@ class BrozzlerWorker:
|
||||||
self.logger.info("no unclaimed sites to browse")
|
self.logger.info("no unclaimed sites to browse")
|
||||||
latest_state = "no-unclaimed-sites"
|
latest_state = "no-unclaimed-sites"
|
||||||
time.sleep(0.5)
|
time.sleep(0.5)
|
||||||
|
except OSError as e:
|
||||||
|
self.logger.warn("continuing after i/o exception (from rabbitmq?)", exc_info=True)
|
||||||
|
|
||||||
def start(self):
|
def start(self):
|
||||||
th = threading.Thread(target=self.run, name="BrozzlerWorker")
|
th = threading.Thread(target=self.run, name="BrozzlerWorker")
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue