Merge branch 'master' into qa

* master:
  catch exception from rethinkdb when unregistering from the service registry at shutdown
  correctly handle site with no pages (which means the seed was blocked by robots.txt) in frontier.seed_page
This commit is contained in:
Noah Levitt 2016-05-11 00:47:05 +00:00
commit 7dd841cae2
3 changed files with 9 additions and 2 deletions

View File

@@ -313,6 +313,8 @@ class RethinkDbFrontier:
if len(pages) > 1:
self.logger.warn(
"more than one seed page for site_id %s ?", site_id)
if len(pages) < 1:
return None
return brozzler.Page(**pages[0])
def site_pages(self, site_id, unbrozzled_only=False):

View File

@@ -360,7 +360,12 @@ class BrozzlerWorker:
self.logger.critical("thread exiting due to unexpected exception", exc_info=True)
finally:
if self._service_registry and hasattr(self, "status_info"):
self._service_registry.unregister(self.status_info["id"])
try:
self._service_registry.unregister(self.status_info["id"])
except:
self.logger.error(
"failed to unregister from service registry",
exc_info=True)
def start(self):
th = threading.Thread(target=self.run, name="BrozzlerWorker")

View File

@@ -20,7 +20,7 @@ import setuptools
import glob
setuptools.setup(name='brozzler',
version='1.1.dev9',
version='1.1.dev10',
description='Distributed web crawling with browsers',
url='https://github.com/nlevitt/brozzler',
author='Noah Levitt',