diff --git a/brozzler/frontier.py b/brozzler/frontier.py index 72934a8..704c0e5 100644 --- a/brozzler/frontier.py +++ b/brozzler/frontier.py @@ -313,6 +313,8 @@ class RethinkDbFrontier: if len(pages) > 1: self.logger.warn( "more than one seed page for site_id %s ?", site_id) + if len(pages) < 1: + return None return brozzler.Page(**pages[0]) def site_pages(self, site_id, unbrozzled_only=False): diff --git a/brozzler/worker.py b/brozzler/worker.py index f30705b..0f2f545 100644 --- a/brozzler/worker.py +++ b/brozzler/worker.py @@ -360,7 +360,12 @@ class BrozzlerWorker: self.logger.critical("thread exiting due to unexpected exception", exc_info=True) finally: if self._service_registry and hasattr(self, "status_info"): - self._service_registry.unregister(self.status_info["id"]) + try: + self._service_registry.unregister(self.status_info["id"]) + except: + self.logger.error( + "failed to unregister from service registry", + exc_info=True) def start(self): th = threading.Thread(target=self.run, name="BrozzlerWorker") diff --git a/setup.py b/setup.py index b21c89f..73ceca3 100644 --- a/setup.py +++ b/setup.py @@ -20,7 +20,7 @@ import setuptools import glob setuptools.setup(name='brozzler', - version='1.1.dev9', + version='1.1.dev10', description='Distributed web crawling with browsers', url='https://github.com/nlevitt/brozzler', author='Noah Levitt',