mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-08-14 09:15:54 -04:00
Merge branch 'master' into qa
* master: catch exception from rethinkdb when unregistering from the service registry at shutdown; correctly handle site with no pages (which means the seed was blocked by robots.txt) in frontier.seed_page
This commit is contained in:
commit
7dd841cae2
3 changed files with 9 additions and 2 deletions
|
@ -313,6 +313,8 @@ class RethinkDbFrontier:
|
|||
if len(pages) > 1:
|
||||
self.logger.warn(
|
||||
"more than one seed page for site_id %s ?", site_id)
|
||||
if len(pages) < 1:
|
||||
return None
|
||||
return brozzler.Page(**pages[0])
|
||||
|
||||
def site_pages(self, site_id, unbrozzled_only=False):
|
||||
|
|
|
@ -360,7 +360,12 @@ class BrozzlerWorker:
|
|||
self.logger.critical("thread exiting due to unexpected exception", exc_info=True)
|
||||
finally:
|
||||
if self._service_registry and hasattr(self, "status_info"):
|
||||
self._service_registry.unregister(self.status_info["id"])
|
||||
try:
|
||||
self._service_registry.unregister(self.status_info["id"])
|
||||
except:
|
||||
self.logger.error(
|
||||
"failed to unregister from service registry",
|
||||
exc_info=True)
|
||||
|
||||
def start(self):
|
||||
th = threading.Thread(target=self.run, name="BrozzlerWorker")
|
||||
|
|
2
setup.py
2
setup.py
|
@ -20,7 +20,7 @@ import setuptools
|
|||
import glob
|
||||
|
||||
setuptools.setup(name='brozzler',
|
||||
version='1.1.dev9',
|
||||
version='1.1.dev10',
|
||||
description='Distributed web crawling with browsers',
|
||||
url='https://github.com/nlevitt/brozzler',
|
||||
author='Noah Levitt',
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue