From 20bde1c482744084f0fee465b6b3fedb4e10f14a Mon Sep 17 00:00:00 2001 From: Adam Miller Date: Thu, 22 Oct 2015 22:29:24 +0000 Subject: [PATCH] uncommented init imports, removed required job_id in Frontier.finished --- brozzler/__init__.py | 4 ++-- brozzler/frontier.py | 9 +++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/brozzler/__init__.py b/brozzler/__init__.py index 1346336..d79328f 100644 --- a/brozzler/__init__.py +++ b/brozzler/__init__.py @@ -43,9 +43,9 @@ class BaseDictable: return "{}(**{})".format(self.__class__.__name__, self.to_dict()) from brozzler.site import Page, Site -# from brozzler.worker import BrozzlerWorker +from brozzler.worker import BrozzlerWorker from brozzler.robots import is_permitted_by_robots from brozzler.frontier import RethinkDbFrontier -# from brozzler.browser import Browser, BrowserPool +from brozzler.browser import Browser, BrowserPool from brozzler.job import new_job, new_site, Job diff --git a/brozzler/frontier.py b/brozzler/frontier.py index 3c405f5..36bedd0 100644 --- a/brozzler/frontier.py +++ b/brozzler/frontier.py @@ -11,10 +11,10 @@ class UnexpectedDbResult(Exception): class RethinkDbFrontier: logger = logging.getLogger(__module__ + "." + __qualname__) - def __init__(self, r, shards=3, replicas=3): + def __init__(self, r, shards=None, replicas=None): self.r = r - self.shards = shards - self.replicas = replicas + self.shards = shards or len(r.servers) + self.replicas = replicas or min(len(r.servers), 3) self._ensure_db() def _ensure_db(self): @@ -185,7 +185,8 @@ class RethinkDbFrontier: self.logger.info("%s %s", site, status) site.status = status self.update_site(site) - self._maybe_finish_job(site.job_id) + if site.job_id: + self._maybe_finish_job(site.job_id) def disclaim_site(self, site, page=None): self.logger.info("disclaiming %s", site)