mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-24 08:39:59 -05:00
Uncommented the __init__ imports; job_id is no longer required in Frontier.finished
This commit is contained in:
parent
d1aebb0258
commit
20bde1c482
@@ -43,9 +43,9 @@ class BaseDictable:
|
||||
return "{}(**{})".format(self.__class__.__name__, self.to_dict())
|
||||
|
||||
from brozzler.site import Page, Site
|
||||
# from brozzler.worker import BrozzlerWorker
|
||||
from brozzler.worker import BrozzlerWorker
|
||||
from brozzler.robots import is_permitted_by_robots
|
||||
from brozzler.frontier import RethinkDbFrontier
|
||||
# from brozzler.browser import Browser, BrowserPool
|
||||
from brozzler.browser import Browser, BrowserPool
|
||||
from brozzler.job import new_job, new_site, Job
|
||||
|
||||
|
@@ -11,10 +11,10 @@ class UnexpectedDbResult(Exception):
|
||||
class RethinkDbFrontier:
|
||||
logger = logging.getLogger(__module__ + "." + __qualname__)
|
||||
|
||||
def __init__(self, r, shards=3, replicas=3):
|
||||
def __init__(self, r, shards=None, replicas=None):
|
||||
self.r = r
|
||||
self.shards = shards
|
||||
self.replicas = replicas
|
||||
self.shards = shards or len(r.servers)
|
||||
self.replicas = replicas or min(len(r.servers), 3)
|
||||
self._ensure_db()
|
||||
|
||||
def _ensure_db(self):
|
||||
@@ -185,7 +185,8 @@ class RethinkDbFrontier:
|
||||
self.logger.info("%s %s", site, status)
|
||||
site.status = status
|
||||
self.update_site(site)
|
||||
self._maybe_finish_job(site.job_id)
|
||||
if site.job_id:
|
||||
self._maybe_finish_job(site.job_id)
|
||||
|
||||
def disclaim_site(self, site, page=None):
|
||||
self.logger.info("disclaiming %s", site)
|
||||
|
Loading…
x
Reference in New Issue
Block a user