From 700b08b7d71be93377e76cb55e991ab0a4e9692f Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Tue, 28 Feb 2017 16:12:50 -0800 Subject: [PATCH] use new rethinkstuff ORM --- brozzler/__init__.py | 16 ------ brozzler/browser.py | 1 - brozzler/cli.py | 38 +++++++------- brozzler/frontier.py | 122 +++++++++++-------------------------------- brozzler/job.py | 80 ++++++++++++++-------------- brozzler/site.py | 104 +++++++++++------------------------- brozzler/worker.py | 16 ++++-- setup.py | 4 +- 8 files changed, 134 insertions(+), 247 deletions(-) diff --git a/brozzler/__init__.py b/brozzler/__init__.py index 7b0559c..3caec57 100644 --- a/brozzler/__init__.py +++ b/brozzler/__init__.py @@ -50,20 +50,6 @@ class ReachedLimit(Exception): def __str__(self): return self.__repr__() -class BaseDictable: - def to_dict(self): - d = dict(vars(self)) - for k in vars(self): - if k.startswith("_") or d[k] is None: - del d[k] - return d - - def to_json(self): - return json.dumps(self.to_dict(), separators=(',', ':')) - - def __repr__(self): - return "{}(**{})".format(self.__class__.__name__, self.to_dict()) - def fixup(url, hash_strip=False): ''' Does rudimentary canonicalization, such as converting IDN to punycode. @@ -81,8 +67,6 @@ def fixup(url, hash_strip=False): TRACE = 5 import logging as _logging def _logging_trace(msg, *args, **kwargs): - if len(_logging.root.handlers) == 0: - basicConfig() _logging.root.trace(msg, *args, **kwargs) def _logger_trace(self, msg, *args, **kwargs): if self.isEnabledFor(TRACE): diff --git a/brozzler/browser.py b/brozzler/browser.py index 9cbb100..a5302a5 100644 --- a/brozzler/browser.py +++ b/brozzler/browser.py @@ -97,7 +97,6 @@ class BrowserPool: with self._lock: for browser in self._in_use: browser.stop() - self._in_use.clear() def num_available(self): return self.size - len(self._in_use) diff --git a/brozzler/cli.py b/brozzler/cli.py index 08d3ad9..5795c1f 100644 --- a/brozzler/cli.py +++ b/brozzler/cli.py @@ -174,12 +174,12 @@ def brozzle_page(): behavior_parameters = {} if args.behavior_parameters: behavior_parameters = json.loads(args.behavior_parameters) - site = brozzler.Site( - id=-1, seed=args.url, proxy=args.proxy, - enable_warcprox_features=args.enable_warcprox_features, - behavior_parameters=behavior_parameters, username=args.username, - password=args.password) - page = brozzler.Page(url=args.url, site_id=site.id) + site = brozzler.Site(None, { + 'id': -1, 'seed': args.url, 'proxy': args.proxy, + 'enable_warcprox_features': args.enable_warcprox_features, + 'behavior_parameters': behavior_parameters, + 'username': args.username, 'password': args.password}) + page = brozzler.Page(None, {'url': args.url, 'site_id': site.id}) worker = brozzler.BrozzlerWorker(frontier=None) def on_screenshot(screenshot_png): @@ -273,24 +273,26 @@ def brozzler_new_site(): args = arg_parser.parse_args(args=sys.argv[1:]) configure_logging(args) - site = brozzler.Site( - seed=args.seed, proxy=args.proxy, - time_limit=int(args.time_limit) if args.time_limit else None, - ignore_robots=args.ignore_robots, - enable_warcprox_features=args.enable_warcprox_features, - warcprox_meta=json.loads( - args.warcprox_meta) if args.warcprox_meta else None, - behavior_parameters=json.loads( - args.behavior_parameters) if args.behavior_parameters else None, - username=args.username, password=args.password) - r = rethinker(args) + site = brozzler.Site(r, { + 'seed': args.seed, + 'proxy': args.proxy, + 'time_limit': int(args.time_limit) if args.time_limit else None, + 'ignore_robots': 
args.ignore_robots, + 'enable_warcprox_features': args.enable_warcprox_features, + 'warcprox_meta': json.loads( + args.warcprox_meta) if args.warcprox_meta else None, + 'behavior_parameters': json.loads( + args.behavior_parameters) if args.behavior_parameters else None, + 'username': args.username, + 'password': args.password}) + frontier = brozzler.RethinkDbFrontier(r) brozzler.new_site(frontier, site) def brozzler_worker(): ''' - Main entrypoint for brozzler, gets sites and pages to brozzle from + Main entry point for brozzler, gets sites and pages to brozzle from rethinkdb, brozzles them. ''' arg_parser = argparse.ArgumentParser( diff --git a/brozzler/frontier.py b/brozzler/frontier.py index fbf9714..e17089b 100644 --- a/brozzler/frontier.py +++ b/brozzler/frontier.py @@ -92,43 +92,6 @@ class RethinkDbFrontier: if result.get(k) != expected: raise UnexpectedDbResult("expected {} to be {} in {}".format(repr(k), expected, result)) - def new_job(self, job): - self.logger.info("inserting into 'jobs' table %s", repr(job)) - result = self.r.table("jobs").insert(job.to_dict()).run() - self._vet_result(result, inserted=1) - if not job.id: - # only if "id" has not already been set - job.id = result["generated_keys"][0] - return job - - def new_site(self, site): - self.logger.info("inserting into 'sites' table %s", site) - result = self.r.table("sites").insert(site.to_dict()).run() - self._vet_result(result, inserted=1) - if not site.id: - # only if "id" has not already been set - site.id = result["generated_keys"][0] - - def update_job(self, job): - self.logger.debug("updating 'jobs' table entry %s", job) - result = self.r.table("jobs").get(job.id).replace(job.to_dict()).run() - self._vet_result(result, replaced=[0,1], unchanged=[0,1]) - - def update_site(self, site): - self.logger.debug("updating 'sites' table entry %s", site) - result = self.r.table("sites").get(site.id).replace(site.to_dict()).run() - self._vet_result(result, replaced=[0,1], unchanged=[0,1]) - - def update_page(self, page): - self.logger.trace("updating 'pages' table entry %s", page) - result = self.r.table("pages").get(page.id).replace(page.to_dict()).run() - self._vet_result(result, replaced=[0,1], unchanged=[0,1]) - - def new_page(self, page): - self.logger.trace("inserting into 'pages' table %s", page) - result = self.r.table("pages").insert(page.to_dict()).run() - self._vet_result(result, inserted=1) - def claim_site(self, worker_id): # XXX keep track of aggregate priority and prioritize sites accordingly? while True: @@ -165,7 +128,7 @@ class RethinkDbFrontier: "at %s, and presumably some error stopped it from " "being disclaimed", result["changes"][0]["old_val"]["last_claimed"]) - site = brozzler.Site(**result["changes"][0]["new_val"]) + site = brozzler.Site(self.r, result["changes"][0]["new_val"]) else: raise brozzler.NothingToClaim # XXX This is the only place we enforce time limit for now. 
Worker @@ -204,7 +167,7 @@ class RethinkDbFrontier: if result["unchanged"] == 0 and result["replaced"] == 0: raise brozzler.NothingToClaim else: - return brozzler.Page(**result["changes"][0]["new_val"]) + return brozzler.Page(self.r, result["changes"][0]["new_val"]) def has_outstanding_pages(self, site): results_iter = self.r.table("pages").between( @@ -213,55 +176,30 @@ class RethinkDbFrontier: index="priority_by_site").limit(1).run() return len(list(results_iter)) > 0 - def page(self, id): - result = self.r.table("pages").get(id).run() - if result: - return brozzler.Page(**result) - else: - return None - def completed_page(self, site, page): page.brozzle_count += 1 page.claimed = False # XXX set priority? - self.update_page(page) + page.save() if page.redirect_url and page.hops_from_seed == 0: site.note_seed_redirect(page.redirect_url) - self.update_site(site) + site.save() def active_jobs(self): results = self.r.table("jobs").filter({"status":"ACTIVE"}).run() for result in results: - yield brozzler.Job(**result) - - def job(self, id): - if id is None: - return None - result = self.r.table("jobs").get(id).run() - if result: - return brozzler.Job(**result) - else: - return None - - def site(self, id): - if id is None: - return None - result = self.r.table("sites").get(id).run() - if result: - return brozzler.Site(**result) - else: - return None + yield brozzler.Job(self.r, result) def honor_stop_request(self, job_id): """Raises brozzler.CrawlJobStopped if stop has been requested.""" - job = self.job(job_id) - if job and job.stop_requested: + job = brozzler.Job.load(self.r, job_id) + if job and job.get('stop_requested'): self.logger.info("stop requested for job %s", job_id) raise brozzler.CrawlJobStopped def _maybe_finish_job(self, job_id): """Returns True if job is finished.""" - job = self.job(job_id) + job = brozzler.Job.load(self.r, job_id) if not job: return False if job.status.startswith("FINISH"): @@ -271,7 +209,7 @@ class RethinkDbFrontier: results = self.r.table("sites").get_all(job_id, index="job_id").run() n = 0 for result in results: - site = brozzler.Site(**result) + site = brozzler.Site(self.r, result) if not site.status.startswith("FINISH"): results.close() return False @@ -279,7 +217,7 @@ class RethinkDbFrontier: self.logger.info("all %s sites finished, job %s is FINISHED!", n, job.id) job.finish() - self.update_job(job) + job.save() return True def finished(self, site, status): @@ -288,7 +226,7 @@ class RethinkDbFrontier: site.claimed = False site.last_disclaimed = rethinkstuff.utcnow() site.starts_and_stops[-1]["stop"] = rethinkstuff.utcnow() - self.update_site(site) + site.save() if site.job_id: self._maybe_finish_job(site.job_id) @@ -299,34 +237,34 @@ class RethinkDbFrontier: if not page and not self.has_outstanding_pages(site): self.finished(site, "FINISHED") else: - self.update_site(site) + site.save() if page: page.claimed = False - self.update_page(page) + page.save() def resume_job(self, job): job.status = "ACTIVE" job.starts_and_stops.append( {"start":rethinkstuff.utcnow(), "stop":None}) - self.update_job(job) + job.save() for site in self.job_sites(job.id): site.status = "ACTIVE" site.starts_and_stops.append( {"start":rethinkstuff.utcnow(), "stop":None}) - self.update_site(site) + site.save() def resume_site(self, site): if site.job_id: # can't call resume_job since that would resume jobs's other sites - job = self.job(site.job_id) + job = brozzler.job.load(self.r, site.job_id) job.status = "ACTIVE" job.starts_and_stops.append( {"start":rethinkstuff.utcnow(), 
"stop":None}) - self.update_job(job) + job.save() site.status = "ACTIVE" site.starts_and_stops.append( {"start":rethinkstuff.utcnow(), "stop":None}) - self.update_site(site) + site.save() def scope_and_schedule_outlinks(self, site, parent_page, outlinks): if site.remember_outlinks: @@ -340,18 +278,18 @@ class RethinkDbFrontier: hops_off_surt = parent_page.hops_off_surt + 1 else: hops_off_surt = 0 - new_child_page = brozzler.Page( - url, site_id=site.id, job_id=site.job_id, - hops_from_seed=parent_page.hops_from_seed+1, - via_page_id=parent_page.id, - hops_off_surt=hops_off_surt) - existing_child_page = self.page(new_child_page.id) + new_child_page = brozzler.Page(self.r, { + 'url': url, 'site_id': site.id, 'job_id': site.job_id, + 'hops_from_seed': parent_page.hops_from_seed+1, + 'via_page_id': parent_page.id, + 'hops_off_surt': hops_off_surt}) + existing_child_page = brozzler.Page.load(new_child_page.id) if existing_child_page: existing_child_page.priority += new_child_page.priority - self.update_page(existing_child_page) + existing_child_page.save() counts["updated"] += 1 else: - self.new_page(new_child_page) + new_child_page.save() counts["added"] += 1 if site.remember_outlinks: parent_page.outlinks["accepted"].append(url) @@ -365,7 +303,7 @@ class RethinkDbFrontier: parent_page.outlinks["rejected"].append(url) if site.remember_outlinks: - self.update_page(parent_page) + parent_page.save() self.logger.info( "%s new links added, %s existing links updated, %s links " @@ -386,7 +324,7 @@ class RethinkDbFrontier: def job_sites(self, job_id): results = self.r.table('sites').get_all(job_id, index="job_id").run() for result in results: - yield brozzler.Site(**result) + yield brozzler.Site(self.r, result) def seed_page(self, site_id): results = self.r.table("pages").between( @@ -399,7 +337,7 @@ class RethinkDbFrontier: "more than one seed page for site_id %s ?", site_id) if len(pages) < 1: return None - return brozzler.Page(**pages[0]) + return brozzler.Page(self.r, pages[0]) def site_pages(self, site_id, unbrozzled_only=False): results = self.r.table("pages").between( @@ -409,5 +347,5 @@ class RethinkDbFrontier: self.r.maxval, self.r.maxval], index="priority_by_site").run() for result in results: - yield brozzler.Page(**result) + yield brozzler.Page(self.r, result) diff --git a/brozzler/job.py b/brozzler/job.py index 1d91621..40c3ee3 100644 --- a/brozzler/job.py +++ b/brozzler/job.py @@ -70,31 +70,32 @@ def new_job_file(frontier, job_conf_file): def new_job(frontier, job_conf): '''Returns new Job.''' validate_conf(job_conf) - job = Job( - id=job_conf.get("id"), conf=job_conf, status="ACTIVE", - started=rethinkstuff.utcnow()) - - # insert the job now to make sure it has an id - job = frontier.new_job(job) + job = Job(frontier.r, { + "conf": job_conf, + "status": "ACTIVE", "started": rethinkstuff.utcnow()}) + if "id" in job_conf: + job.id = job_conf["id"] + job.save() sites = [] for seed_conf in job_conf["seeds"]: merged_conf = merge(seed_conf, job_conf) - site = brozzler.Site( - job_id=job.id, seed=merged_conf["url"], - scope=merged_conf.get("scope"), - time_limit=merged_conf.get("time_limit"), - proxy=merged_conf.get("proxy"), - ignore_robots=merged_conf.get("ignore_robots"), - enable_warcprox_features=merged_conf.get( - "enable_warcprox_features"), - warcprox_meta=merged_conf.get("warcprox_meta"), - metadata=merged_conf.get("metadata"), - remember_outlinks=merged_conf.get("remember_outlinks"), - user_agent=merged_conf.get("user_agent"), - 
behavior_parameters=merged_conf.get("behavior_parameters"), - username=merged_conf.get("username"), - password=merged_conf.get("password")) + site = brozzler.Site(frontier.r, { + "job_id": job.id, + "seed": merged_conf["url"], + "scope": merged_conf.get("scope"), + "time_limit": merged_conf.get("time_limit"), + "proxy": merged_conf.get("proxy"), + "ignore_robots": merged_conf.get("ignore_robots"), + "enable_warcprox_features": merged_conf.get( + "enable_warcprox_features"), + "warcprox_meta": merged_conf.get("warcprox_meta"), + "metadata": merged_conf.get("metadata"), + "remember_outlinks": merged_conf.get("remember_outlinks"), + "user_agent": merged_conf.get("user_agent"), + "behavior_parameters": merged_conf.get("behavior_parameters"), + "username": merged_conf.get("username"), + "password": merged_conf.get("password")}) sites.append(site) for site in sites: @@ -110,31 +111,31 @@ def new_site(frontier, site): # where a brozzler worker immediately claims the site, finds no pages # to crawl, and decides the site is finished try: - page = brozzler.Page( - site.seed, site_id=site.id, job_id=site.job_id, - hops_from_seed=0, priority=1000, needs_robots_check=True) - frontier.new_page(page) + page = brozzler.Page(frontier.r, { + "url": site.seed, "site_id": site.get("id"), + "job_id": site.get("job_id"), "hops_from_seed": 0, + "priority": 1000, "needs_robots_check": True}) + page.save() logging.info("queued page %s", page) finally: # finally block because we want to insert the Site no matter what - frontier.new_site(site) + site.save() except brozzler.ReachedLimit as e: frontier.reached_limit(site, e) -class Job(brozzler.BaseDictable): +class Job(rethinkstuff.Document): logger = logging.getLogger(__module__ + "." + __qualname__) + table = "jobs" - def __init__( - self, id=None, conf=None, status="ACTIVE", started=None, - finished=None, stop_requested=None, starts_and_stops=None): - self.id = id - self.conf = conf - self.status = status - self.stop_requested = stop_requested - self.starts_and_stops = starts_and_stops - if not self.starts_and_stops: - if started: # backward compatibility - self.starts_and_stops = [{"start":started,"stop":finished}] + def __init__(self, rethinker, d={}): + rethinkstuff.Document.__init__(self, rethinker, d) + self.status = self.get("status", "ACTIVE") + if not "starts_and_stops" in self: + if self.get("started"): # backward compatibility + self.starts_and_stops = [{ + "start": self.get("started"), + "stop": self.get("finished")}] + del self["started"] else: self.starts_and_stops = [ {"start":rethinkstuff.utcnow(),"stop":None}] @@ -148,6 +149,3 @@ class Job(brozzler.BaseDictable): self.status = "FINISHED" self.starts_and_stops[-1]["stop"] = rethinkstuff.utcnow() - def __str__(self): - return 'Job(id=%s)' % self.id - diff --git a/brozzler/site.py b/brozzler/site.py index 215d53e..55f0e6d 100644 --- a/brozzler/site.py +++ b/brozzler/site.py @@ -88,53 +88,29 @@ class Url: return host_parts[-len(domain_parts):] == domain_parts -class Site(brozzler.BaseDictable): +class Site(rethinkstuff.Document): logger = logging.getLogger(__module__ + "." 
+ __qualname__) + table = 'sites' - def __init__( - self, seed, id=None, job_id=None, scope=None, proxy=None, - ignore_robots=False, time_limit=None, warcprox_meta=None, - enable_warcprox_features=False, reached_limit=None, - status="ACTIVE", claimed=False, start_time=None, - last_disclaimed=_EPOCH_UTC, last_claimed_by=None, - last_claimed=_EPOCH_UTC, metadata={}, remember_outlinks=None, - cookie_db=None, user_agent=None, behavior_parameters=None, - username=None, password=None, starts_and_stops=None): - - self.seed = seed - self.id = id - self.job_id = job_id - self.proxy = proxy - self.ignore_robots = ignore_robots - self.enable_warcprox_features = bool(enable_warcprox_features) - self.warcprox_meta = warcprox_meta - self.time_limit = time_limit - self.reached_limit = reached_limit - self.status = status - self.claimed = bool(claimed) - self.last_claimed_by = last_claimed_by - self.last_disclaimed = last_disclaimed - self.last_claimed = last_claimed - self.metadata = metadata - self.remember_outlinks = remember_outlinks - self.cookie_db = cookie_db - self.user_agent = user_agent - self.behavior_parameters = behavior_parameters - self.username = username - self.password = password - self.starts_and_stops = starts_and_stops - if not self.starts_and_stops: - if start_time: # backward compatibility + def __init__(self, rethinker, d={}): + rethinkstuff.Document.__init__(self, rethinker, d) + if not self.get('status'): + self.status = 'ACTIVE' + self.enable_warcprox_features = bool(self.get('enable_warcprox_features')) + self.claimed = bool(self.get('claimed')) + self.last_disclaimed = self.get('last_disclaimed', _EPOCH_UTC) + self.last_claimed = self.get('last_claimed', _EPOCH_UTC) + if not self.get('starts_and_stops'): + if self.get('start_time'): # backward compatibility self.starts_and_stops = [{"start":start_time,"stop":None}] - if self.status != "ACTIVE": + if self.get('status') != "ACTIVE": self.starts_and_stops[0]["stop"] = self.last_disclaimed else: self.starts_and_stops = [ {"start":rethinkstuff.utcnow(),"stop":None}] - self.scope = scope or {} - if not "surt" in self.scope: - self.scope["surt"] = Url(seed).surt + def __str__(self): + return 'Site({"id":"%s","seed":"%s",...})' % (self.id, self.seed) def elapsed(self): '''Returns elapsed crawl time as a float in seconds.''' @@ -148,9 +124,6 @@ class Site(brozzler.BaseDictable): dt += (rethinkstuff.utcnow() - ss['start']).total_seconds() return dt - def __str__(self): - return "Site-%s-%s" % (self.id, self.seed) - def note_seed_redirect(self, url): new_scope_surt = Url(url).surt if not new_scope_surt.startswith(self.scope["surt"]): @@ -305,41 +278,26 @@ class Site(brozzler.BaseDictable): return True -class Page(brozzler.BaseDictable): - def __init__( - self, url, id=None, site_id=None, job_id=None, hops_from_seed=0, - redirect_url=None, priority=None, claimed=False, brozzle_count=0, - via_page_id=None, last_claimed_by=None, hops_off_surt=0, - outlinks=None, needs_robots_check=False, blocked_by_robots=None): - self.site_id = site_id - self.job_id = job_id - self.url = url - self.hops_from_seed = hops_from_seed - self.redirect_url = redirect_url - self.claimed = bool(claimed) - self.last_claimed_by = last_claimed_by - self.brozzle_count = brozzle_count - self.via_page_id = via_page_id - self.hops_off_surt = hops_off_surt - self.outlinks = outlinks - self.needs_robots_check = needs_robots_check - self.blocked_by_robots = blocked_by_robots +class Page(rethinkstuff.Document): + logger = logging.getLogger(__module__ + "." 
+ __qualname__) + table = 'pages' + + def __init__(self, rethinker, d={}): + rethinkstuff.Document.__init__(self, rethinker, d) + self.hops_from_seed = self.get('hops_from_seed', 0) + self.brozzle_count = self.get('brozzle_count', 0) + self.claimed = self.get('claimed', False) + self.hops_off_surt = self.get('hops_off_surt', 0) + self.needs_robots_check = self.get('needs_robots_check', False) self._canon_hurl = None - if priority is not None: - self.priority = priority - else: - self.priority = self._calc_priority() - - if id is not None: - self.id = id - else: - digest_this = "site_id:{},url:{}".format(self.site_id, self.url) + self.priority = self.get('priority', self._calc_priority()) + if self.get('id') is None: + digest_this = "site_id:%s,url:%s" % (self.site_id, self.url) self.id = hashlib.sha1(digest_this.encode("utf-8")).hexdigest() - def __repr__(self): - return """Page(url={},job_id={},site_id={},hops_from_seed={})""".format( - repr(self.url), self.job_id, self.site_id, self.hops_from_seed) + def __str__(self): + return 'Page({"id":"%s","url":"%s",...})' % (self.id, self.url) def note_redirect(self, url): self.redirect_url = url diff --git a/brozzler/worker.py b/brozzler/worker.py index eb2fed9..03e1476 100644 --- a/brozzler/worker.py +++ b/brozzler/worker.py @@ -126,6 +126,10 @@ class BrozzlerWorker: site.enable_warcprox_features or self._default_enable_warcprox_features): svc = self._service_registry.available_service('warcprox') + if svc is None: + raise Exception( + 'no available instances of warcprox in the service ' + 'registry') site.proxy = '%s:%s' % (svc['host'], svc['port']) self._frontier.update_site(site) self.logger.info( @@ -284,12 +288,15 @@ class BrozzlerWorker: if self._needs_browsing(page, ydl_spy): self.logger.info('needs browsing: %s', page) if not browser.is_running(): - browser.start(proxy=self._proxy(site), cookie_db=site.cookie_db) + browser.start( + proxy=self._proxy(site), + cookie_db=site.get('cookie_db')) final_page_url, outlinks = browser.browse_page( page.url, extra_headers=site.extra_headers(), - behavior_parameters=site.behavior_parameters, - username=site.username, password=site.password, - user_agent=site.user_agent, + behavior_parameters=site.get('behavior_parameters'), + username=site.get('username'), + password=site.get('password'), + user_agent=site.get('user_agent'), on_screenshot=_on_screenshot) if final_page_url != page.url: page.note_redirect(final_page_url) @@ -408,6 +415,7 @@ class BrozzlerWorker: self._service_heartbeat() def run(self): + self.logger.info("brozzler worker starting") try: latest_state = None while True: diff --git a/setup.py b/setup.py index 9f6ee34..3c1a8f6 100644 --- a/setup.py +++ b/setup.py @@ -32,7 +32,7 @@ def find_package_data(package): setuptools.setup( name='brozzler', - version='1.1b9.dev196', + version='1.1b9.dev197', description='Distributed web crawling with browsers', url='https://github.com/internetarchive/brozzler', author='Noah Levitt', @@ -69,7 +69,7 @@ setuptools.setup( 'websocket-client!=0.39.0', 'pillow==3.3.0', 'surt>=0.3.0', - 'rethinkstuff>=0.1.5', + 'rethinkstuff>=0.2.0.dev62', 'rethinkdb>=2.3,<2.4', 'cerberus==1.0.1', 'jinja2',
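
For reference, the rethinkstuff Document pattern that this patch adopts for Job, Site, and Page works roughly as in the sketch below. This is a minimal illustration, not part of the diff; it assumes the rethinkstuff >= 0.2.0 API as used above (a class-level table attribute, ClassName.load(rethinker, id), instance save(), dict-style get()) and a rethinkstuff.Rethinker connection object; the server address, database name, and job id are placeholders.

    import rethinkstuff
    import brozzler

    # connection wrapper shared by the Document subclasses (Job, Site, Page);
    # server and db names here are illustrative
    r = rethinkstuff.Rethinker(servers=['localhost'], db='brozzler')

    # documents are constructed from a rethinker and a plain dict, then persisted
    site = brozzler.Site(r, {'seed': 'http://example.com/'})
    site.save()   # writes to the 'sites' table (per Site.table)

    # fetch by primary key with the class-level load(), then read fields either
    # as attributes or with get(), as frontier.py and worker.py do above
    job = brozzler.Job.load(r, 'some-job-id')
    if job and job.get('stop_requested'):
        print('stop requested for job %s' % job.id)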