diff --git a/bin/brozzler-hq b/bin/brozzler-hq index dd98066..3c4e421 100755 --- a/bin/brozzler-hq +++ b/bin/brozzler-hq @@ -25,9 +25,6 @@ args = arg_parser.parse_args(args=sys.argv[1:]) logging.basicConfig(stream=sys.stdout, level=args.log_level, format="%(asctime)s %(process)d %(levelname)s %(threadName)s %(name)s.%(funcName)s(%(filename)s:%(lineno)d) %(message)s") -logging.info("brozzler-hq starting") - - def sigterm(signum, frame): raise brozzler.ShutdownRequested("shutdown requested (caught SIGTERM)") def sigint(signum, frame): @@ -36,6 +33,8 @@ def sigint(signum, frame): signal.signal(signal.SIGTERM, sigterm) signal.signal(signal.SIGINT, sigint) +logging.info("brozzler-hq starting") + db = brozzler.hq.BrozzlerHQDb(db_file=args.db_file) hq = brozzler.hq.BrozzlerHQ(amqp_url=args.amqp_url, db=db) diff --git a/brozzler/worker.py b/brozzler/worker.py index f751bac..75ac8ba 100755 --- a/brozzler/worker.py +++ b/brozzler/worker.py @@ -107,6 +107,11 @@ class BrozzlerWorker: else: raise + def _on_screenshot(self, site, crawl_url, screenshot_png): + if self._proxy_server and self._enable_warcprox_features: + logging.info("sending PUTMETA request to warcprox with screenshot for {}".format(crawl_url)) + self._putmeta(url=crawl_url.url, content_type="image/png", payload=screenshot_png) + def _brozzle_site(self, browser, site): start = time.time() crawl_url = None @@ -117,7 +122,8 @@ class BrozzlerWorker: crawl_url = self._next_url(site) logging.info("crawling {}".format(crawl_url)) self._try_youtube_dl(site, crawl_url) - crawl_url.outlinks = browser.browse_page(crawl_url.url) + crawl_url.outlinks = browser.browse_page(crawl_url.url, + on_screenshot=lambda screenshot_png: self._on_screenshot(site, crawl_url, screenshot_png)) self._completed_url(site, crawl_url) crawl_url = None except kombu.simple.Empty: