mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-04-20 23:56:34 -04:00
make brozzle-page utility save the screenshot to a file
This commit is contained in:
parent
87af7eaa73
commit
5a2ea2cea4
@ -26,6 +26,8 @@ import brozzler
|
||||
import re
|
||||
import warnings
|
||||
import requests
|
||||
import string
|
||||
import datetime
|
||||
|
||||
arg_parser = argparse.ArgumentParser(prog=os.path.basename(__file__),
|
||||
description="brozzle-page - brozzle a single page",
|
||||
@ -63,10 +65,21 @@ page = brozzler.Page(url=args.url, site_id=site.id)
|
||||
worker = brozzler.BrozzlerWorker(frontier=None)
|
||||
ydl = worker._youtube_dl(site)
|
||||
|
||||
def on_screenshot(screenshot_png):
    """Write the screenshot bytes to a timestamped PNG file under /tmp.

    The file name is derived from ``args.url``: every character that is not
    an ASCII letter or digit is replaced with ``_``, and the current time
    (``YYYYmmddHHMMSS``) is appended so repeated runs do not overwrite each
    other.

    Args:
        screenshot_png: the PNG data, written to the file verbatim.

    Raises:
        OSError: if the file cannot be created or written.
    """
    ok_chars = frozenset(string.ascii_letters + string.digits)
    safe_url = "".join(ch if ch in ok_chars else "_" for ch in args.url)
    # File names longer than 255 bytes are rejected by most filesystems, so
    # cap the URL-derived part; this leaves room for "-<timestamp>.png".
    # URLs shorter than the cap produce exactly the same name as before.
    safe_url = safe_url[:200]
    filename = "/tmp/{}-{:%Y%m%d%H%M%S}.png".format(
            safe_url, datetime.datetime.now())
    with open(filename, 'wb') as f:
        f.write(screenshot_png)
    logging.info("wrote screenshot to %s", filename)
|
||||
|
||||
browser = brozzler.Browser(chrome_exe=args.chrome_exe)
|
||||
browser.start(proxy=site.proxy)
|
||||
try:
|
||||
outlinks = worker.brozzle_page(browser, ydl, site, page)
|
||||
outlinks = worker.brozzle_page(
|
||||
browser, ydl, site, page, on_screenshot=on_screenshot)
|
||||
logging.info("outlinks: \n\t%s", "\n\t".join(sorted(outlinks)))
|
||||
except brozzler.ReachedLimit as e:
|
||||
logging.error("reached limit %s", e)
|
||||
|
@ -194,9 +194,11 @@ class BrozzlerWorker:
|
||||
|
||||
return full_jpeg, thumb_jpeg
|
||||
|
||||
def brozzle_page(self, browser, ydl, site, page):
|
||||
def on_screenshot(screenshot_png):
|
||||
if site.proxy and site.enable_warcprox_features:
|
||||
def brozzle_page(self, browser, ydl, site, page, on_screenshot=None):
|
||||
def _on_screenshot(screenshot_png):
|
||||
if on_screenshot:
|
||||
on_screenshot(screenshot_png)
|
||||
elif site.proxy and site.enable_warcprox_features:
|
||||
self.logger.info("sending WARCPROX_WRITE_RECORD request "
|
||||
"to warcprox with screenshot for %s", page)
|
||||
screenshot_jpeg, thumbnail_jpeg = self.full_and_thumb_jpegs(
|
||||
@ -228,7 +230,7 @@ class BrozzlerWorker:
|
||||
browser.start(proxy=site.proxy)
|
||||
outlinks = browser.browse_page(
|
||||
page.url, extra_headers=site.extra_headers,
|
||||
on_screenshot=on_screenshot,
|
||||
on_screenshot=_on_screenshot,
|
||||
on_url_change=page.note_redirect)
|
||||
return outlinks
|
||||
else:
|
||||
|
Loading…
x
Reference in New Issue
Block a user