def make_thumbnail(self, large_png, *, thumb_width=300, quality=95):
    """Return a JPEG thumbnail of a PNG screenshot.

    The image is scaled to fit a box ``thumb_width`` pixels wide while
    keeping its aspect ratio, then re-encoded as JPEG.

    Args:
        large_png: encoded image bytes (the caller passes the PNG
            screenshot payload).
        thumb_width: maximum width of the thumbnail in pixels.
        quality: JPEG quality setting handed to the encoder.

    Returns:
        The buffer of an in-memory stream (``io.BytesIO.getbuffer()``)
        holding the encoded JPEG.

    Raises:
        OSError: if ``large_png`` cannot be decoded as an image.
    """
    img = PIL.Image.open(io.BytesIO(large_png))

    # JPEG has no alpha channel or palette; saving an RGBA/LA/P image
    # as JPEG raises OSError, so normalise to RGB before encoding.
    if img.mode not in ("RGB", "L"):
        img = img.convert("RGB")

    full_width, full_height = img.size
    # Use an integer pixel box and never let the height collapse below
    # one pixel for extremely wide images.
    thumb_height = max(1, round(full_height * thumb_width / full_width))

    # thumbnail() resizes in place and preserves the aspect ratio.
    img.thumbnail((thumb_width, thumb_height))

    out = io.BytesIO()
    img.save(out, "jpeg", quality=quality)
    return out.getbuffer()