create thumbnail of screenshot and send to warcprox

This commit is contained in:
Noah Levitt 2015-09-07 06:27:21 +00:00
parent 565ab5f936
commit 9698b0f847
2 changed files with 19 additions and 3 deletions

View file

@@ -9,8 +9,8 @@ import signal
import youtube_dl
import urllib.request
import json
__all__ = ["BrozzlerWorker"]
import PIL.Image
import io
class BrozzlerWorker:
logger = logging.getLogger(__module__ + "." + __qualname__)
@@ -85,6 +85,15 @@ class BrozzlerWorker:
else:
raise
def make_thumbnail(self, large_png, thumb_width=300):
    """
    Build a JPEG thumbnail from an encoded screenshot.

    Args:
        large_png: encoded image bytes (the caller passes the PNG
            screenshot); decoded with PIL.Image.open
        thumb_width: requested thumbnail width in pixels; the height
            bound is derived from it so the aspect ratio is preserved

    Returns:
        memoryview over the JPEG-encoded thumbnail (quality 95)
    """
    img = PIL.Image.open(io.BytesIO(large_png))
    orig_width, orig_height = img.size
    thumb_height = (thumb_width / orig_width) * orig_height
    # thumbnail() resizes the image in place to fit within the given box
    img.thumbnail((thumb_width, thumb_height))
    # PNG screenshots are commonly RGBA or palette based, and Pillow's JPEG
    # writer rejects those modes ("cannot write mode RGBA as JPEG"), so
    # fall back to plain RGB for anything the encoder cannot take as-is.
    # Done after shrinking so the conversion touches the small image only.
    if img.mode not in ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr"):
        img = img.convert("RGB")
    out = io.BytesIO()
    img.save(out, "jpeg", quality=95)
    return out.getbuffer()
def brozzle_page(self, browser, ydl, site, page):
def on_screenshot(screenshot_png):
if site.proxy and site.enable_warcprox_features:
@@ -94,7 +103,12 @@ class BrozzlerWorker:
warc_type="resource", content_type="image/png",
payload=screenshot_png,
extra_headers=site.extra_headers)
# XXX thumbnail
thumbnail_jpg = self.make_thumbnail(screenshot_png)
self._warcprox_write_record(warcprox_address=site.proxy,
url="thumbnail:{}".format(page.url),
warc_type="resource", content_type="image/jpeg",
payload=thumbnail_jpg,
extra_headers=site.extra_headers)
self.logger.info("brozzling {}".format(page))
try:

View file

@@ -1,7 +1,9 @@
PyYAML
git+https://github.com/nlevitt/surt.git@py3
# -e /home/nlevitt/workspace/surt
git+https://github.com/nlevitt/youtube-dl.git@brozzler
git+https://github.com/seomoz/reppy.git # https://github.com/seomoz/reppy/commit/7661606c not in pypi package
requests
git+https://github.com/nlevitt/websocket-client.git@tweaks
rethinkdb
pillow