mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-08-03 03:56:24 -04:00
create thumbnail of screenshot and send to warcprox
This commit is contained in:
parent
565ab5f936
commit
9698b0f847
2 changed files with 19 additions and 3 deletions
|
@ -9,8 +9,8 @@ import signal
|
||||||
import youtube_dl
|
import youtube_dl
|
||||||
import urllib.request
|
import urllib.request
|
||||||
import json
|
import json
|
||||||
|
import PIL.Image
|
||||||
__all__ = ["BrozzlerWorker"]
|
import io
|
||||||
|
|
||||||
class BrozzlerWorker:
|
class BrozzlerWorker:
|
||||||
logger = logging.getLogger(__module__ + "." + __qualname__)
|
logger = logging.getLogger(__module__ + "." + __qualname__)
|
||||||
|
@ -85,6 +85,15 @@ class BrozzlerWorker:
|
||||||
else:
|
else:
|
||||||
raise
|
raise
|
||||||
|
|
||||||
|
def make_thumbnail(self, large_png, thumb_width=300):
    """
    Build a JPEG thumbnail from a PNG screenshot.

    Args:
        large_png: bytes-like object containing the PNG-encoded image.
        thumb_width: target thumbnail width in pixels (default 300). The
            height is derived from it so the aspect ratio is preserved.

    Returns:
        The JPEG-encoded thumbnail, as the buffer view of an in-memory
        io.BytesIO (same return type as before).
    """
    img = PIL.Image.open(io.BytesIO(large_png))

    # JPEG cannot store an alpha channel or a palette; saving an RGBA/P/LA
    # image as JPEG raises an error in Pillow, so normalize to RGB first.
    if img.mode != "RGB":
        img = img.convert("RGB")

    full_width, full_height = img.size
    # Integer pixel dimensions; never let a very wide image collapse to a
    # zero-height bounding box.
    thumb_height = max(1, thumb_width * full_height // full_width)

    # thumbnail() resizes in place and keeps the aspect ratio within the
    # requested bounding box.
    img.thumbnail((thumb_width, thumb_height))

    out = io.BytesIO()
    img.save(out, "jpeg", quality=95)
    return out.getbuffer()
|
||||||
|
|
||||||
def brozzle_page(self, browser, ydl, site, page):
|
def brozzle_page(self, browser, ydl, site, page):
|
||||||
def on_screenshot(screenshot_png):
|
def on_screenshot(screenshot_png):
|
||||||
if site.proxy and site.enable_warcprox_features:
|
if site.proxy and site.enable_warcprox_features:
|
||||||
|
@ -94,7 +103,12 @@ class BrozzlerWorker:
|
||||||
warc_type="resource", content_type="image/png",
|
warc_type="resource", content_type="image/png",
|
||||||
payload=screenshot_png,
|
payload=screenshot_png,
|
||||||
extra_headers=site.extra_headers)
|
extra_headers=site.extra_headers)
|
||||||
# XXX thumbnail
|
thumbnail_jpg = self.make_thumbnail(screenshot_png)
|
||||||
|
self._warcprox_write_record(warcprox_address=site.proxy,
|
||||||
|
url="thumbnail:{}".format(page.url),
|
||||||
|
warc_type="resource", content_type="image/jpeg",
|
||||||
|
payload=thumbnail_jpg,
|
||||||
|
extra_headers=site.extra_headers)
|
||||||
|
|
||||||
self.logger.info("brozzling {}".format(page))
|
self.logger.info("brozzling {}".format(page))
|
||||||
try:
|
try:
|
||||||
|
|
|
@ -1,7 +1,9 @@
|
||||||
PyYAML
|
PyYAML
|
||||||
git+https://github.com/nlevitt/surt.git@py3
|
git+https://github.com/nlevitt/surt.git@py3
|
||||||
|
# -e /home/nlevitt/workspace/surt
|
||||||
git+https://github.com/nlevitt/youtube-dl.git@brozzler
|
git+https://github.com/nlevitt/youtube-dl.git@brozzler
|
||||||
git+https://github.com/seomoz/reppy.git # https://github.com/seomoz/reppy/commit/7661606c not in pypi package
|
git+https://github.com/seomoz/reppy.git # https://github.com/seomoz/reppy/commit/7661606c not in pypi package
|
||||||
requests
|
requests
|
||||||
git+https://github.com/nlevitt/websocket-client.git@tweaks
|
git+https://github.com/nlevitt/websocket-client.git@tweaks
|
||||||
rethinkdb
|
rethinkdb
|
||||||
|
pillow
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue