mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-24 00:29:53 -05:00
Fix JPEG thumbnail problems
Because we run JS behaviors before capturing the screenshot, the browser may be scrolled down the page. When we don't capture the full page, we can therefore get a screenshot of the bottom of the page instead of the top. To fix that, we run `window.scroll(0, 0)` before capturing the screenshot. We also rename the method `BrozzlerWorker.full_and_thumb_jpegs` to `BrozzlerWorker.thumb_jpeg`, because after our changes to `Browser.screenshot` the browser already returns a JPEG. `thumb_jpeg` now returns only a thumbnail; there is no longer any need to read a PNG and convert it to JPEG. This means that screenshots will be a bit faster now :)
This commit is contained in:
parent
674da4aa99
commit
ba901e3a99
@ -514,6 +514,11 @@ class Browser:
|
||||
self.websock_thread.on_response = None
|
||||
|
||||
def _try_screenshot(self, on_screenshot, full_page=False):
|
||||
"""The browser instance must be scrolled to the top of the page before
|
||||
trying to get a screenshot.
|
||||
"""
|
||||
self.send_to_chrome(method='Runtime.evaluate', suppress_logging=True,
|
||||
params={'expression': 'window.scroll(0,0)'})
|
||||
for i in range(3):
|
||||
try:
|
||||
jpeg_bytes = self.screenshot(full_page)
|
||||
@ -616,9 +621,9 @@ class Browser:
|
||||
deviceScaleFactor=deviceScaleFactor,
|
||||
screenOrientation=screenOrientation)
|
||||
)
|
||||
capture_params = {'format': 'jpeg', quality: 95, 'clip': clip}
|
||||
capture_params = {'format': 'jpeg', 'quality': 95, 'clip': clip}
|
||||
else:
|
||||
capture_params = {'format': 'jpeg', quality: 95}
|
||||
capture_params = {'format': 'jpeg', 'quality': 95}
|
||||
self.websock_thread.expect_result(self._command_id.peek())
|
||||
msg_id = self.send_to_chrome(method='Page.captureScreenshot',
|
||||
params=capture_params)
|
||||
|
@ -165,22 +165,16 @@ class BrozzlerWorker:
|
||||
raise brozzler.ProxyError(
|
||||
'proxy error on WARCPROX_WRITE_RECORD %s' % url) from e
|
||||
|
||||
def full_and_thumb_jpegs(self, large_png):
|
||||
# these screenshots never have any alpha (right?)
|
||||
img = PIL.Image.open(io.BytesIO(large_png)).convert('RGB')
|
||||
|
||||
out = io.BytesIO()
|
||||
img.save(out, "jpeg", quality=95)
|
||||
full_jpeg = out.getbuffer()
|
||||
|
||||
def thumb_jpeg(self, full_jpeg):
|
||||
"""Create JPEG thumbnail.
|
||||
"""
|
||||
img = PIL.Image.open(io.BytesIO(full_jpeg))
|
||||
thumb_width = 300
|
||||
thumb_height = (thumb_width / img.size[0]) * img.size[1]
|
||||
img.thumbnail((thumb_width, thumb_height))
|
||||
out = io.BytesIO()
|
||||
img.save(out, "jpeg", quality=95)
|
||||
thumb_jpeg = out.getbuffer()
|
||||
|
||||
return full_jpeg, thumb_jpeg
|
||||
return out.getbuffer()
|
||||
|
||||
def brozzle_page(self, browser, site, page, on_screenshot=None,
|
||||
on_request=None, enable_youtube_dl=True):
|
||||
@ -226,15 +220,14 @@ class BrozzlerWorker:
|
||||
return outlinks
|
||||
|
||||
def _browse_page(self, browser, site, page, on_screenshot=None, on_request=None):
|
||||
def _on_screenshot(screenshot_png):
|
||||
def _on_screenshot(screenshot_jpeg):
|
||||
if on_screenshot:
|
||||
on_screenshot(screenshot_png)
|
||||
on_screenshot(screenshot_jpeg)
|
||||
if self._using_warcprox(site):
|
||||
self.logger.info(
|
||||
"sending WARCPROX_WRITE_RECORD request to %s with "
|
||||
"screenshot for %s", self._proxy_for(site), page)
|
||||
screenshot_jpeg, thumbnail_jpeg = self.full_and_thumb_jpegs(
|
||||
screenshot_png)
|
||||
thumbnail_jpeg = self.thumb_jpeg(screenshot_jpeg)
|
||||
self._warcprox_write_record(
|
||||
warcprox_address=self._proxy_for(site),
|
||||
url="screenshot:%s" % str(urlcanon.semantic(page.url)),
|
||||
|
Loading…
x
Reference in New Issue
Block a user