Add option to capture full page screenshot

Add option `full_page` to `Browser.screenshot`. The default behavior
remains the same.
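The full-page capture is modeled on puppeteer's implementation: read the
content size with `Page.getLayoutMetrics`, override the device metrics to
that size, then call `Page.captureScreenshot` with a matching clip: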
https://github.com/GoogleChrome/puppeteer/blob/master/lib/Page.js#L898

Add a `screenshot_full_page` parameter (default `False`) to
`Browser.browse_page`; it is passed through `_try_screenshot` to
`Browser.screenshot` when `on_screenshot` is set.
This commit is contained in:
Vangelis Banos 2019-10-08 10:55:10 +00:00
parent 464562461c
commit 34d8f87fb5

View file

@ -410,8 +410,9 @@ class Browser:
on_request=None, on_response=None, on_request=None, on_response=None,
on_service_worker_version_updated=None, on_screenshot=None, on_service_worker_version_updated=None, on_screenshot=None,
username=None, password=None, hashtags=None, username=None, password=None, hashtags=None,
skip_extract_outlinks=False, skip_visit_hashtags=False, screenshot_full_page=False, skip_extract_outlinks=False,
skip_youtube_dl=False, page_timeout=300, behavior_timeout=900): skip_visit_hashtags=False, skip_youtube_dl=False, page_timeout=300,
behavior_timeout=900):
''' '''
Browses page in browser. Browses page in browser.
@ -487,7 +488,7 @@ class Browser:
page_url) page_url)
self.navigate_to_page(page_url, timeout=page_timeout) self.navigate_to_page(page_url, timeout=page_timeout)
if on_screenshot: if on_screenshot:
self._try_screenshot(on_screenshot) self._try_screenshot(on_screenshot, screenshot_full_page)
behavior_script = brozzler.behavior_script( behavior_script = brozzler.behavior_script(
page_url, behavior_parameters, page_url, behavior_parameters,
behaviors_dir=behaviors_dir) behaviors_dir=behaviors_dir)
@ -512,10 +513,10 @@ class Browser:
self.websock_thread.on_request = None self.websock_thread.on_request = None
self.websock_thread.on_response = None self.websock_thread.on_response = None
def _try_screenshot(self, on_screenshot): def _try_screenshot(self, on_screenshot, full_page=False):
for i in range(3): for i in range(3):
try: try:
jpeg_bytes = self.screenshot() jpeg_bytes = self.screenshot(full_page)
on_screenshot(jpeg_bytes) on_screenshot(jpeg_bytes)
return return
except BrowsingTimeout as e: except BrowsingTimeout as e:
@ -591,10 +592,36 @@ class Browser:
'problem extracting outlinks, result message: %s', message) 'problem extracting outlinks, result message: %s', message)
return frozenset() return frozenset()
def screenshot(self, timeout=45): def screenshot(self, full_page=False, timeout=45):
"""Optionally capture full page screenshot using puppeteer as an
inspiration:
https://github.com/GoogleChrome/puppeteer/blob/master/lib/Page.js#L898
"""
self.logger.info('taking screenshot') self.logger.info('taking screenshot')
if full_page:
self.websock_thread.expect_result(self._command_id.peek()) self.websock_thread.expect_result(self._command_id.peek())
msg_id = self.send_to_chrome(method='Page.captureScreenshot') msg_id = self.send_to_chrome(method='Page.getLayoutMetrics')
self._wait_for(
lambda: self.websock_thread.received_result(msg_id),
timeout=timeout)
message = self.websock_thread.pop_result(msg_id)
width = message['result']['contentSize']['width']
height = message['result']['contentSize']['height']
clip = dict(x=0, y=0, width=width, height=height, scale=1)
deviceScaleFactor = 1
screenOrientation = {'angle': 0, 'type': 'portraitPrimary'}
self.send_to_chrome(
method='Emulation.setDeviceMetricsOverride',
params=dict(mobile=False, width=width, height=height,
deviceScaleFactor=deviceScaleFactor,
screenOrientation=screenOrientation)
)
capture_params = {'format': 'jpeg', 'clip': clip}
else:
capture_params = {'format': 'jpeg'}
self.websock_thread.expect_result(self._command_id.peek())
msg_id = self.send_to_chrome(method='Page.captureScreenshot',
params=capture_params)
self._wait_for( self._wait_for(
lambda: self.websock_thread.received_result(msg_id), lambda: self.websock_thread.received_result(msg_id),
timeout=timeout) timeout=timeout)