mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-04-20 23:56:34 -04:00
Add --screenshot-full-page option to brozzle-page
This commit is contained in:
parent
e5a3ada349
commit
65c7ccdcff
@ -153,6 +153,9 @@ def brozzle_page(argv=None):
|
||||
help='use this password to try to log in if a login form is found')
|
||||
arg_parser.add_argument(
|
||||
'--proxy', dest='proxy', default=None, help='http proxy')
|
||||
arg_parser.add_argument(
|
||||
'--screenshot-full-page', dest='screenshot_full_page',
|
||||
action='store_true')
|
||||
arg_parser.add_argument(
|
||||
'--skip-extract-outlinks', dest='skip_extract_outlinks',
|
||||
action='store_true')
|
||||
@ -174,19 +177,20 @@ def brozzle_page(argv=None):
|
||||
'id': -1, 'seed': args.url, 'behavior_parameters': behavior_parameters,
|
||||
'username': args.username, 'password': args.password})
|
||||
page = brozzler.Page(None, {'url': args.url, 'site_id': site.id})
|
||||
worker = brozzler.BrozzlerWorker(frontier=None, proxy=args.proxy,
|
||||
skip_extract_outlinks=args.skip_extract_outlinks,
|
||||
skip_visit_hashtags=args.skip_visit_hashtags,
|
||||
skip_youtube_dl=args.skip_youtube_dl)
|
||||
worker = brozzler.BrozzlerWorker(
|
||||
frontier=None, proxy=args.proxy,
|
||||
skip_extract_outlinks=args.skip_extract_outlinks,
|
||||
skip_visit_hashtags=args.skip_visit_hashtags,
|
||||
skip_youtube_dl=args.skip_youtube_dl,
|
||||
screenshot_full_page=args.screenshot_full_page)
|
||||
|
||||
def on_screenshot(screenshot_png):
|
||||
OK_CHARS = (string.ascii_letters + string.digits)
|
||||
filename = '/tmp/{}-{:%Y%m%d%H%M%S}.png'.format(
|
||||
def on_screenshot(screenshot_jpeg):
|
||||
OK_CHARS = string.ascii_letters + string.digits
|
||||
filename = '/tmp/{}-{:%Y%m%d%H%M%S}.jpg'.format(
|
||||
''.join(ch if ch in OK_CHARS else '_' for ch in args.url),
|
||||
datetime.datetime.now())
|
||||
# logging.info('len(screenshot_png)=%s', len(screenshot_png))
|
||||
with open(filename, 'wb') as f:
|
||||
f.write(screenshot_png)
|
||||
f.write(screenshot_jpeg)
|
||||
logging.info('wrote screenshot to %s', filename)
|
||||
|
||||
browser = brozzler.Browser(chrome_exe=args.chrome_exe)
|
||||
|
@ -50,7 +50,8 @@ class BrozzlerWorker:
|
||||
self, frontier, service_registry=None, max_browsers=1,
|
||||
chrome_exe="chromium-browser", warcprox_auto=False, proxy=None,
|
||||
skip_extract_outlinks=False, skip_visit_hashtags=False,
|
||||
skip_youtube_dl=False, page_timeout=300, behavior_timeout=900):
|
||||
skip_youtube_dl=False, screenshot_full_page=False,
|
||||
page_timeout=300, behavior_timeout=900):
|
||||
self._frontier = frontier
|
||||
self._service_registry = service_registry
|
||||
self._max_browsers = max_browsers
|
||||
@ -62,6 +63,7 @@ class BrozzlerWorker:
|
||||
self._skip_extract_outlinks = skip_extract_outlinks
|
||||
self._skip_visit_hashtags = skip_visit_hashtags
|
||||
self._skip_youtube_dl = skip_youtube_dl
|
||||
self._screenshot_full_page = screenshot_full_page
|
||||
self._page_timeout = page_timeout
|
||||
self._behavior_timeout = behavior_timeout
|
||||
|
||||
@ -295,6 +297,7 @@ class BrozzlerWorker:
|
||||
skip_extract_outlinks=self._skip_extract_outlinks,
|
||||
skip_visit_hashtags=self._skip_visit_hashtags,
|
||||
skip_youtube_dl=self._skip_youtube_dl,
|
||||
screenshot_full_page=self._screenshot_full_page,
|
||||
page_timeout=self._page_timeout,
|
||||
behavior_timeout=self._behavior_timeout)
|
||||
if final_page_url != page.url:
|
||||
|
Loading…
x
Reference in New Issue
Block a user