mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-24 00:29:53 -05:00
configurable window height & width
This commit is contained in:
parent
7783f92ce2
commit
6d69105c79
@ -1,7 +1,7 @@
|
|||||||
'''
|
'''
|
||||||
brozzler/browser.py - manages the browsers for brozzler
|
brozzler/browser.py - manages the browsers for brozzler
|
||||||
|
|
||||||
Copyright (C) 2014-2020 Internet Archive
|
Copyright (C) 2014-2023 Internet Archive
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
you may not use this file except in compliance with the License.
|
you may not use this file except in compliance with the License.
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
'''
|
'''
|
||||||
brozzler/chrome.py - manages the chrome/chromium browser for brozzler
|
brozzler/chrome.py - manages the chrome/chromium browser for brozzler
|
||||||
|
|
||||||
Copyright (C) 2014-2020 Internet Archive
|
Copyright (C) 2014-2023 Internet Archive
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
you may not use this file except in compliance with the License.
|
you may not use this file except in compliance with the License.
|
||||||
@ -135,7 +135,8 @@ class Chrome:
|
|||||||
return cookie_db
|
return cookie_db
|
||||||
|
|
||||||
def start(self, proxy=None, cookie_db=None, disk_cache_dir=None,
|
def start(self, proxy=None, cookie_db=None, disk_cache_dir=None,
|
||||||
disk_cache_size=None, websocket_timeout=60):
|
disk_cache_size=None, websocket_timeout=60,
|
||||||
|
window_height=900, window_width=1400):
|
||||||
'''
|
'''
|
||||||
Starts chrome/chromium process.
|
Starts chrome/chromium process.
|
||||||
|
|
||||||
@ -150,6 +151,7 @@ class Chrome:
|
|||||||
disk_cache_size: Forces the maximum disk space to be used by the disk
|
disk_cache_size: Forces the maximum disk space to be used by the disk
|
||||||
cache, in bytes. (default None)
|
cache, in bytes. (default None)
|
||||||
websocket_timeout: websocket timeout, in seconds
|
websocket_timeout: websocket timeout, in seconds
|
||||||
|
window_height, window_width: window height and width, in pixels
|
||||||
Returns:
|
Returns:
|
||||||
websocket url to chrome window with about:blank loaded
|
websocket url to chrome window with about:blank loaded
|
||||||
'''
|
'''
|
||||||
@ -172,7 +174,8 @@ class Chrome:
|
|||||||
'--disable-renderer-backgrounding', '--disable-hang-monitor',
|
'--disable-renderer-backgrounding', '--disable-hang-monitor',
|
||||||
'--disable-background-timer-throttling', '--mute-audio',
|
'--disable-background-timer-throttling', '--mute-audio',
|
||||||
'--disable-web-sockets',
|
'--disable-web-sockets',
|
||||||
'--window-size=1400,900', '--no-default-browser-check',
|
f'--window-size={window_width},{window_height}',
|
||||||
|
'--no-default-browser-check',
|
||||||
'--disable-first-run-ui', '--no-first-run',
|
'--disable-first-run-ui', '--no-first-run',
|
||||||
'--homepage=about:blank', '--disable-direct-npapi-requests',
|
'--homepage=about:blank', '--disable-direct-npapi-requests',
|
||||||
'--disable-web-security', '--disable-notifications',
|
'--disable-web-security', '--disable-notifications',
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
'''
|
'''
|
||||||
brozzler/cli.py - brozzler command line executables
|
brozzler/cli.py - brozzler command line executables
|
||||||
|
|
||||||
Copyright (C) 2014-2019 Internet Archive
|
Copyright (C) 2014-2023 Internet Archive
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
you may not use this file except in compliance with the License.
|
you may not use this file except in compliance with the License.
|
||||||
@ -156,6 +156,12 @@ def brozzle_page(argv=None):
|
|||||||
arg_parser.add_argument(
|
arg_parser.add_argument(
|
||||||
'--browser_throughput', type=int, dest='download_throughput', default=-1,
|
'--browser_throughput', type=int, dest='download_throughput', default=-1,
|
||||||
help='Chrome DevTools downloadThroughput for Network.emulateNetworkConditions')
|
help='Chrome DevTools downloadThroughput for Network.emulateNetworkConditions')
|
||||||
|
arg_parser.add_argument(
|
||||||
|
'--browser_window_height', type=int, dest='window_height', default=900,
|
||||||
|
help='browser window height in pixels')
|
||||||
|
arg_parser.add_argument(
|
||||||
|
'--browser_window_width', type=int, dest='window_width', default=1400,
|
||||||
|
help='browser window width in pixels')
|
||||||
arg_parser.add_argument(
|
arg_parser.add_argument(
|
||||||
'--stealth', dest='stealth', action='store_true',
|
'--stealth', dest='stealth', action='store_true',
|
||||||
help='Try to avoid web bot detection')
|
help='Try to avoid web bot detection')
|
||||||
@ -193,6 +199,8 @@ def brozzle_page(argv=None):
|
|||||||
simpler404=args.simpler404,
|
simpler404=args.simpler404,
|
||||||
screenshot_full_page=args.screenshot_full_page,
|
screenshot_full_page=args.screenshot_full_page,
|
||||||
download_throughput=args.download_throughput,
|
download_throughput=args.download_throughput,
|
||||||
|
window_height=args.window_height,
|
||||||
|
window_width=args.window_width,
|
||||||
stealth=args.stealth)
|
stealth=args.stealth)
|
||||||
|
|
||||||
def on_screenshot(screenshot_jpeg):
|
def on_screenshot(screenshot_jpeg):
|
||||||
@ -206,7 +214,7 @@ def brozzle_page(argv=None):
|
|||||||
|
|
||||||
browser = brozzler.Browser(chrome_exe=args.chrome_exe)
|
browser = brozzler.Browser(chrome_exe=args.chrome_exe)
|
||||||
try:
|
try:
|
||||||
browser.start(proxy=args.proxy)
|
browser.start(proxy=args.proxy, window_height=args.window_height, window_width=args.window_width)
|
||||||
outlinks = worker.brozzle_page(
|
outlinks = worker.brozzle_page(
|
||||||
browser, site, page, on_screenshot=on_screenshot,
|
browser, site, page, on_screenshot=on_screenshot,
|
||||||
enable_youtube_dl=not args.skip_youtube_dl)
|
enable_youtube_dl=not args.skip_youtube_dl)
|
||||||
@ -326,6 +334,12 @@ def brozzler_worker(argv=None):
|
|||||||
arg_parser.add_argument(
|
arg_parser.add_argument(
|
||||||
'--browser_throughput', type=int, dest='download_throughput', default=-1,
|
'--browser_throughput', type=int, dest='download_throughput', default=-1,
|
||||||
help='Chrome DevTools downloadThroughput for Network.emulateNetworkConditions')
|
help='Chrome DevTools downloadThroughput for Network.emulateNetworkConditions')
|
||||||
|
arg_parser.add_argument(
|
||||||
|
'--browser_window_height', type=int, dest='window_height', default=900,
|
||||||
|
help='browser window height in pixels')
|
||||||
|
arg_parser.add_argument(
|
||||||
|
'--browser_window_width', type=int, dest='window_width', default=1400,
|
||||||
|
help='browser window width in pixels')
|
||||||
arg_parser.add_argument(
|
arg_parser.add_argument(
|
||||||
'--warcprox-auto', dest='warcprox_auto', action='store_true',
|
'--warcprox-auto', dest='warcprox_auto', action='store_true',
|
||||||
help=(
|
help=(
|
||||||
|
@ -3,7 +3,7 @@ brozzler/worker.py - BrozzlerWorker brozzles pages from the frontier, meaning
|
|||||||
it runs youtube-dl on them, browses them and runs behaviors if appropriate,
|
it runs youtube-dl on them, browses them and runs behaviors if appropriate,
|
||||||
scopes and adds outlinks to the frontier
|
scopes and adds outlinks to the frontier
|
||||||
|
|
||||||
Copyright (C) 2014-2018 Internet Archive
|
Copyright (C) 2014-2023 Internet Archive
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
you may not use this file except in compliance with the License.
|
you may not use this file except in compliance with the License.
|
||||||
@ -53,7 +53,8 @@ class BrozzlerWorker:
|
|||||||
skip_extract_outlinks=False, skip_visit_hashtags=False,
|
skip_extract_outlinks=False, skip_visit_hashtags=False,
|
||||||
skip_youtube_dl=False, simpler404=False, screenshot_full_page=False,
|
skip_youtube_dl=False, simpler404=False, screenshot_full_page=False,
|
||||||
page_timeout=300, behavior_timeout=900, extract_outlinks_timeout=60,
|
page_timeout=300, behavior_timeout=900, extract_outlinks_timeout=60,
|
||||||
download_throughput=-1, stealth=False):
|
download_throughput=-1, stealth=False,
|
||||||
|
window_height=900, window_width=1400):
|
||||||
self._frontier = frontier
|
self._frontier = frontier
|
||||||
self._service_registry = service_registry
|
self._service_registry = service_registry
|
||||||
self._max_browsers = max_browsers
|
self._max_browsers = max_browsers
|
||||||
@ -71,6 +72,8 @@ class BrozzlerWorker:
|
|||||||
self._behavior_timeout = behavior_timeout
|
self._behavior_timeout = behavior_timeout
|
||||||
self._extract_outlinks_timeout = extract_outlinks_timeout
|
self._extract_outlinks_timeout = extract_outlinks_timeout
|
||||||
self._download_throughput = download_throughput
|
self._download_throughput = download_throughput
|
||||||
|
self._window_height = window_height
|
||||||
|
self._window_width = window_width
|
||||||
self._stealth = stealth
|
self._stealth = stealth
|
||||||
|
|
||||||
self._browser_pool = brozzler.browser.BrowserPool(
|
self._browser_pool = brozzler.browser.BrowserPool(
|
||||||
@ -294,7 +297,9 @@ class BrozzlerWorker:
|
|||||||
if not browser.is_running():
|
if not browser.is_running():
|
||||||
browser.start(
|
browser.start(
|
||||||
proxy=self._proxy_for(site),
|
proxy=self._proxy_for(site),
|
||||||
cookie_db=site.get('cookie_db'))
|
cookie_db=site.get('cookie_db'),
|
||||||
|
window_height=self._window_height,
|
||||||
|
window_width=self._window_width)
|
||||||
final_page_url, outlinks = browser.browse_page(
|
final_page_url, outlinks = browser.browse_page(
|
||||||
page.url, extra_headers=site.extra_headers(page),
|
page.url, extra_headers=site.extra_headers(page),
|
||||||
behavior_parameters=site.get('behavior_parameters'),
|
behavior_parameters=site.get('behavior_parameters'),
|
||||||
|
Loading…
x
Reference in New Issue
Block a user