mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-08-07 05:52:27 -04:00
Add --no-headless option to brozzle-page and brozzler-worker CLI
This commit is contained in:
parent
7d7968e833
commit
08bb09ff06
2 changed files with 18 additions and 0 deletions
|
@@ -270,6 +270,12 @@ def brozzle_page(argv=None):
|
||||||
help="use this password to try to log in if a login form is found",
|
help="use this password to try to log in if a login form is found",
|
||||||
)
|
)
|
||||||
arg_parser.add_argument("--proxy", dest="proxy", default=None, help="http proxy")
|
arg_parser.add_argument("--proxy", dest="proxy", default=None, help="http proxy")
|
||||||
|
arg_parser.add_argument(
|
||||||
|
"--no-headless",
|
||||||
|
dest="headless",
|
||||||
|
action="store_false",
|
||||||
|
help="Do not run Chrome headlessly",
|
||||||
|
)
|
||||||
arg_parser.add_argument(
|
arg_parser.add_argument(
|
||||||
"--browser_throughput",
|
"--browser_throughput",
|
||||||
type=int,
|
type=int,
|
||||||
|
@@ -358,6 +364,7 @@ def brozzle_page(argv=None):
|
||||||
worker = brozzler.BrozzlerWorker(
|
worker = brozzler.BrozzlerWorker(
|
||||||
frontier=None,
|
frontier=None,
|
||||||
proxy=args.proxy,
|
proxy=args.proxy,
|
||||||
|
headless=args.headless,
|
||||||
skip_extract_outlinks=args.skip_extract_outlinks,
|
skip_extract_outlinks=args.skip_extract_outlinks,
|
||||||
skip_visit_hashtags=args.skip_visit_hashtags,
|
skip_visit_hashtags=args.skip_visit_hashtags,
|
||||||
skip_youtube_dl=args.skip_youtube_dl,
|
skip_youtube_dl=args.skip_youtube_dl,
|
||||||
|
@@ -390,6 +397,7 @@ def brozzle_page(argv=None):
|
||||||
proxy=args.proxy,
|
proxy=args.proxy,
|
||||||
window_height=args.window_height,
|
window_height=args.window_height,
|
||||||
window_width=args.window_width,
|
window_width=args.window_width,
|
||||||
|
headless=args.headless,
|
||||||
)
|
)
|
||||||
outlinks = worker.brozzle_page(
|
outlinks = worker.brozzle_page(
|
||||||
browser,
|
browser,
|
||||||
|
@@ -568,6 +576,12 @@ def brozzler_worker(argv=None):
|
||||||
help="max number of chrome instances simultaneously browsing pages",
|
help="max number of chrome instances simultaneously browsing pages",
|
||||||
)
|
)
|
||||||
arg_parser.add_argument("--proxy", dest="proxy", default=None, help="http proxy")
|
arg_parser.add_argument("--proxy", dest="proxy", default=None, help="http proxy")
|
||||||
|
arg_parser.add_argument(
|
||||||
|
"--no-headless",
|
||||||
|
dest="headless",
|
||||||
|
action="store_false",
|
||||||
|
help="Do not run Chrome headlessly",
|
||||||
|
)
|
||||||
arg_parser.add_argument(
|
arg_parser.add_argument(
|
||||||
"--browser_throughput",
|
"--browser_throughput",
|
||||||
type=int,
|
type=int,
|
||||||
|
@@ -707,6 +721,7 @@ def brozzler_worker(argv=None):
|
||||||
max_browsers=int(args.max_browsers),
|
max_browsers=int(args.max_browsers),
|
||||||
chrome_exe=args.chrome_exe,
|
chrome_exe=args.chrome_exe,
|
||||||
proxy=args.proxy,
|
proxy=args.proxy,
|
||||||
|
headless=args.headless,
|
||||||
warcprox_auto=args.warcprox_auto,
|
warcprox_auto=args.warcprox_auto,
|
||||||
skip_extract_outlinks=args.skip_extract_outlinks,
|
skip_extract_outlinks=args.skip_extract_outlinks,
|
||||||
skip_visit_hashtags=args.skip_visit_hashtags,
|
skip_visit_hashtags=args.skip_visit_hashtags,
|
||||||
|
|
|
@@ -67,6 +67,7 @@ class BrozzlerWorker:
|
||||||
chrome_exe="chromium-browser",
|
chrome_exe="chromium-browser",
|
||||||
warcprox_auto=False,
|
warcprox_auto=False,
|
||||||
proxy=None,
|
proxy=None,
|
||||||
|
headless=True,
|
||||||
skip_extract_outlinks=False,
|
skip_extract_outlinks=False,
|
||||||
skip_visit_hashtags=False,
|
skip_visit_hashtags=False,
|
||||||
skip_youtube_dl=False,
|
skip_youtube_dl=False,
|
||||||
|
@@ -94,6 +95,7 @@ class BrozzlerWorker:
|
||||||
self._proxy = proxy
|
self._proxy = proxy
|
||||||
assert not (warcprox_auto and proxy)
|
assert not (warcprox_auto and proxy)
|
||||||
self._proxy_is_warcprox = None
|
self._proxy_is_warcprox = None
|
||||||
|
self._headless = headless
|
||||||
self._skip_extract_outlinks = skip_extract_outlinks
|
self._skip_extract_outlinks = skip_extract_outlinks
|
||||||
self._skip_visit_hashtags = skip_visit_hashtags
|
self._skip_visit_hashtags = skip_visit_hashtags
|
||||||
self._skip_youtube_dl = skip_youtube_dl
|
self._skip_youtube_dl = skip_youtube_dl
|
||||||
|
@@ -509,6 +511,7 @@ class BrozzlerWorker:
|
||||||
cookie_db=site.get("cookie_db"),
|
cookie_db=site.get("cookie_db"),
|
||||||
window_height=self._window_height,
|
window_height=self._window_height,
|
||||||
window_width=self._window_width,
|
window_width=self._window_width,
|
||||||
|
headless=self._headless,
|
||||||
)
|
)
|
||||||
final_page_url, outlinks = browser.browse_page(
|
final_page_url, outlinks = browser.browse_page(
|
||||||
page.url,
|
page.url,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue