Add --no-headless option to the brozzle-page and brozzler-worker CLIs
Some checks failed
Python Formatting Check / formatting (push) Has been cancelled
Tests / Run tests (push) Has been cancelled

This commit is contained in:
TheTechRobo 2025-07-28 17:48:30 -04:00 committed by Misty De Méo
parent 7d7968e833
commit 08bb09ff06
2 changed files with 18 additions and 0 deletions

View file

@ -270,6 +270,12 @@ def brozzle_page(argv=None):
help="use this password to try to log in if a login form is found",
)
arg_parser.add_argument("--proxy", dest="proxy", default=None, help="http proxy")
arg_parser.add_argument(
"--no-headless",
dest="headless",
action="store_false",
help="Do not run Chrome headlessly",
)
arg_parser.add_argument(
"--browser_throughput",
type=int,
@ -358,6 +364,7 @@ def brozzle_page(argv=None):
worker = brozzler.BrozzlerWorker(
frontier=None,
proxy=args.proxy,
headless=args.headless,
skip_extract_outlinks=args.skip_extract_outlinks,
skip_visit_hashtags=args.skip_visit_hashtags,
skip_youtube_dl=args.skip_youtube_dl,
@ -390,6 +397,7 @@ def brozzle_page(argv=None):
proxy=args.proxy,
window_height=args.window_height,
window_width=args.window_width,
headless=args.headless,
)
outlinks = worker.brozzle_page(
browser,
@ -568,6 +576,12 @@ def brozzler_worker(argv=None):
help="max number of chrome instances simultaneously browsing pages",
)
arg_parser.add_argument("--proxy", dest="proxy", default=None, help="http proxy")
arg_parser.add_argument(
"--no-headless",
dest="headless",
action="store_false",
help="Do not run Chrome headlessly",
)
arg_parser.add_argument(
"--browser_throughput",
type=int,
@ -707,6 +721,7 @@ def brozzler_worker(argv=None):
max_browsers=int(args.max_browsers),
chrome_exe=args.chrome_exe,
proxy=args.proxy,
headless=args.headless,
warcprox_auto=args.warcprox_auto,
skip_extract_outlinks=args.skip_extract_outlinks,
skip_visit_hashtags=args.skip_visit_hashtags,

View file

@ -67,6 +67,7 @@ class BrozzlerWorker:
chrome_exe="chromium-browser",
warcprox_auto=False,
proxy=None,
headless=True,
skip_extract_outlinks=False,
skip_visit_hashtags=False,
skip_youtube_dl=False,
@ -94,6 +95,7 @@ class BrozzlerWorker:
self._proxy = proxy
assert not (warcprox_auto and proxy)
self._proxy_is_warcprox = None
self._headless = headless
self._skip_extract_outlinks = skip_extract_outlinks
self._skip_visit_hashtags = skip_visit_hashtags
self._skip_youtube_dl = skip_youtube_dl
@ -509,6 +511,7 @@ class BrozzlerWorker:
cookie_db=site.get("cookie_db"),
window_height=self._window_height,
window_width=self._window_width,
headless=self._headless,
)
final_page_url, outlinks = browser.browse_page(
page.url,