mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-24 08:39:59 -05:00
initial login additions
This commit is contained in:
parent
fef7d6a9fa
commit
6c7f88c171
7
brozzler/cli.py
Normal file → Executable file
7
brozzler/cli.py
Normal file → Executable file
@@ -120,6 +120,8 @@ def brozzle_page():
|
||||
'-e', '--chrome-exe', dest='chrome_exe',
|
||||
default=suggest_default_chrome_exe(),
|
||||
help='executable to use to invoke chrome')
|
||||
arg_parser.add_argument('--behavior-parameters', dest='behavior_parameters',
|
||||
default=None, help='json blob of parameters to populate the javascript behavior template, e.g. {"parameter_username":"x","parameter_password":"y"}')
|
||||
arg_parser.add_argument(
|
||||
'--proxy', dest='proxy', default=None,
|
||||
help='http proxy')
|
||||
@@ -133,9 +135,12 @@ def brozzle_page():
|
||||
args = arg_parser.parse_args(args=sys.argv[1:])
|
||||
_configure_logging(args)
|
||||
|
||||
metadata = {}
|
||||
if args.behavior_parameters:
|
||||
metadata["behavior_parameters"] = json.loads(args.behavior_parameters)
|
||||
site = brozzler.Site(
|
||||
id=-1, seed=args.url, proxy=args.proxy,
|
||||
enable_warcprox_features=args.enable_warcprox_features)
|
||||
metadata=metadata, enable_warcprox_features=args.enable_warcprox_features)
|
||||
page = brozzler.Page(url=args.url, site_id=site.id)
|
||||
worker = brozzler.BrozzlerWorker(frontier=None)
|
||||
|
||||
|
@@ -75,6 +75,8 @@ def new_job(frontier, job_conf):
|
||||
sites = []
|
||||
for seed_conf in job_conf["seeds"]:
|
||||
merged_conf = merge(seed_conf, job_conf)
|
||||
if "login" in merged_conf and "metadata" in merged_conf:
|
||||
merged_conf["metadata"]["login"] = merged_conf["login"]
|
||||
site = brozzler.Site(
|
||||
job_id=job.id, seed=merged_conf["url"],
|
||||
scope=merged_conf.get("scope"),
|
||||
|
@@ -269,10 +269,16 @@ class BrozzlerWorker:
|
||||
|
||||
if self._needs_browsing(page, ydl_spy):
|
||||
self.logger.info('needs browsing: %s', page)
|
||||
behavior_parameters = {}
|
||||
if "login" in site.metadata:
|
||||
behavior_parameters.update(site.metadata["login"])
|
||||
if "behavior_parameters" in site.metadata:
|
||||
behavior_parameters.update(site.metadata["behavior_parameters"])
|
||||
if not browser.is_running():
|
||||
browser.start(proxy=self._proxy(site), cookie_db=site.cookie_db)
|
||||
outlinks = browser.browse_page(
|
||||
page.url, extra_headers=site.extra_headers(),
|
||||
behavior_parameters=behavior_parameters,
|
||||
user_agent=site.user_agent,
|
||||
on_screenshot=_on_screenshot,
|
||||
on_url_change=page.note_redirect)
|
||||
|
Loading…
x
Reference in New Issue
Block a user