From 6c7f88c1719fc72210a1fbf9e119fe47160b4a50 Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Fri, 28 Oct 2016 18:20:25 -0700 Subject: [PATCH] initial login additions --- brozzler/cli.py | 7 ++++++- brozzler/job.py | 2 ++ brozzler/worker.py | 6 ++++++ 3 files changed, 14 insertions(+), 1 deletion(-) mode change 100644 => 100755 brozzler/cli.py diff --git a/brozzler/cli.py b/brozzler/cli.py old mode 100644 new mode 100755 index cd23969..cf0373b --- a/brozzler/cli.py +++ b/brozzler/cli.py @@ -120,6 +120,8 @@ def brozzle_page(): '-e', '--chrome-exe', dest='chrome_exe', default=suggest_default_chrome_exe(), help='executable to use to invoke chrome') + arg_parser.add_argument('--behavior-parameters', dest='behavior_parameters', + default=None, help='json blob of parameters to populate the javascript behavior template, e.g. {"parameter_username":"x","parameter_password":"y"}') arg_parser.add_argument( '--proxy', dest='proxy', default=None, help='http proxy') @@ -133,9 +135,12 @@ def brozzle_page(): args = arg_parser.parse_args(args=sys.argv[1:]) _configure_logging(args) + metadata = {} + if args.behavior_parameters: + metadata["behavior_parameters"] = json.loads(args.behavior_parameters) site = brozzler.Site( id=-1, seed=args.url, proxy=args.proxy, - enable_warcprox_features=args.enable_warcprox_features) + metadata=metadata, enable_warcprox_features=args.enable_warcprox_features) page = brozzler.Page(url=args.url, site_id=site.id) worker = brozzler.BrozzlerWorker(frontier=None) diff --git a/brozzler/job.py b/brozzler/job.py index d533616..f6b5f92 100644 --- a/brozzler/job.py +++ b/brozzler/job.py @@ -75,6 +75,8 @@ def new_job(frontier, job_conf): sites = [] for seed_conf in job_conf["seeds"]: merged_conf = merge(seed_conf, job_conf) + if "login" in merged_conf and "metadata" in merged_conf: + merged_conf["metadata"]["login"] = merged_conf["login"] site = brozzler.Site( job_id=job.id, seed=merged_conf["url"], scope=merged_conf.get("scope"), diff --git a/brozzler/worker.py b/brozzler/worker.py index 2b3e745..401e4f1 100644 --- a/brozzler/worker.py +++ b/brozzler/worker.py @@ -269,10 +269,16 @@ class BrozzlerWorker: if self._needs_browsing(page, ydl_spy): self.logger.info('needs browsing: %s', page) + behavior_parameters = {} + if "login" in site.metadata: + behavior_parameters.update(site.metadata["login"]) + if "behavior_parameters" in site.metadata: + behavior_parameters.update(site.metadata["behavior_parameters"]) if not browser.is_running(): browser.start(proxy=self._proxy(site), cookie_db=site.cookie_db) outlinks = browser.browse_page( page.url, extra_headers=site.extra_headers(), + behavior_parameters=behavior_parameters, user_agent=site.user_agent, on_screenshot=_on_screenshot, on_url_change=page.note_redirect)