initial login additions

This commit is contained in:
Barbara Miller 2016-10-28 18:20:25 -07:00
parent fef7d6a9fa
commit 6c7f88c171
3 changed files with 14 additions and 1 deletion

7
brozzler/cli.py Normal file → Executable file
View File

@@ -120,6 +120,8 @@ def brozzle_page():
'-e', '--chrome-exe', dest='chrome_exe',
default=suggest_default_chrome_exe(),
help='executable to use to invoke chrome')
arg_parser.add_argument('--behavior-parameters', dest='behavior_parameters',
default=None, help='json blob of parameters to populate the javascript behavior template, e.g. {"parameter_username":"x","parameter_password":"y"}')
arg_parser.add_argument(
'--proxy', dest='proxy', default=None,
help='http proxy')
@@ -133,9 +135,12 @@ def brozzle_page():
args = arg_parser.parse_args(args=sys.argv[1:])
_configure_logging(args)
metadata = {}
if args.behavior_parameters:
metadata["behavior_parameters"] = json.loads(args.behavior_parameters)
site = brozzler.Site(
id=-1, seed=args.url, proxy=args.proxy,
enable_warcprox_features=args.enable_warcprox_features)
metadata=metadata, enable_warcprox_features=args.enable_warcprox_features)
page = brozzler.Page(url=args.url, site_id=site.id)
worker = brozzler.BrozzlerWorker(frontier=None)

View File

@@ -75,6 +75,8 @@ def new_job(frontier, job_conf):
sites = []
for seed_conf in job_conf["seeds"]:
merged_conf = merge(seed_conf, job_conf)
if "login" in merged_conf and "metadata" in merged_conf:
merged_conf["metadata"]["login"] = merged_conf["login"]
site = brozzler.Site(
job_id=job.id, seed=merged_conf["url"],
scope=merged_conf.get("scope"),

View File

@@ -269,10 +269,16 @@ class BrozzlerWorker:
if self._needs_browsing(page, ydl_spy):
self.logger.info('needs browsing: %s', page)
behavior_parameters = {}
if "login" in site.metadata:
behavior_parameters.update(site.metadata["login"])
if "behavior_parameters" in site.metadata:
behavior_parameters.update(site.metadata["behavior_parameters"])
if not browser.is_running():
browser.start(proxy=self._proxy(site), cookie_db=site.cookie_db)
outlinks = browser.browse_page(
page.url, extra_headers=site.extra_headers(),
behavior_parameters=behavior_parameters,
user_agent=site.user_agent,
on_screenshot=_on_screenshot,
on_url_change=page.note_redirect)