mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-24 00:29:53 -05:00
move behavior_parameters into top level of site configuration
This commit is contained in:
parent
185d65bd5b
commit
9d66f294ec
@@ -120,8 +120,12 @@ def brozzle_page():
|
||||
'-e', '--chrome-exe', dest='chrome_exe',
|
||||
default=suggest_default_chrome_exe(),
|
||||
help='executable to use to invoke chrome')
|
||||
arg_parser.add_argument('--behavior-parameters', dest='behavior_parameters',
|
||||
default=None, help='json blob of parameters to populate the javascript behavior template, e.g. {"parameter_username":"x","parameter_password":"y"}')
|
||||
arg_parser.add_argument(
|
||||
'--behavior-parameters', dest='behavior_parameters',
|
||||
default=None, help=(
|
||||
'json blob of parameters to populate the javascript behavior '
|
||||
'template, e.g. {"parameter_username":"x",'
|
||||
'"parameter_password":"y"}'))
|
||||
arg_parser.add_argument(
|
||||
'--proxy', dest='proxy', default=None,
|
||||
help='http proxy')
|
||||
@@ -135,12 +139,13 @@ def brozzle_page():
|
||||
args = arg_parser.parse_args(args=sys.argv[1:])
|
||||
_configure_logging(args)
|
||||
|
||||
metadata = {}
|
||||
behavior_parameters = {}
|
||||
if args.behavior_parameters:
|
||||
metadata["behavior_parameters"] = json.loads(args.behavior_parameters)
|
||||
behavior_parameters = json.loads(args.behavior_parameters)
|
||||
site = brozzler.Site(
|
||||
id=-1, seed=args.url, proxy=args.proxy,
|
||||
metadata=metadata, enable_warcprox_features=args.enable_warcprox_features)
|
||||
enable_warcprox_features=args.enable_warcprox_features,
|
||||
behavior_parameters=behavior_parameters)
|
||||
page = brozzler.Page(url=args.url, site_id=site.id)
|
||||
worker = brozzler.BrozzlerWorker(frontier=None)
|
||||
|
||||
|
@@ -69,6 +69,9 @@ id:
|
||||
user_agent:
|
||||
type: string
|
||||
|
||||
behavior_parameters:
|
||||
type: dict
|
||||
|
||||
seeds:
|
||||
type: list
|
||||
required: true
|
||||
|
@@ -96,7 +96,7 @@ class Site(brozzler.BaseDictable):
|
||||
status="ACTIVE", claimed=False, start_time=None,
|
||||
last_disclaimed=_EPOCH_UTC, last_claimed_by=None,
|
||||
last_claimed=_EPOCH_UTC, metadata={}, remember_outlinks=None,
|
||||
cookie_db=None, user_agent=None):
|
||||
cookie_db=None, user_agent=None, behavior_parameters=None):
|
||||
|
||||
self.seed = seed
|
||||
self.id = id
|
||||
@@ -117,6 +117,7 @@ class Site(brozzler.BaseDictable):
|
||||
self.remember_outlinks = remember_outlinks
|
||||
self.cookie_db = cookie_db
|
||||
self.user_agent = user_agent
|
||||
self.behavior_parameters = behavior_parameters
|
||||
|
||||
self.scope = scope or {}
|
||||
if not "surt" in self.scope:
|
||||
|
@@ -269,17 +269,11 @@ class BrozzlerWorker:
|
||||
|
||||
if self._needs_browsing(page, ydl_spy):
|
||||
self.logger.info('needs browsing: %s', page)
|
||||
behavior_parameters = {}
|
||||
if "login" in site.metadata:
|
||||
behavior_parameters["parameter_username"] = site.metadata["login"]["username"]
|
||||
behavior_parameters["parameter_password"] = site.metadata["login"]["password"]
|
||||
if "behavior_parameters" in site.metadata:
|
||||
behavior_parameters.update(site.metadata["behavior_parameters"])
|
||||
if not browser.is_running():
|
||||
browser.start(proxy=self._proxy(site), cookie_db=site.cookie_db)
|
||||
outlinks = browser.browse_page(
|
||||
page.url, extra_headers=site.extra_headers(),
|
||||
behavior_parameters=behavior_parameters,
|
||||
behavior_parameters=site.behavior_parameters,
|
||||
user_agent=site.user_agent,
|
||||
on_screenshot=_on_screenshot,
|
||||
on_url_change=page.note_redirect)
|
||||
|
Loading…
x
Reference in New Issue
Block a user