mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-24 08:39:59 -05:00
Configurable JS templates location
Brozzler hard-codes the locations of its JS behavior definitions (``brozzler/behaviors.yaml``) and JS templates (``brozzler/js-templates/``). With this change, you can pass the optional ``behaviors_dir`` parameter to ``browser.browse_page`` to load ``behaviors.yaml`` and ``js-templates/`` from a custom directory, so that any custom JS behaviors can be used.
This commit is contained in:
parent
503771d653
commit
dacfba330c
@ -68,29 +68,34 @@ logging._levelToName[TRACE] = 'TRACE'
|
||||
logging._nameToLevel['TRACE'] = TRACE
|
||||
|
||||
_behaviors = None
|
||||
def behaviors():
|
||||
def behaviors(behaviors_dir=None):
|
||||
"""Return list of JS behaviors loaded from YAML file.
|
||||
|
||||
:param behaviors_dir: Directory containing `behaviors.yaml` and
|
||||
`js-templates/`. Defaults to brozzler dir.
|
||||
"""
|
||||
import os, yaml, string
|
||||
global _behaviors
|
||||
if _behaviors is None:
|
||||
behaviors_yaml = os.path.join(
|
||||
os.path.dirname(__file__), 'behaviors.yaml')
|
||||
cwd = behaviors_dir or os.path.dirname(__file__)
|
||||
behaviors_yaml = os.path.join(cwd, 'behaviors.yaml')
|
||||
with open(behaviors_yaml) as fin:
|
||||
_behaviors = yaml.load(fin)
|
||||
return _behaviors
|
||||
|
||||
def behavior_script(url, template_parameters=None):
|
||||
def behavior_script(url, template_parameters=None, behaviors_dir=None):
|
||||
'''
|
||||
Returns the javascript behavior string populated with template_parameters.
|
||||
'''
|
||||
import re, logging
|
||||
for behavior in behaviors():
|
||||
for behavior in behaviors(behaviors_dir=behaviors_dir):
|
||||
if re.match(behavior['url_regex'], url):
|
||||
parameters = dict()
|
||||
if 'default_parameters' in behavior:
|
||||
parameters.update(behavior['default_parameters'])
|
||||
if template_parameters:
|
||||
parameters.update(template_parameters)
|
||||
template = jinja2_environment().get_template(
|
||||
template = jinja2_environment(behaviors_dir).get_template(
|
||||
behavior['behavior_js_template'])
|
||||
script = template.render(parameters)
|
||||
logging.info(
|
||||
@ -229,12 +234,16 @@ def sleep(duration):
|
||||
time.sleep(min(duration - elapsed, 0.5))
|
||||
|
||||
_jinja2_env = None
|
||||
def jinja2_environment():
|
||||
def jinja2_environment(behaviors_dir=None):
|
||||
global _jinja2_env
|
||||
if not _jinja2_env:
|
||||
import jinja2, json
|
||||
_jinja2_env = jinja2.Environment(
|
||||
loader=jinja2.PackageLoader('brozzler', 'js-templates'))
|
||||
import os, jinja2, json
|
||||
if behaviors_dir:
|
||||
_loader = jinja2.FileSystemLoader(os.path.join(behaviors_dir,
|
||||
'js-templates'))
|
||||
else:
|
||||
_loader=jinja2.PackageLoader('brozzler', 'js-templates')
|
||||
_jinja2_env = jinja2.Environment(loader=_loader)
|
||||
_jinja2_env.filters['json'] = json.dumps
|
||||
return _jinja2_env
|
||||
|
||||
|
@ -377,7 +377,7 @@ class Browser:
|
||||
|
||||
def browse_page(
|
||||
self, page_url, extra_headers=None,
|
||||
user_agent=None, behavior_parameters=None,
|
||||
user_agent=None, behavior_parameters=None, behaviors_dir=None,
|
||||
on_request=None, on_response=None, on_screenshot=None,
|
||||
username=None, password=None, hashtags=None,
|
||||
skip_extract_outlinks=False, skip_visit_hashtags=False,
|
||||
@ -397,6 +397,8 @@ class Browser:
|
||||
supplied (default None)
|
||||
behavior_parameters: dict of parameters for populating the
|
||||
javascript behavior template (default None)
|
||||
behaviors_dir: Directory containing behaviors.yaml and JS templates
|
||||
(default None loads Brozzler default JS behaviors)
|
||||
on_request: callback to invoke on every Network.requestWillBeSent
|
||||
event, takes one argument, the json-decoded message (default
|
||||
None)
|
||||
@ -447,7 +449,8 @@ class Browser:
|
||||
jpeg_bytes = self.screenshot()
|
||||
on_screenshot(jpeg_bytes)
|
||||
behavior_script = brozzler.behavior_script(
|
||||
page_url, behavior_parameters)
|
||||
page_url, behavior_parameters,
|
||||
behaviors_dir=behaviors_dir)
|
||||
self.run_behavior(behavior_script, timeout=behavior_timeout)
|
||||
if skip_extract_outlinks:
|
||||
outlinks = []
|
||||
|
Loading…
x
Reference in New Issue
Block a user