Configurable JS templates location

Brozzler hard-codes the locations of its JS behavior definitions:
``brozzler/behaviors.yaml`` and ``brozzler/js-templates/``. With this change,
you can pass the optional ``behaviors_dir`` parameter to
``Browser.browse_page`` to load them from a custom directory and use your own JS behaviors.
This commit is contained in:
Vangelis Banos 2018-01-04 17:37:02 +00:00
parent 503771d653
commit dacfba330c
2 changed files with 24 additions and 12 deletions

View File

@ -68,29 +68,34 @@ logging._levelToName[TRACE] = 'TRACE'
logging._nameToLevel['TRACE'] = TRACE
# Behaviors loaded from the default (brozzler package) directory.
_behaviors = None
# Behaviors loaded from custom directories, keyed by the directory path given.
_behaviors_by_dir = {}


def _load_behaviors_yaml(directory):
    """Parse and return the contents of `behaviors.yaml` found in `directory`."""
    import os
    import yaml
    behaviors_yaml = os.path.join(directory, 'behaviors.yaml')
    with open(behaviors_yaml) as fin:
        # safe_load instead of yaml.load: the directory may be user-supplied,
        # and yaml.load() without an explicit Loader can construct arbitrary
        # objects (and is rejected outright by newer PyYAML releases).
        return yaml.safe_load(fin)


def behaviors(behaviors_dir=None):
    """Return list of JS behaviors loaded from YAML file.

    Results are cached per directory, so calls with different
    `behaviors_dir` values each get the behaviors from their own directory
    rather than whichever directory happened to be loaded first.

    :param behaviors_dir: Directory containing `behaviors.yaml` and
        `js-templates/`. Defaults to brozzler dir.
    :return: the parsed contents of `behaviors.yaml`.
    """
    import os
    global _behaviors
    if not behaviors_dir:
        # Default location: keep using the original module-level cache.
        if _behaviors is None:
            _behaviors = _load_behaviors_yaml(os.path.dirname(__file__))
        return _behaviors
    if behaviors_dir not in _behaviors_by_dir:
        _behaviors_by_dir[behaviors_dir] = _load_behaviors_yaml(behaviors_dir)
    return _behaviors_by_dir[behaviors_dir]
def behavior_script(url, template_parameters=None):
def behavior_script(url, template_parameters=None, behaviors_dir=None):
'''
Returns the javascript behavior string populated with template_parameters.
'''
import re, logging
for behavior in behaviors():
for behavior in behaviors(behaviors_dir=behaviors_dir):
if re.match(behavior['url_regex'], url):
parameters = dict()
if 'default_parameters' in behavior:
parameters.update(behavior['default_parameters'])
if template_parameters:
parameters.update(template_parameters)
template = jinja2_environment().get_template(
template = jinja2_environment(behaviors_dir).get_template(
behavior['behavior_js_template'])
script = template.render(parameters)
logging.info(
@ -229,12 +234,16 @@ def sleep(duration):
time.sleep(min(duration - elapsed, 0.5))
# Jinja2 environment for the templates bundled with the brozzler package.
_jinja2_env = None
# Jinja2 environments for custom directories, keyed by the directory path given.
_jinja2_envs_by_dir = {}


def jinja2_environment(behaviors_dir=None):
    '''
    Returns the jinja2 environment used to render JS behavior templates.

    Environments are cached per directory, so a call with a different
    `behaviors_dir` gets a loader for that directory instead of the first
    environment that was ever created.

    :param behaviors_dir: Directory containing `js-templates/`. When not
        given, templates are loaded from the `brozzler` package.
    '''
    import os, jinja2, json
    global _jinja2_env
    if not behaviors_dir:
        # Default location: keep using the original module-level cache.
        if not _jinja2_env:
            _jinja2_env = jinja2.Environment(
                    loader=jinja2.PackageLoader('brozzler', 'js-templates'))
            _jinja2_env.filters['json'] = json.dumps
        return _jinja2_env
    env = _jinja2_envs_by_dir.get(behaviors_dir)
    if env is None:
        env = jinja2.Environment(loader=jinja2.FileSystemLoader(
            os.path.join(behaviors_dir, 'js-templates')))
        # Same `json` filter the default environment provides.
        env.filters['json'] = json.dumps
        _jinja2_envs_by_dir[behaviors_dir] = env
    return env

View File

@ -377,7 +377,7 @@ class Browser:
def browse_page(
self, page_url, extra_headers=None,
user_agent=None, behavior_parameters=None,
user_agent=None, behavior_parameters=None, behaviors_dir=None,
on_request=None, on_response=None, on_screenshot=None,
username=None, password=None, hashtags=None,
skip_extract_outlinks=False, skip_visit_hashtags=False,
@ -397,6 +397,8 @@ class Browser:
supplied (default None)
behavior_parameters: dict of parameters for populating the
javascript behavior template (default None)
behaviors_dir: Directory containing behaviors.yaml and JS templates
(default None loads Brozzler default JS behaviors)
on_request: callback to invoke on every Network.requestWillBeSent
event, takes one argument, the json-decoded message (default
None)
@ -447,7 +449,8 @@ class Browser:
jpeg_bytes = self.screenshot()
on_screenshot(jpeg_bytes)
behavior_script = brozzler.behavior_script(
page_url, behavior_parameters)
page_url, behavior_parameters,
behaviors_dir=behaviors_dir)
self.run_behavior(behavior_script, timeout=behavior_timeout)
if skip_extract_outlinks:
outlinks = []