Configurable JS templates location

Brozzler hard-codes the locations of its JS behavior definitions:
``brozzler/behaviors.yaml`` and ``brozzler/js-templates/``. With this change,
you can pass the optional ``behaviors_dir`` parameter to
``browser.browse_page`` to load ``behaviors.yaml`` and ``js-templates/`` from
a custom directory, which makes it possible to use your own JS behaviors.
This commit is contained in:
Vangelis Banos 2018-01-04 17:37:02 +00:00
parent 503771d653
commit dacfba330c
2 changed files with 24 additions and 12 deletions

View File

@ -68,29 +68,34 @@ logging._levelToName[TRACE] = 'TRACE'
logging._nameToLevel['TRACE'] = TRACE logging._nameToLevel['TRACE'] = TRACE
_behaviors = None _behaviors = None
def behaviors(): def behaviors(behaviors_dir=None):
"""Return list of JS behaviors loaded from YAML file.
:param behaviors_dir: Directory containing `behaviors.yaml` and
`js-templates/`. Defaults to brozzler dir.
"""
import os, yaml, string import os, yaml, string
global _behaviors global _behaviors
if _behaviors is None: if _behaviors is None:
behaviors_yaml = os.path.join( cwd = behaviors_dir or os.path.dirname(__file__)
os.path.dirname(__file__), 'behaviors.yaml') behaviors_yaml = os.path.join(cwd, 'behaviors.yaml')
with open(behaviors_yaml) as fin: with open(behaviors_yaml) as fin:
_behaviors = yaml.load(fin) _behaviors = yaml.load(fin)
return _behaviors return _behaviors
def behavior_script(url, template_parameters=None): def behavior_script(url, template_parameters=None, behaviors_dir=None):
''' '''
Returns the javascript behavior string populated with template_parameters. Returns the javascript behavior string populated with template_parameters.
''' '''
import re, logging import re, logging
for behavior in behaviors(): for behavior in behaviors(behaviors_dir=behaviors_dir):
if re.match(behavior['url_regex'], url): if re.match(behavior['url_regex'], url):
parameters = dict() parameters = dict()
if 'default_parameters' in behavior: if 'default_parameters' in behavior:
parameters.update(behavior['default_parameters']) parameters.update(behavior['default_parameters'])
if template_parameters: if template_parameters:
parameters.update(template_parameters) parameters.update(template_parameters)
template = jinja2_environment().get_template( template = jinja2_environment(behaviors_dir).get_template(
behavior['behavior_js_template']) behavior['behavior_js_template'])
script = template.render(parameters) script = template.render(parameters)
logging.info( logging.info(
@ -229,12 +234,16 @@ def sleep(duration):
time.sleep(min(duration - elapsed, 0.5)) time.sleep(min(duration - elapsed, 0.5))
_jinja2_env = None _jinja2_env = None
def jinja2_environment(): def jinja2_environment(behaviors_dir=None):
global _jinja2_env global _jinja2_env
if not _jinja2_env: if not _jinja2_env:
import jinja2, json import os, jinja2, json
_jinja2_env = jinja2.Environment( if behaviors_dir:
loader=jinja2.PackageLoader('brozzler', 'js-templates')) _loader = jinja2.FileSystemLoader(os.path.join(behaviors_dir,
'js-templates'))
else:
_loader=jinja2.PackageLoader('brozzler', 'js-templates')
_jinja2_env = jinja2.Environment(loader=_loader)
_jinja2_env.filters['json'] = json.dumps _jinja2_env.filters['json'] = json.dumps
return _jinja2_env return _jinja2_env

View File

@ -377,7 +377,7 @@ class Browser:
def browse_page( def browse_page(
self, page_url, extra_headers=None, self, page_url, extra_headers=None,
user_agent=None, behavior_parameters=None, user_agent=None, behavior_parameters=None, behaviors_dir=None,
on_request=None, on_response=None, on_screenshot=None, on_request=None, on_response=None, on_screenshot=None,
username=None, password=None, hashtags=None, username=None, password=None, hashtags=None,
skip_extract_outlinks=False, skip_visit_hashtags=False, skip_extract_outlinks=False, skip_visit_hashtags=False,
@ -397,6 +397,8 @@ class Browser:
supplied (default None) supplied (default None)
behavior_parameters: dict of parameters for populating the behavior_parameters: dict of parameters for populating the
javascript behavior template (default None) javascript behavior template (default None)
behaviors_dir: Directory containing behaviors.yaml and JS templates
(default None loads Brozzler default JS behaviors)
on_request: callback to invoke on every Network.requestWillBeSent on_request: callback to invoke on every Network.requestWillBeSent
event, takes one argument, the json-decoded message (default event, takes one argument, the json-decoded message (default
None) None)
@ -447,7 +449,8 @@ class Browser:
jpeg_bytes = self.screenshot() jpeg_bytes = self.screenshot()
on_screenshot(jpeg_bytes) on_screenshot(jpeg_bytes)
behavior_script = brozzler.behavior_script( behavior_script = brozzler.behavior_script(
page_url, behavior_parameters) page_url, behavior_parameters,
behaviors_dir=behaviors_dir)
self.run_behavior(behavior_script, timeout=behavior_timeout) self.run_behavior(behavior_script, timeout=behavior_timeout)
if skip_extract_outlinks: if skip_extract_outlinks:
outlinks = [] outlinks = []