From dacfba330ce8dcafd68b2e8a60463e42a71cf180 Mon Sep 17 00:00:00 2001 From: Vangelis Banos Date: Thu, 4 Jan 2018 17:37:02 +0000 Subject: [PATCH] Configurable JS templates location Brozzler hard-codes the location of its JS behaviors: ``brozzler/behaviors.yaml`` and the templates under ``brozzler/js-templates/``. With this change, you can pass the optional ``behaviors_dir`` parameter to ``Browser.browse_page`` to load ``behaviors.yaml`` and ``js-templates/`` from a custom directory and use your own JS behaviors. --- brozzler/__init__.py | 29 +++++++++++++++++++---------- brozzler/browser.py | 7 +++++-- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/brozzler/__init__.py b/brozzler/__init__.py index 1c410ab..46ebf29 100644 --- a/brozzler/__init__.py +++ b/brozzler/__init__.py @@ -68,29 +68,34 @@ logging._levelToName[TRACE] = 'TRACE' logging._nameToLevel['TRACE'] = TRACE _behaviors = None -def behaviors(): +def behaviors(behaviors_dir=None): + """Return list of JS behaviors loaded from YAML file. + + :param behaviors_dir: Directory containing `behaviors.yaml` and + `js-templates/`. Defaults to brozzler dir. + """ import os, yaml, string global _behaviors if _behaviors is None: - behaviors_yaml = os.path.join( - os.path.dirname(__file__), 'behaviors.yaml') + cwd = behaviors_dir or os.path.dirname(__file__) + behaviors_yaml = os.path.join(cwd, 'behaviors.yaml') with open(behaviors_yaml) as fin: _behaviors = yaml.load(fin) return _behaviors -def behavior_script(url, template_parameters=None): +def behavior_script(url, template_parameters=None, behaviors_dir=None): ''' Returns the javascript behavior string populated with template_parameters. 
''' import re, logging - for behavior in behaviors(): + for behavior in behaviors(behaviors_dir=behaviors_dir): if re.match(behavior['url_regex'], url): parameters = dict() if 'default_parameters' in behavior: parameters.update(behavior['default_parameters']) if template_parameters: parameters.update(template_parameters) - template = jinja2_environment().get_template( + template = jinja2_environment(behaviors_dir).get_template( behavior['behavior_js_template']) script = template.render(parameters) logging.info( @@ -229,12 +234,16 @@ def sleep(duration): time.sleep(min(duration - elapsed, 0.5)) _jinja2_env = None -def jinja2_environment(): +def jinja2_environment(behaviors_dir=None): global _jinja2_env if not _jinja2_env: - import jinja2, json - _jinja2_env = jinja2.Environment( - loader=jinja2.PackageLoader('brozzler', 'js-templates')) + import os, jinja2, json + if behaviors_dir: + _loader = jinja2.FileSystemLoader(os.path.join(behaviors_dir, + 'js-templates')) + else: + _loader=jinja2.PackageLoader('brozzler', 'js-templates') + _jinja2_env = jinja2.Environment(loader=_loader) _jinja2_env.filters['json'] = json.dumps return _jinja2_env diff --git a/brozzler/browser.py b/brozzler/browser.py index e827edf..66691ee 100644 --- a/brozzler/browser.py +++ b/brozzler/browser.py @@ -377,7 +377,7 @@ class Browser: def browse_page( self, page_url, extra_headers=None, - user_agent=None, behavior_parameters=None, + user_agent=None, behavior_parameters=None, behaviors_dir=None, on_request=None, on_response=None, on_screenshot=None, username=None, password=None, hashtags=None, skip_extract_outlinks=False, skip_visit_hashtags=False, @@ -397,6 +397,8 @@ class Browser: supplied (default None) behavior_parameters: dict of parameters for populating the javascript behavior template (default None) + behaviors_dir: Directory containing behaviors.yaml and JS templates + (default None loads Brozzler default JS behaviors) on_request: callback to invoke on every Network.requestWillBeSent 
event, takes one argument, the json-decoded message (default None) @@ -447,7 +449,8 @@ class Browser: jpeg_bytes = self.screenshot() on_screenshot(jpeg_bytes) behavior_script = brozzler.behavior_script( - page_url, behavior_parameters) + page_url, behavior_parameters, + behaviors_dir=behaviors_dir) self.run_behavior(behavior_script, timeout=behavior_timeout) if skip_extract_outlinks: outlinks = []