diff --git a/README.rst b/README.rst index 9f9c28a..c471ae1 100644 --- a/README.rst +++ b/README.rst @@ -1,4 +1,4 @@ -.. image:: https://travis-ci.org/internetarchive/brozzler.svg +.. image:: https://api.travis-ci.org/internetarchive/brozzler.svg?branch=master :target: https://travis-ci.org/internetarchive/brozzler .. |logo| image:: https://cdn.rawgit.com/internetarchive/brozzler/1.1b12/brozzler/dashboard/static/brozzler.svg diff --git a/brozzler/behaviors.yaml b/brozzler/behaviors.yaml index 7667627..32ab412 100644 --- a/brozzler/behaviors.yaml +++ b/brozzler/behaviors.yaml @@ -25,7 +25,7 @@ interval: 1000 actions: - selector: a[data-testid="snapshot_footer_link"] - childSelector: i[class="_271o img sp_-vbjDsgypf1 sx_1de63f"] + childSelector: i[class="_271o img sp_KBE8sh--02o sx_5d0205"] closeSelector: 'div._7lq1 > button' - url_regex: '^https?://(?:www\.)?facebook\.com/.*$' @@ -39,7 +39,7 @@ url_regex: '^https?://(?:www\.)?instagram\.com/.*$' behavior_js_template: umbraBehavior.js.j2 default_parameters: - interval: 500 + interval: 2000 actions: - selector: .glyphsSpriteGrey_Close rmSelector: '.RnEpo' diff --git a/brozzler/chrome.py b/brozzler/chrome.py index cbca3e5..671000b 100644 --- a/brozzler/chrome.py +++ b/brozzler/chrome.py @@ -177,6 +177,9 @@ class Chrome: '--disable-web-security', '--disable-notifications', '--disable-extensions', '--disable-save-password-bubble'] + extra_chrome_args = os.environ.get('BROZZLER_EXTRA_CHROME_ARGS') + if extra_chrome_args: + chrome_args.extend(extra_chrome_args.split()) if disk_cache_dir: chrome_args.append('--disk-cache-dir=%s' % disk_cache_dir) if disk_cache_size: diff --git a/brozzler/model.py b/brozzler/model.py index 77dae70..f65fe50 100644 --- a/brozzler/model.py +++ b/brozzler/model.py @@ -43,13 +43,20 @@ class JobValidator(cerberus.Validator): return url.scheme in ('http', 'https', 'ftp') class InvalidJobConf(Exception): - def __init__(self, errors): - self.errors = errors + def __init__(self, validator): + self.errors = validator.errors + try: + # Cerberus does a nice job hiding the bad value. In the case I + # debugged, I found it here. Maybe there's a better way to see it. + value = validator._errors[0].info[0][0].info[0][0].value + self.errors['bad value'] = value + except: + value = None def validate_conf(job_conf, schema=load_schema()): v = JobValidator(schema) if not v.validate(job_conf, normalize=False): - raise InvalidJobConf(v.errors) + raise InvalidJobConf(v) def merge(a, b): if isinstance(a, dict) and isinstance(b, dict): diff --git a/setup.py b/setup.py index d5442a6..ef6391d 100644 --- a/setup.py +++ b/setup.py @@ -32,7 +32,7 @@ def find_package_data(package): setuptools.setup( name='brozzler', - version='1.5.15', + version='1.5.17', description='Distributed web crawling with browsers', url='https://github.com/internetarchive/brozzler', author='Noah Levitt',