diff --git a/brozzler/behaviors.yaml b/brozzler/behaviors.yaml index f60b5fb..05e7fce 100644 --- a/brozzler/behaviors.yaml +++ b/brozzler/behaviors.yaml @@ -25,8 +25,6 @@ url_regex: '^https?://(?:www\.)?instagram\.com/.*$' behavior_js_template: umbraBehavior.js.j2 default_parameters: - interval: 500 - skip_iframes: true actions: - selector: button.coreSpriteDismissLarge - selector: 'a>.eLAPa>.KL4Bh' diff --git a/brozzler/js-templates/umbraBehavior.js.j2 b/brozzler/js-templates/umbraBehavior.js.j2 index 7c24a13..dc4f083 100644 --- a/brozzler/js-templates/umbraBehavior.js.j2 +++ b/brozzler/js-templates/umbraBehavior.js.j2 @@ -26,11 +26,6 @@ class UmbraBehavior { this.idleSince = null; this.intervalId = null; this.intervalTimeMs = {{interval or 300}}; - {% if skip_iframes %} - this.skipIframes = true; - {% else %} - this.skipIframes = false; - {% endif %} this.index = 0; } @@ -48,11 +43,15 @@ class UmbraBehavior { var documents = []; documents[0] = document; - if (!(this.skipIframes)) { - var iframes = document.querySelectorAll("iframe"); - var iframesLength = iframes.length; - for (var i = 0; i < iframesLength; i++) { + var iframes = document.querySelectorAll("iframe"); + var iframesLength = iframes.length; + for (var i = 0; i < iframesLength; i++) { + try { documents.push(iframes[i].contentWindow.document); + } catch (e) { + // it'd be too much logging because this is common: + // SecurityError: Blocked a frame with origin "..." from accessing a cross-origin frame + // console.log("exception looking at iframe" + iframes[i] + ": " + e); } } var documentsLength = documents.length; diff --git a/setup.py b/setup.py index 1fb7ff8..98bcf3e 100755 --- a/setup.py +++ b/setup.py @@ -32,7 +32,7 @@ def find_package_data(package): setuptools.setup( name='brozzler', - version='1.5.dev317', + version='1.5.dev319', description='Distributed web crawling with browsers', url='https://github.com/internetarchive/brozzler', author='Noah Levitt',