Flatten behaviors.yaml into a top-level list in which every entry names its
script via behavior_js_template, and switch simpledo.js.j2 from "${...}"
placeholders to Jinja expressions. Template values are serialized with the
built-in tojson filter so strings render as quoted JavaScript literals and
Python booleans render as true/false.

NOTE(review): leading whitespace inside the hunk lines was reconstructed from
a whitespace-collapsed copy of this patch; confirm it against the target
files (or apply with whitespace tolerance) before relying on it.

diff --git a/brozzler/behaviors.yaml b/brozzler/behaviors.yaml
index 3f57bfa..9c4953d 100644
--- a/brozzler/behaviors.yaml
+++ b/brozzler/behaviors.yaml
@@ -17,85 +17,88 @@
 #
 # first matched behavior is used, so order matters here
 
-behaviors:
-  -
-    url_regex: '^https?://(?:www\.)?facebook\.com/.*$'
-    behavior_js_template: facebook.js.template
-    # default_parameters:
-    #   parameter_username: jdoe@example.com
-    #   parameter_password: abcd1234
-    request_idle_timeout_sec: 30
-  -
-    url_regex: '^https?://(?:www\.)?marquette\.edu/.*$'
-    behavior_js: marquette_edu.js
-    request_idle_timeout_sec: 10
-  -
-    url_regex: '^https?://(?:www\.)?vimeo\.com/.*$'
-    behavior_js: vimeo.js
-    request_idle_timeout_sec: 10
-  -
-    url_regex: '^https?://(?:www\.)?psu24.psu.edu/.*$'
-    behavior_js: psu24.js
-    request_idle_timeout_sec: 10
-  -
-    url_regex: '^https?://(?:www\.)?instagram\.com/.*$'
-    behavior_js: instagram.js
-    request_idle_timeout_sec: 10
-  -
-    url_regex: '^https?://(?:www\.)?brooklynmuseum\.org/exhibitions/.*$'
-    behavior_js_template: simpleclicks.js.template
-    default_parameters:
-      click_css_selector: img.img-responsive
-    request_idle_timeout_sec: 10
-  - # acalog https://webarchive.jira.com/browse/ARI-3775
-    url_regex: '^https?://.*[?&]catoid=[^?]*$'
-    behavior_js_template: simpleclicks.js.template
-    default_parameters:
-      click_css_selector: a[onclick]
-    request_idle_timeout_sec: 10
-  - # https://webarchive.jira.com/browse/ARI-3956
-    url_regex: '^https?://(?:www\.)?usask.ca/.*$'
-    behavior_js_template: simpleclicks.js.template
-    default_parameters:
-      click_css_selector: a[id='feature-next']
-    request_idle_timeout_sec: 10
-  - # https://webarchive.jira.com/browse/AITFIVE-451
-    url_regex: '^https?://(?:www\.)?soundcloud.com/.*$'
-    behavior_js_template: simpledo.js.template
-    default_parameters:
-      sdo_css_selector: button.sc-button-play, button.playButton
-      sdo_action: click
-    request_idle_timeout_sec: 10
-  - # https://webarchive.jira.com/browse/AITFIVE-463
-    url_regex: '^https?://(?:www\.)?christophercerrone.com/.*$'
-    behavior_js_template: simpleclicks.js.template
-    default_parameters:
-      click_css_selector: button.playButton.medium
-    request_idle_timeout_sec: 10
-  - # https://webarchive.jira.com/browse/ARI-4690
-    url_regex: '^https?://(?:www\.)?youtube.com/.*$'
-    behavior_js_template: simpleclicks.js.template
-    default_parameters:
-      click_css_selector: span.load-more-text
-    request_idle_timeout_sec: 10
-  - # https://webarchive.jira.com/browse/ARI-4725
-    url_regex: '^https?://(?:www\.)?moma.org/.*$'
-    behavior_js_template: simpleclicks.js.template
-    default_parameters:
-      click_css_selector: button[data-more-results-bottom-button]
-      click_until_hard_timeout: True
-    request_idle_timeout_sec: 10
-  - # https://webarchive.jira.com/browse/ARI-4692
-    url_regex: '^https?://(?:www\.)?fec.gov/data/.*$'
-    behavior_js: fec_gov.js
-    request_idle_timeout_sec: 10
-  - url_regex: '^https?://(?:www\.)?news\.com\.au/.*$'
-    behavior_js_template: simpledo.js.template
-    default_parameters:
-      sdo_css_selector: .menu-item a
-      sdo_action: mouseover
-    request_idle_timeout_sec: 10
-  - # default fallback behavior
-    url_regex: '^.*$'
-    request_idle_timeout_sec: 10
-    behavior_js: default.js
+-
+  url_regex: '^https?://(?:www\.)?facebook\.com/.*$'
+  behavior_js_template: facebook.js
+  request_idle_timeout_sec: 30
+-
+  url_regex: '^https?://(?:www\.)?marquette\.edu/.*$'
+  behavior_js_template: marquette_edu.js
+  request_idle_timeout_sec: 10
+-
+  url_regex: '^https?://(?:www\.)?vimeo\.com/.*$'
+  behavior_js_template: vimeo.js
+  request_idle_timeout_sec: 10
+-
+  url_regex: '^https?://(?:www\.)?psu24.psu.edu/.*$'
+  behavior_js_template: psu24.js
+  request_idle_timeout_sec: 10
+-
+  url_regex: '^https?://(?:www\.)?instagram\.com/.*$'
+  behavior_js_template: instagram.js
+  request_idle_timeout_sec: 10
+-
+  url_regex: '^https?://(?:www\.)?brooklynmuseum\.org/exhibitions/.*$'
+  behavior_js_template: simpleclicks.js.j2
+  default_parameters:
+    click_css_selector: img.img-responsive
+    click_until_hard_timeout: False
+  request_idle_timeout_sec: 10
+- # acalog https://webarchive.jira.com/browse/ARI-3775
+  url_regex: '^https?://.*[?&]catoid=[^?]*$'
+  behavior_js_template: simpleclicks.js.j2
+  default_parameters:
+    click_css_selector: a[onclick]
+    click_until_hard_timeout: False
+  request_idle_timeout_sec: 10
+- # https://webarchive.jira.com/browse/ARI-3956
+  url_regex: '^https?://(?:www\.)?usask.ca/.*$'
+  behavior_js_template: simpleclicks.js.j2
+  default_parameters:
+    click_css_selector: a[id='feature-next']
+    click_until_hard_timeout: False
+  request_idle_timeout_sec: 10
+- # https://webarchive.jira.com/browse/AITFIVE-451
+  url_regex: '^https?://(?:www\.)?soundcloud.com/.*$'
+  behavior_js_template: simpledo.js.j2
+  default_parameters:
+    sdo_css_selector: button.sc-button-play, button.playButton
+    sdo_action: click
+    sdo_until_hard_timeout: False
+  request_idle_timeout_sec: 10
+- # https://webarchive.jira.com/browse/AITFIVE-463
+  url_regex: '^https?://(?:www\.)?christophercerrone.com/.*$'
+  behavior_js_template: simpleclicks.js.j2
+  default_parameters:
+    click_css_selector: button.playButton.medium
+    click_until_hard_timeout: False
+  request_idle_timeout_sec: 10
+- # https://webarchive.jira.com/browse/ARI-4690
+  url_regex: '^https?://(?:www\.)?youtube.com/.*$'
+  behavior_js_template: simpleclicks.js.j2
+  default_parameters:
+    click_css_selector: span.load-more-text
+    click_until_hard_timeout: False
+  request_idle_timeout_sec: 10
+- # https://webarchive.jira.com/browse/ARI-4725
+  url_regex: '^https?://(?:www\.)?moma.org/.*$'
+  behavior_js_template: simpleclicks.js.j2
+  default_parameters:
+    click_css_selector: button[data-more-results-bottom-button]
+    click_until_hard_timeout: True
+  request_idle_timeout_sec: 10
+- # https://webarchive.jira.com/browse/ARI-4692
+  url_regex: '^https?://(?:www\.)?fec.gov/data/.*$'
+  behavior_js_template: fec_gov.js
+  request_idle_timeout_sec: 10
+- url_regex: '^https?://(?:www\.)?news\.com\.au/.*$'
+  behavior_js_template: simpledo.js.j2
+  default_parameters:
+    sdo_css_selector: .menu-item a
+    sdo_action: mouseover
+    sdo_until_hard_timeout: False
+  request_idle_timeout_sec: 10
+- # default fallback behavior
+  url_regex: '^.*$'
+  request_idle_timeout_sec: 10
+  behavior_js_template: default.js
diff --git a/brozzler/js-templates/simpledo.js.j2 b/brozzler/js-templates/simpledo.js.j2
index 14580c1..8a0a819 100644
--- a/brozzler/js-templates/simpledo.js.j2
+++ b/brozzler/js-templates/simpledo.js.j2
@@ -26,12 +26,9 @@ var umbraBehavior = {
         var didSomething = false;
         var somethingLeftBelow = false;
         var somethingLeftAbove = false;
-        var cssSelector = "${sdo_css_selector}";
-        var doAction = "${sdo_action}"; // currently supports click, mouseover
-        var doUntilTimeout = "${sdo_until_hard_timeout}";
-
-        //handle Python to JavaScript boolean conversion
-        doUntilTimeout == "True" ? doUntilTimeout = true : doUntilTimeout = false;
+        var cssSelector = {{sdo_css_selector|tojson}};
+        var doAction = {{sdo_action|tojson}}; // currently supports click, mouseover
+        var doUntilTimeout = {{sdo_until_hard_timeout|tojson}};
 
         var iframes = document.querySelectorAll("iframe");
         var documents = Array(iframes.length + 1);