Merge remote-tracking branch 'umbra/master'

* umbra/master:
  Handle Python to JS boolean conversion
  Allow clicking on already clicked element to continue in behaviors if click_until_hard_timeout is set to true
  Make Umbra click on 'Load More' button for youtube pages
  catch and log exception deleting temporary work directory
  update detection of modal close button for facebook changes
  Add custom behavior for Brooklyn Museum.
This commit is contained in:
Noah Levitt 2016-03-07 17:37:12 -08:00
commit 4874eaccbb
4 changed files with 28 additions and 3 deletions

View File

@ -8,7 +8,11 @@ var umbraBehavior = {
var somethingLeftBelow = false;
var somethingLeftAbove = false;
var cssSelector = "${click_css_selector}";
var clickUntilTimeout = "${click_until_hard_timeout}";
//handle Python to JavaScript boolean conversion
clickUntilTimeout == "True" ? clickUntilTimeout = true : clickUntilTimeout = false;
var iframes = document.querySelectorAll("iframe");
var documents = Array(iframes.length + 1);
documents[0] = document;
@ -22,7 +26,7 @@ var umbraBehavior = {
var clickTargets = documents[j].querySelectorAll(cssSelector);
for ( var i = 0; i < clickTargets.length; i++) {
if (clickTargets[i].umbraClicked) {
if (clickTargets[i].umbraClicked && !clickUntilTimeout) {
continue;
}

View File

@ -34,7 +34,11 @@ class Behavior:
behavior_js = os.path.sep.join(__file__.split(os.path.sep)[:-1] + ["behaviors.d"] + [behavior["behavior_js"]])
behavior["script"] = open(behavior_js, encoding="utf-8").read()
elif "click_css_selector" in behavior:
behavior["script"] = simpleclicks_js_template.substitute(click_css_selector=behavior["click_css_selector"])
if "click_until_hard_timeout" in behavior:
click_until_hard_timeout_value=behavior["click_until_hard_timeout"]
else:
click_until_hard_timeout_value = False
behavior["script"] = simpleclicks_js_template.substitute(click_css_selector=behavior["click_css_selector"], click_until_hard_timeout=click_until_hard_timeout_value)
return Behavior._behaviors

View File

@ -24,6 +24,10 @@ behaviors:
url_regex: '^https?://(?:www\.)?instagram\.com/.*$'
behavior_js: instagram.js
request_idle_timeout_sec: 10
-
url_regex: '^https?://(?:www\.)?brooklynmuseum\.org/exhibitions/.*$'
click_css_selector: img.img-responsive
request_idle_timeout_sec: 10
- # acalog https://webarchive.jira.com/browse/ARI-3775
url_regex: '^https?://.*[?&]catoid=[^?]*$'
click_css_selector: a[onclick]
@ -40,6 +44,15 @@ behaviors:
url_regex: '^https?://(?:www\.)?christophercerrone.com/.*$'
click_css_selector: button.playButton.medium
request_idle_timeout_sec: 10
- # https://webarchive.jira.com/browse/ARI-4690
url_regex: '^https?://(?:www\.)?youtube.com/.*$'
click_css_selector: span.load-more-text
request_idle_timeout_sec: 10
- # https://webarchive.jira.com/browse/ARI-4725
url_regex: '^https?://(?:www\.)?moma.org/.*$'
click_css_selector: button[data-more-results-bottom-button]
click_until_hard_timeout: True
request_idle_timeout_sec: 10
- # default fallback behavior
url_regex: '^.*$'
request_idle_timeout_sec: 10

View File

@ -125,7 +125,11 @@ class Browser:
if self.is_running():
self._chrome_instance.stop()
self._chrome_instance = None
self._work_dir.cleanup()
try:
self._work_dir.cleanup()
except:
self.logger.error("exception deleting %s", self._work_dir,
exc_info=True)
self._work_dir = None
self._websocket_url = None
except: