Merge remote-tracking branch 'umbra/master'

* umbra/master:
  Handle Python to JS boolean conversion
  Allow behaviors to keep clicking already-clicked elements if click_until_hard_timeout is set to true
  Make Umbra click on 'Load More' button for youtube pages
  catch and log exception deleting temporary work directory
  update detection of modal close button for facebook changes
  Add custom behavior for Brooklyn Museum.
This commit is contained in:
Noah Levitt 2016-03-07 17:37:12 -08:00
commit 4874eaccbb
4 changed files with 28 additions and 3 deletions

View file

@ -8,6 +8,10 @@ var umbraBehavior = {
var somethingLeftBelow = false; var somethingLeftBelow = false;
var somethingLeftAbove = false; var somethingLeftAbove = false;
var cssSelector = "${click_css_selector}"; var cssSelector = "${click_css_selector}";
var clickUntilTimeout = "${click_until_hard_timeout}";
//handle Python to JavaScript boolean conversion
clickUntilTimeout == "True" ? clickUntilTimeout = true : clickUntilTimeout = false;
var iframes = document.querySelectorAll("iframe"); var iframes = document.querySelectorAll("iframe");
var documents = Array(iframes.length + 1); var documents = Array(iframes.length + 1);
@ -22,7 +26,7 @@ var umbraBehavior = {
var clickTargets = documents[j].querySelectorAll(cssSelector); var clickTargets = documents[j].querySelectorAll(cssSelector);
for ( var i = 0; i < clickTargets.length; i++) { for ( var i = 0; i < clickTargets.length; i++) {
if (clickTargets[i].umbraClicked) { if (clickTargets[i].umbraClicked && !clickUntilTimeout) {
continue; continue;
} }

View file

@ -34,7 +34,11 @@ class Behavior:
behavior_js = os.path.sep.join(__file__.split(os.path.sep)[:-1] + ["behaviors.d"] + [behavior["behavior_js"]]) behavior_js = os.path.sep.join(__file__.split(os.path.sep)[:-1] + ["behaviors.d"] + [behavior["behavior_js"]])
behavior["script"] = open(behavior_js, encoding="utf-8").read() behavior["script"] = open(behavior_js, encoding="utf-8").read()
elif "click_css_selector" in behavior: elif "click_css_selector" in behavior:
behavior["script"] = simpleclicks_js_template.substitute(click_css_selector=behavior["click_css_selector"]) if "click_until_hard_timeout" in behavior:
click_until_hard_timeout_value=behavior["click_until_hard_timeout"]
else:
click_until_hard_timeout_value = False
behavior["script"] = simpleclicks_js_template.substitute(click_css_selector=behavior["click_css_selector"], click_until_hard_timeout=click_until_hard_timeout_value)
return Behavior._behaviors return Behavior._behaviors

View file

@ -24,6 +24,10 @@ behaviors:
url_regex: '^https?://(?:www\.)?instagram\.com/.*$' url_regex: '^https?://(?:www\.)?instagram\.com/.*$'
behavior_js: instagram.js behavior_js: instagram.js
request_idle_timeout_sec: 10 request_idle_timeout_sec: 10
-
url_regex: '^https?://(?:www\.)?brooklynmuseum\.org/exhibitions/.*$'
click_css_selector: img.img-responsive
request_idle_timeout_sec: 10
- # acalog https://webarchive.jira.com/browse/ARI-3775 - # acalog https://webarchive.jira.com/browse/ARI-3775
url_regex: '^https?://.*[?&]catoid=[^?]*$' url_regex: '^https?://.*[?&]catoid=[^?]*$'
click_css_selector: a[onclick] click_css_selector: a[onclick]
@ -40,6 +44,15 @@ behaviors:
url_regex: '^https?://(?:www\.)?christophercerrone.com/.*$' url_regex: '^https?://(?:www\.)?christophercerrone.com/.*$'
click_css_selector: button.playButton.medium click_css_selector: button.playButton.medium
request_idle_timeout_sec: 10 request_idle_timeout_sec: 10
- # https://webarchive.jira.com/browse/ARI-4690
url_regex: '^https?://(?:www\.)?youtube.com/.*$'
click_css_selector: span.load-more-text
request_idle_timeout_sec: 10
- # https://webarchive.jira.com/browse/ARI-4725
url_regex: '^https?://(?:www\.)?moma.org/.*$'
click_css_selector: button[data-more-results-bottom-button]
click_until_hard_timeout: True
request_idle_timeout_sec: 10
- # default fallback behavior - # default fallback behavior
url_regex: '^.*$' url_regex: '^.*$'
request_idle_timeout_sec: 10 request_idle_timeout_sec: 10

View file

@ -125,7 +125,11 @@ class Browser:
if self.is_running(): if self.is_running():
self._chrome_instance.stop() self._chrome_instance.stop()
self._chrome_instance = None self._chrome_instance = None
self._work_dir.cleanup() try:
self._work_dir.cleanup()
except:
self.logger.error("exception deleting %s", self._work_dir,
exc_info=True)
self._work_dir = None self._work_dir = None
self._websocket_url = None self._websocket_url = None
except: except: