From 61cec15fffbbacf03df95c91b85c3a47192039c0 Mon Sep 17 00:00:00 2001 From: Adam Miller Date: Tue, 3 May 2016 22:06:03 +0000 Subject: [PATCH 1/2] Restructure browser.py to take screenshot after behavior script. --- brozzler/browser.py | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/brozzler/browser.py b/brozzler/browser.py index 4344a0c..92f2b86 100644 --- a/brozzler/browser.py +++ b/brozzler/browser.py @@ -174,6 +174,7 @@ class Browser: self.on_screenshot = on_screenshot self.on_url_change = on_url_change + self._waiting_on_scroll_to_top_msg_id = None self._waiting_on_screenshot_msg_id = None self._waiting_on_document_url_msg_id = None self._waiting_on_outlinks_msg_id = None @@ -181,6 +182,7 @@ class Browser: self._reached_limit = None self._aw_snap_hes_dead_jim = None self._abort_browse_page = False + self._has_screenshot = False self._websock = websocket.WebSocketApp(self._websocket_url, on_open=self._visit_page, on_message=self._wrap_handle_message) @@ -226,6 +228,15 @@ class Browser: raise BrowsingException("""chrome tab went "aw snap" or "he's dead jim"!""") elif (self._behavior != None and self._behavior.is_finished() or time.time() - self._start > Browser.HARD_TIMEOUT_SECONDS): + + if ( not self._has_screenshot and not self._waiting_on_scroll_to_top_msg_id and not self._waiting_on_screenshot_msg_id): + self.logger.info("Behaviors finished, requesting screenshot %s", self.url) + self._waiting_on_scroll_to_top_msg_id = self.send_to_chrome(method="Runtime.evaluate", + params={"expression":"window.scrollTo(0, 0);"}) + return False + elif not self._has_screenshot and (self._waiting_on_scroll_to_top_msg_id or self._waiting_on_screenshot_msg_id): + return False + if self._outlinks: self.logger.info("got outlinks, finished browsing %s", self.url) return True @@ -246,6 +257,7 @@ class Browser: return False else: # self._waiting_on_outlinks_msg_id return False + elif time.time() - self._start > 
Browser.HARD_TIMEOUT_SECONDS: return True elif self._reached_limit: @@ -307,8 +319,10 @@ class Browser: self.logger.info("reached limit %s", self._reached_limit) def _page_load_event_fired(self, message): - self.logger.info("Page.loadEventFired, requesting screenshot url={} message={}".format(self.url, message)) - self._waiting_on_screenshot_msg_id = self.send_to_chrome(method="Page.captureScreenshot") + self.logger.info("Page.loadEventFired, moving on to starting behaviors url={}".format(self.url)) + self._behavior = Behavior(self.url, self) + self._behavior.start() + self._waiting_on_document_url_msg_id = self.send_to_chrome(method="Runtime.evaluate", params={"expression":"document.URL"}) def _console_message_added(self, message): @@ -333,9 +347,11 @@ class Browser: if self.on_screenshot: self.on_screenshot(base64.b64decode(message["result"]["data"])) self._waiting_on_screenshot_msg_id = None - self.logger.info("got screenshot, moving on to starting behaviors url={}".format(self.url)) - self._behavior = Behavior(self.url, self) - self._behavior.start() + self._has_screenshot = True + self.logger.info("got screenshot, moving on to getting outlinks url={}".format(self.url)) + elif message["id"] == self._waiting_on_scroll_to_top_msg_id: + self._waiting_on_screenshot_msg_id = self.send_to_chrome(method="Page.captureScreenshot") + self._waiting_on_scroll_to_top_msg_id = None elif message["id"] == self._waiting_on_outlinks_msg_id: self.logger.debug("got outlinks message=%s", message) self._outlinks = frozenset(message["result"]["result"]["value"].split(" ")) From 6e4e28d2dfb51d113f8d3f33fd7d4b58560c50fb Mon Sep 17 00:00:00 2001 From: Adam Miller Date: Thu, 5 May 2016 01:03:57 +0000 Subject: [PATCH 2/2] Modifying default.js behavior to stop the interval function when umbraBehaviorFinished returns true. We should do this in all behaviors ultimately to stop the behavior script upon completion. --- brozzler/behaviors.d/default.js | 1 + 1 file changed, 1 insertion(+) diff 
--git a/brozzler/behaviors.d/default.js b/brozzler/behaviors.d/default.js index 04f81d7..74b1539 100644 --- a/brozzler/behaviors.d/default.js +++ b/brozzler/behaviors.d/default.js @@ -138,6 +138,7 @@ var umbraBehaviorFinished = function() { if (umbraState.idleSince != null) { var idleTimeMs = Date.now() - umbraState.idleSince; if (idleTimeMs / 1000 > UMBRA_USER_ACTION_IDLE_TIMEOUT_SEC) { + clearInterval(umbraIntervalId) return true; } }