diff --git a/umbra/behaviors.json b/umbra/behaviors.json index 095491c..7774ec9 100644 --- a/umbra/behaviors.json +++ b/umbra/behaviors.json @@ -1,27 +1,14 @@ [ { "scripts": [ - "setInterval(function() { window.scrollBy(10000,10000); }, 1000);" - ], - "site": ".*" - }, - { - "scripts": [ - "setTimeout(function() { setInterval(function() { a = document.evaluate( '//a[(@href = \"#\" and @role = \"button\" and contains(.,\"more comments\")) or starts-with(@href, \"/browse/likes\")]', document, null, XPathResult.UNORDERED_NODE_ITERATOR_TYPE, null ); f = a.iterateNext(); f.click();}, 1000);}, 5000);" + "setInterval(function() { window.scrollBy(0,500); }, 150);" ], "site": ".*facebook.com.*" }, { "scripts": [ - "setTimeout(function() { a = document.evaluate( \"//a[contains(@class, 'sn-ico-slideshow')]\", document, null, XPathResult.UNORDERED_NODE_ITERATOR_TYPE, null ); f = a.iterateNext(); f.click();}, 5000);" - ], - "site": ".*flickr.com.*" - }, - { - "scripts": [ - "setTimeout(function() { a = document.evaluate( \"//a[contains(@data-track, 'photo-click')]\", document, null, XPathResult.UNORDERED_NODE_ITERATOR_TYPE, null ); setInterval(function() { f = a.iterateNext(); f.click();}, 5000)}, 5000);" + "setInterval(function() { window.scrollBy(0,50); }, 50);" ], "site": ".*flickr.com.*" } - ] diff --git a/umbra/behaviors.py b/umbra/behaviors.py index 8c9733e..c92b2f2 100644 --- a/umbra/behaviors.py +++ b/umbra/behaviors.py @@ -8,7 +8,6 @@ import logging behaviors_file = os.path.sep.join(__file__.split(os.path.sep)[:-1] + ['behaviors.json']) def execute(url, websock, command_id): logger = logging.getLogger('behaviors') - sleep(5) with open(behaviors_file) as js: behaviors = load(js) for behavior in behaviors: diff --git a/umbra/umbra.py b/umbra/umbra.py index 5e8a584..d03bde9 100755 --- a/umbra/umbra.py +++ b/umbra/umbra.py @@ -48,7 +48,7 @@ class UmbraWorker: def _reset_idle_timer(self): if self.idle_timer: self.idle_timer.cancel() - self.idle_timer = threading.Timer(10, self.page_done.set) + self.idle_timer = threading.Timer(60, self.page_done.set) self.idle_timer.start() def visit_page(self, websock): @@ -56,35 +56,34 @@ class UmbraWorker: self.logger.debug('sending message to {}: {}'.format(websock, msg)) websock.send(msg) + msg = dumps(dict(method="Page.enable", id=next(self.command_id))) + self.logger.debug('sending message to {}: {}'.format(websock, msg)) + websock.send(msg) + msg = dumps(dict(method="Page.navigate", id=next(self.command_id), params={"url": self.url})) self.logger.debug('sending message to {}: {}'.format(websock, msg)) websock.send(msg) - from umbra import behaviors - behaviors.execute(self.url, websock, self.command_id) + def send_request_to_amqp(self, chrome_msg): + payload = chrome_msg['params']['request'] + payload['parentUrl'] = self.url + payload['parentUrlMetadata'] = self.url_metadata + self.logger.debug('sending to amqp exchange={} routing_key={} payload={}'.format(self.umbra.umbra_exchange, self.client_id, payload)) + with self.umbra.producer_lock: + self.umbra.producer.publish(payload, + exchange=self.umbra.umbra_exchange, + routing_key=self.client_id) def handle_message(self, websock, message): - # self.logger.debug("handling message from websocket {} - {}".format(websock, message)) + self.logger.debug("handling message from websocket {} - {}".format(websock, message[:95])) + self._reset_idle_timer() message = loads(message) if "method" in message.keys() and message["method"] == "Network.requestWillBeSent": - self._reset_idle_timer() - payload = message['params']['request'] - payload['parentUrl'] = self.url - payload['parentUrlMetadata'] = self.url_metadata - self.logger.debug('sending to amqp exchange={} routing_key={} payload={}'.format(self.umbra.umbra_exchange, self.client_id, payload)) - # bind a queue with the same name as the return routing key - # (AMQPUrlReceiver in heritrix expects this) - request_queue = Queue(self.client_id, - routing_key=self.client_id, - exchange=self.umbra.umbra_exchange) - with self.umbra.producer_lock: - # self.umbra.producer.publish(payload, - # routing_key=self.client_id, - # exchange=self.umbra.umbra_exchange, - # declare=[request_queue]) - self.umbra.producer.publish(payload, - routing_key=self.client_id, - exchange=self.umbra.umbra_exchange) + self.send_request_to_amqp(message) + elif "method" in message.keys() and message["method"] == "Page.loadEventFired": + self.logger.debug("got Page.loadEventFired, starting behaviors for {}".format(self.url)) + from umbra import behaviors + behaviors.execute(self.url, websock, self.command_id) def get_message_handler(self, url, url_metadata, command_id): this_watchdog = self.watchdog(command_id)