Merge pull request #13 from nlevitt/master

Tweaks to the Facebook click behavior, logging, and page timeouts
This commit is contained in:
vonrosen 2014-03-09 16:47:19 -07:00
commit a16ce4abeb
2 changed files with 41 additions and 14 deletions

View File

@ -5,20 +5,21 @@ var isOnScreen = function(e) {
return eTop > window.scrollY && eTop < window.scrollY + window.innerHeight; return eTop > window.scrollY && eTop < window.scrollY + window.innerHeight;
}; };
var THINGS_TO_CLICK_SELECTOR = 'a.UFIPagerLink > span, a.UFIPagerLink, a[href^="/browse/likes"], span.UFIReplySocialSentenceLinkText, a.photo'; // comments - 'a.UFIPagerLink > span, a.UFIPagerLink, span.UFIReplySocialSentenceLinkText'
var THINGS_TO_CLICK_SELECTOR = 'a[href^="/browse/likes"], *[rel="theater"]';
var alreadyClicked = {}; var alreadyClicked = {};
var intervalId; var intervalId;
var intervalFunc = function() { var intervalFunc = function() {
var closeButton = document.querySelector('a[title="Close"]'); var closeButton = document.querySelector('a[title="Close"]');
if (closeButton) { if (closeButton) {
console.log("clicking close button " + closeButton); console.log("clicking close button " + closeButton.outerHTML);
closeButton.click(); closeButton.click();
return; return;
} }
var closeTheaterButton = document.querySelector('a.closeTheater'); var closeTheaterButton = document.querySelector('a.closeTheater');
if (closeTheaterButton && closeTheaterButton.offsetWidth > 0) { if (closeTheaterButton && closeTheaterButton.offsetWidth > 0) {
console.log("clicking close button " + closeTheaterButton); console.log("clicking close button " + closeTheaterButton.outerHTML);
closeTheaterButton.click(); closeTheaterButton.click();
return; return;
} }
@ -33,7 +34,7 @@ var intervalFunc = function() {
if (isOnScreen(target)) { if (isOnScreen(target)) {
// var pos = target.getBoundingClientRect().top; // var pos = target.getBoundingClientRect().top;
// window.scrollTo(0, target.getBoundingClientRect().top - 100); // window.scrollTo(0, target.getBoundingClientRect().top - 100);
console.log("clicking at " + target.getBoundingClientRect().top + " on " + target); console.log("clicking at " + target.getBoundingClientRect().top + " on " + target.outerHTML);
target.click(); target.click();
target.style.border = '1px solid #0a0'; target.style.border = '1px solid #0a0';
alreadyClicked[target] = true; alreadyClicked[target] = true;

View File

@ -55,12 +55,26 @@ class UmbraWorker:
self.idle_timer = None self.idle_timer = None
def _reset_idle_timer(self): def _reset_idle_timer(self):
def _idle_timeout():
self.logger.debug('idle timeout')
self.page_done.set()
if self.hard_stop_timer:
self.hard_stop_timer.cancel()
def _hard_timeout():
self.logger.debug('hard timeout')
self.page_done.set()
if self.idle_timer:
self.idle_timer.cancel()
if self.idle_timer: if self.idle_timer:
self.idle_timer.cancel() self.idle_timer.cancel()
self.idle_timer = threading.Timer(10, self.page_done.set)
self.idle_timer = threading.Timer(30, _idle_timeout)
self.idle_timer.start() self.idle_timer.start()
if not self.hard_stop_timer: #10 minutes is as long as we should give 1 page
self.hard_stop_timer = threading.Timer(600, self.page_done.set) if not self.hard_stop_timer: # 15 minutes is as long as we should give 1 page
self.hard_stop_timer = threading.Timer(900, _hard_timeout)
self.hard_stop_timer.start() self.hard_stop_timer.start()
def visit_page(self, websock): def visit_page(self, websock):
@ -72,6 +86,10 @@ class UmbraWorker:
self.logger.debug('sending message to {}: {}'.format(websock, msg)) self.logger.debug('sending message to {}: {}'.format(websock, msg))
websock.send(msg) websock.send(msg)
msg = dumps(dict(method="Console.enable", id=next(self.command_id)))
self.logger.debug('sending message to {}: {}'.format(websock, msg))
websock.send(msg)
msg = dumps(dict(method="Page.navigate", id=next(self.command_id), params={"url": self.url})) msg = dumps(dict(method="Page.navigate", id=next(self.command_id), params={"url": self.url}))
self.logger.debug('sending message to {}: {}'.format(websock, msg)) self.logger.debug('sending message to {}: {}'.format(websock, msg))
websock.send(msg) websock.send(msg)
@ -80,21 +98,29 @@ class UmbraWorker:
payload = chrome_msg['params']['request'] payload = chrome_msg['params']['request']
payload['parentUrl'] = self.url payload['parentUrl'] = self.url
payload['parentUrlMetadata'] = self.url_metadata payload['parentUrlMetadata'] = self.url_metadata
self.logger.debug('sending to amqp exchange={} routing_key={} payload={}'.format(self.umbra.umbra_exchange, self.client_id, payload)) self.logger.debug('sending to amqp exchange={} routing_key={} payload={}'.format(self.umbra.umbra_exchange.name, self.client_id, payload))
with self.umbra.producer_lock: with self.umbra.producer_lock:
self.umbra.producer.publish(payload, self.umbra.producer.publish(payload,
exchange=self.umbra.umbra_exchange, exchange=self.umbra.umbra_exchange,
routing_key=self.client_id) routing_key=self.client_id)
def handle_message(self, websock, message): def handle_message(self, websock, message):
# self.logger.debug("handling message from websocket {} - {}".format(websock, message[:95])) # self.logger.debug("message from {} - {}".format(websock.url, message[:95]))
# self.logger.debug("message from {} - {}".format(websock.url, message))
message = loads(message) message = loads(message)
if "method" in message.keys() and message["method"] == "Network.requestWillBeSent": if "method" in message and message["method"] == "Network.requestWillBeSent":
self._reset_idle_timer() self._reset_idle_timer()
self.send_request_to_amqp(message) if not message["params"]["request"]["url"].lower().startswith("data:"):
elif "method" in message.keys() and message["method"] == "Page.loadEventFired": self.send_request_to_amqp(message)
self.logger.debug("got Page.loadEventFired, starting behaviors for {}".format(self.url)) else:
self.logger.debug("ignoring data url {}".format(message["params"]["request"]["url"][:80]))
elif "method" in message and message["method"] == "Page.loadEventFired":
self.logger.debug("Page.loadEventFired, starting behaviors url={} message={}".format(self.url, message))
behaviors.execute(self.url, websock, self.command_id) behaviors.execute(self.url, websock, self.command_id)
elif "method" in message and message["method"] == "Console.messageAdded":
self.logger.debug("{} console {} {}".format(websock.url,
message["params"]["message"]["level"],
message["params"]["message"]["text"]))
class Umbra: class Umbra:
logger = logging.getLogger('umbra.Umbra') logger = logging.getLogger('umbra.Umbra')
@ -119,7 +145,7 @@ class Umbra:
def consume_amqp(self): def consume_amqp(self):
self.umbra_exchange = Exchange(name='umbra', type='direct', durable=True) self.umbra_exchange = Exchange(name='umbra', type='direct', durable=True)
url_queue = Queue('urls', routing_key='url', exchange=self.umbra_exchange) url_queue = Queue('urls', routing_key='url', exchange=self.umbra_exchange)
self.logger.info("connecting to amqp {} at {}".format(repr(self.umbra_exchange), self.amqp_url)) self.logger.info("connecting to amqp exchange={} at {}".format(self.umbra_exchange.name, self.amqp_url))
with Connection(self.amqp_url) as conn: with Connection(self.amqp_url) as conn:
self.producer = conn.Producer(serializer='json') self.producer = conn.Producer(serializer='json')
self.producer_lock = threading.Lock() self.producer_lock = threading.Lock()