mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-24 08:39:59 -05:00
Merge pull request #13 from nlevitt/master
facebook, logging, timeout tweaks
This commit is contained in:
commit
a16ce4abeb
@ -5,20 +5,21 @@ var isOnScreen = function(e) {
|
|||||||
return eTop > window.scrollY && eTop < window.scrollY + window.innerHeight;
|
return eTop > window.scrollY && eTop < window.scrollY + window.innerHeight;
|
||||||
};
|
};
|
||||||
|
|
||||||
var THINGS_TO_CLICK_SELECTOR = 'a.UFIPagerLink > span, a.UFIPagerLink, a[href^="/browse/likes"], span.UFIReplySocialSentenceLinkText, a.photo';
|
// comments - 'a.UFIPagerLink > span, a.UFIPagerLink, span.UFIReplySocialSentenceLinkText'
|
||||||
|
var THINGS_TO_CLICK_SELECTOR = 'a[href^="/browse/likes"], *[rel="theater"]';
|
||||||
var alreadyClicked = {};
|
var alreadyClicked = {};
|
||||||
var intervalId;
|
var intervalId;
|
||||||
|
|
||||||
var intervalFunc = function() {
|
var intervalFunc = function() {
|
||||||
var closeButton = document.querySelector('a[title="Close"]');
|
var closeButton = document.querySelector('a[title="Close"]');
|
||||||
if (closeButton) {
|
if (closeButton) {
|
||||||
console.log("clicking close button " + closeButton);
|
console.log("clicking close button " + closeButton.outerHTML);
|
||||||
closeButton.click();
|
closeButton.click();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
var closeTheaterButton = document.querySelector('a.closeTheater');
|
var closeTheaterButton = document.querySelector('a.closeTheater');
|
||||||
if (closeTheaterButton && closeTheaterButton.offsetWidth > 0) {
|
if (closeTheaterButton && closeTheaterButton.offsetWidth > 0) {
|
||||||
console.log("clicking close button " + closeTheaterButton);
|
console.log("clicking close button " + closeTheaterButton.outerHTML);
|
||||||
closeTheaterButton.click();
|
closeTheaterButton.click();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -33,7 +34,7 @@ var intervalFunc = function() {
|
|||||||
if (isOnScreen(target)) {
|
if (isOnScreen(target)) {
|
||||||
// var pos = target.getBoundingClientRect().top;
|
// var pos = target.getBoundingClientRect().top;
|
||||||
// window.scrollTo(0, target.getBoundingClientRect().top - 100);
|
// window.scrollTo(0, target.getBoundingClientRect().top - 100);
|
||||||
console.log("clicking at " + target.getBoundingClientRect().top + " on " + target);
|
console.log("clicking at " + target.getBoundingClientRect().top + " on " + target.outerHTML);
|
||||||
target.click();
|
target.click();
|
||||||
target.style.border = '1px solid #0a0';
|
target.style.border = '1px solid #0a0';
|
||||||
alreadyClicked[target] = true;
|
alreadyClicked[target] = true;
|
||||||
|
@ -55,12 +55,26 @@ class UmbraWorker:
|
|||||||
self.idle_timer = None
|
self.idle_timer = None
|
||||||
|
|
||||||
def _reset_idle_timer(self):
|
def _reset_idle_timer(self):
|
||||||
|
def _idle_timeout():
|
||||||
|
self.logger.debug('idle timeout')
|
||||||
|
self.page_done.set()
|
||||||
|
if self.hard_stop_timer:
|
||||||
|
self.hard_stop_timer.cancel()
|
||||||
|
|
||||||
|
def _hard_timeout():
|
||||||
|
self.logger.debug('hard timeout')
|
||||||
|
self.page_done.set()
|
||||||
if self.idle_timer:
|
if self.idle_timer:
|
||||||
self.idle_timer.cancel()
|
self.idle_timer.cancel()
|
||||||
self.idle_timer = threading.Timer(10, self.page_done.set)
|
|
||||||
|
if self.idle_timer:
|
||||||
|
self.idle_timer.cancel()
|
||||||
|
|
||||||
|
self.idle_timer = threading.Timer(30, _idle_timeout)
|
||||||
self.idle_timer.start()
|
self.idle_timer.start()
|
||||||
if not self.hard_stop_timer: #10 minutes is as long as we should give 1 page
|
|
||||||
self.hard_stop_timer = threading.Timer(600, self.page_done.set)
|
if not self.hard_stop_timer: # 15 minutes is as long as we should give 1 page
|
||||||
|
self.hard_stop_timer = threading.Timer(900, _hard_timeout)
|
||||||
self.hard_stop_timer.start()
|
self.hard_stop_timer.start()
|
||||||
|
|
||||||
def visit_page(self, websock):
|
def visit_page(self, websock):
|
||||||
@ -72,6 +86,10 @@ class UmbraWorker:
|
|||||||
self.logger.debug('sending message to {}: {}'.format(websock, msg))
|
self.logger.debug('sending message to {}: {}'.format(websock, msg))
|
||||||
websock.send(msg)
|
websock.send(msg)
|
||||||
|
|
||||||
|
msg = dumps(dict(method="Console.enable", id=next(self.command_id)))
|
||||||
|
self.logger.debug('sending message to {}: {}'.format(websock, msg))
|
||||||
|
websock.send(msg)
|
||||||
|
|
||||||
msg = dumps(dict(method="Page.navigate", id=next(self.command_id), params={"url": self.url}))
|
msg = dumps(dict(method="Page.navigate", id=next(self.command_id), params={"url": self.url}))
|
||||||
self.logger.debug('sending message to {}: {}'.format(websock, msg))
|
self.logger.debug('sending message to {}: {}'.format(websock, msg))
|
||||||
websock.send(msg)
|
websock.send(msg)
|
||||||
@ -80,21 +98,29 @@ class UmbraWorker:
|
|||||||
payload = chrome_msg['params']['request']
|
payload = chrome_msg['params']['request']
|
||||||
payload['parentUrl'] = self.url
|
payload['parentUrl'] = self.url
|
||||||
payload['parentUrlMetadata'] = self.url_metadata
|
payload['parentUrlMetadata'] = self.url_metadata
|
||||||
self.logger.debug('sending to amqp exchange={} routing_key={} payload={}'.format(self.umbra.umbra_exchange, self.client_id, payload))
|
self.logger.debug('sending to amqp exchange={} routing_key={} payload={}'.format(self.umbra.umbra_exchange.name, self.client_id, payload))
|
||||||
with self.umbra.producer_lock:
|
with self.umbra.producer_lock:
|
||||||
self.umbra.producer.publish(payload,
|
self.umbra.producer.publish(payload,
|
||||||
exchange=self.umbra.umbra_exchange,
|
exchange=self.umbra.umbra_exchange,
|
||||||
routing_key=self.client_id)
|
routing_key=self.client_id)
|
||||||
|
|
||||||
def handle_message(self, websock, message):
|
def handle_message(self, websock, message):
|
||||||
# self.logger.debug("handling message from websocket {} - {}".format(websock, message[:95]))
|
# self.logger.debug("message from {} - {}".format(websock.url, message[:95]))
|
||||||
|
# self.logger.debug("message from {} - {}".format(websock.url, message))
|
||||||
message = loads(message)
|
message = loads(message)
|
||||||
if "method" in message.keys() and message["method"] == "Network.requestWillBeSent":
|
if "method" in message and message["method"] == "Network.requestWillBeSent":
|
||||||
self._reset_idle_timer()
|
self._reset_idle_timer()
|
||||||
|
if not message["params"]["request"]["url"].lower().startswith("data:"):
|
||||||
self.send_request_to_amqp(message)
|
self.send_request_to_amqp(message)
|
||||||
elif "method" in message.keys() and message["method"] == "Page.loadEventFired":
|
else:
|
||||||
self.logger.debug("got Page.loadEventFired, starting behaviors for {}".format(self.url))
|
self.logger.debug("ignoring data url {}".format(message["params"]["request"]["url"][:80]))
|
||||||
|
elif "method" in message and message["method"] == "Page.loadEventFired":
|
||||||
|
self.logger.debug("Page.loadEventFired, starting behaviors url={} message={}".format(self.url, message))
|
||||||
behaviors.execute(self.url, websock, self.command_id)
|
behaviors.execute(self.url, websock, self.command_id)
|
||||||
|
elif "method" in message and message["method"] == "Console.messageAdded":
|
||||||
|
self.logger.debug("{} console {} {}".format(websock.url,
|
||||||
|
message["params"]["message"]["level"],
|
||||||
|
message["params"]["message"]["text"]))
|
||||||
|
|
||||||
class Umbra:
|
class Umbra:
|
||||||
logger = logging.getLogger('umbra.Umbra')
|
logger = logging.getLogger('umbra.Umbra')
|
||||||
@ -119,7 +145,7 @@ class Umbra:
|
|||||||
def consume_amqp(self):
|
def consume_amqp(self):
|
||||||
self.umbra_exchange = Exchange(name='umbra', type='direct', durable=True)
|
self.umbra_exchange = Exchange(name='umbra', type='direct', durable=True)
|
||||||
url_queue = Queue('urls', routing_key='url', exchange=self.umbra_exchange)
|
url_queue = Queue('urls', routing_key='url', exchange=self.umbra_exchange)
|
||||||
self.logger.info("connecting to amqp {} at {}".format(repr(self.umbra_exchange), self.amqp_url))
|
self.logger.info("connecting to amqp exchange={} at {}".format(self.umbra_exchange.name, self.amqp_url))
|
||||||
with Connection(self.amqp_url) as conn:
|
with Connection(self.amqp_url) as conn:
|
||||||
self.producer = conn.Producer(serializer='json')
|
self.producer = conn.Producer(serializer='json')
|
||||||
self.producer_lock = threading.Lock()
|
self.producer_lock = threading.Lock()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user