mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-23 16:19:49 -05:00
Merge branch 'master' of github.com:internetarchive/umbra
This commit is contained in:
commit
cf88b9968c
@ -22,8 +22,7 @@ Install via pip from this repo, e.g.
|
||||
pip install git+https://github.com/internetarchive/umbra.git
|
||||
|
||||
Umbra requires an AMQP messaging service like RabbitMQ. On Ubuntu,
|
||||
`sudo apt-get install rabbitmq-server` will install and start RabbitMQ
|
||||
at amqp://guest:guest@localhost:5672/%2f, which the default AMQP url for umbra.
|
||||
`sudo apt-get install rabbitmq-server` will install and start RabbitMQ at amqp://guest:guest@localhost:5672/%2f, which is the default AMQP url for umbra.
|
||||
|
||||
Run
|
||||
---
|
||||
|
@ -15,7 +15,7 @@ arg_parser.add_argument('-u', '--url', dest='amqp_url', default='amqp://guest:gu
|
||||
help='URL identifying the AMQP server to talk to')
|
||||
arg_parser.add_argument('--exchange', dest='amqp_exchange', default='umbra',
|
||||
help='AMQP exchange name')
|
||||
arg_parser.add_argument('--routing-key', dest='amqp_routing_key', default='url',
|
||||
arg_parser.add_argument('--routing-key', dest='amqp_routing_key', default='urls',
|
||||
help='AMQP routing key')
|
||||
arg_parser.add_argument('-i', '--client-id', dest='client_id', default='load_url.0',
|
||||
help='client id - included in the json payload with each url; umbra uses this value as the routing key to send requests back to')
|
||||
|
56
umbra/behaviors.d/marquette_edu.js
Normal file
56
umbra/behaviors.d/marquette_edu.js
Normal file
@ -0,0 +1,56 @@
|
||||
// {"url_regex":"^https?://(?:www\\.)?marquette\\.edu/.*$", "request_idle_timeout_sec":10}
|
||||
//
|
||||
// vim:set sw=8 et:
|
||||
//
|
||||
|
||||
// Shared state for this behavior script.
// idleSince starts out as null; the scroll/click step stores a Date.now()
// timestamp in it once there is nothing left to do, and resets it to null
// whenever it scrolls or clicks.
var umbraState = { 'idleSince': null };

// Record of video links that have already been clicked, so that each one is
// clicked only once.
var umbraAlreadyClicked = {};

// Run one scroll/click step every 50 ms; the timer id is kept so that the
// interval can be referenced later.
var umbraIntervalID = setInterval(umbraScrollInterval, 50);
|
||||
/**
 * Timer callback that performs one step of the page behavior.
 *
 * While the viewport has not reached the bottom of the document, it scrolls
 * down one step via umbraScroll() and clears umbraState.idleSince.
 * Once at the bottom, it clicks every "div#vid_box a" link that has not been
 * recorded in umbraAlreadyClicked. If nothing was clicked and no idle
 * timestamp is set yet, it stores Date.now() in umbraState.idleSince.
 */
function umbraScrollInterval() {
        // Not at the bottom yet: keep scrolling, which counts as activity.
        if (window.scrollY + window.innerHeight < document.documentElement.scrollHeight) {
                umbraScroll();
                umbraState.idleSince = null;
                return;
        }

        var videoBoxes = document.querySelectorAll("div#vid_box a");
        var clickedVideo = false;

        // The counter is declared locally. Previously it was assigned without a
        // declaration, which leaked a global `i` (and throws a ReferenceError
        // when the script is evaluated in strict mode).
        for (var i = 0; i < videoBoxes.length; i++) {
                // Property keys are strings, so the element is coerced to a string
                // here. NOTE(review): for anchor elements that string is presumably
                // the href, meaning links sharing an href are clicked once - confirm
                // this is intended.
                if (!(videoBoxes[i] in umbraAlreadyClicked)) {
                        videoBoxes[i].click();
                        umbraState.idleSince = null;
                        umbraAlreadyClicked[videoBoxes[i]] = true;
                        clickedVideo = true;
                }
        }

        // Nothing left to scroll or click: start the idle clock unless it is
        // already running.
        if (!clickedVideo && umbraState.idleSince == null) {
                umbraState.idleSince = Date.now();
        }
}
|
||||
|
||||
// Scrolls the window down by one fixed step, leaving the horizontal position
// unchanged.
function umbraScroll() {
        var horizontalStep = 0;
        var verticalStep = 50;
        window.scrollBy(horizontalStep, verticalStep);
}
|
||||
|
||||
|
||||
// Idle threshold, in seconds. Once nothing has been scrolled or clicked for
// longer than this, the behavior reports itself as finished with the page.
var UMBRA_USER_ACTION_IDLE_TIMEOUT_SEC = 10;

// Called from outside of this script.
// Returns true only when umbraState.idleSince holds a timestamp and more than
// UMBRA_USER_ACTION_IDLE_TIMEOUT_SEC seconds have elapsed since then;
// returns false otherwise, including while idleSince is null.
var umbraBehaviorFinished = function() {
        var idleStartedAt = umbraState.idleSince;

        // No idle timestamp recorded, so the behavior is not finished.
        if (idleStartedAt == null) {
                return false;
        }

        var idleSeconds = (Date.now() - idleStartedAt) / 1000;
        return idleSeconds > UMBRA_USER_ACTION_IDLE_TIMEOUT_SEC;
};
|
||||
|
@ -117,6 +117,9 @@ class AmqpBrowserController:
|
||||
break # out of "while True" to acquire another browser
|
||||
except socket.timeout:
|
||||
pass
|
||||
except socket.error:
|
||||
self.logger.error("problem consuming messages from AMQP, will try reconnecting after active browsing finishes", exc_info=True)
|
||||
self._reconnect_requested = True
|
||||
|
||||
if self._consumer_stop.is_set() or time.time() - start >= timeout or self._reconnect_requested:
|
||||
browser.stop()
|
||||
|
Loading…
x
Reference in New Issue
Block a user