mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-23 16:19:49 -05:00
Merge remote-tracking branch 'eldondev/master' into nlevitt-master (add behaviors)
Conflicts: umbra/umbra.py
This commit is contained in:
commit
2368688fbe
27
umbra/behaviors.json
Normal file
27
umbra/behaviors.json
Normal file
@ -0,0 +1,27 @@
|
||||
[
|
||||
{
|
||||
"scripts": [
|
||||
"setInterval(function() { window.scrollBy(10000,10000); }, 1000);"
|
||||
],
|
||||
"site": ".*"
|
||||
},
|
||||
{
|
||||
"scripts": [
|
||||
"setTimeout(function() { setInterval(function() { a = document.evaluate( '//a[(@href = \"#\" and @role = \"button\" and contains(.,\"more comments\")) or starts-with(@href, \"/browse/likes\")]', document, null, XPathResult.UNORDERED_NODE_ITERATOR_TYPE, null ); f = a.iterateNext(); f.click();}, 1000);}, 5000);"
|
||||
],
|
||||
"site": ".*facebook.com.*"
|
||||
},
|
||||
{
|
||||
"scripts": [
|
||||
"setTimeout(function() { a = document.evaluate( \"//a[contains(@class, 'sn-ico-slideshow')]\", document, null, XPathResult.UNORDERED_NODE_ITERATOR_TYPE, null ); f = a.iterateNext(); f.click();}, 5000);"
|
||||
],
|
||||
"site": ".*flickr.com.*"
|
||||
},
|
||||
{
|
||||
"scripts": [
|
||||
"setTimeout(function() { a = document.evaluate( \"//a[contains(@data-track, 'photo-click')]\", document, null, XPathResult.UNORDERED_NODE_ITERATOR_TYPE, null ); setInterval(function() { f = a.iterateNext(); f.click();}, 5000)}, 5000);"
|
||||
],
|
||||
"site": ".*flickr.com.*"
|
||||
}
|
||||
|
||||
]
|
13
umbra/behaviors.py
Normal file
13
umbra/behaviors.py
Normal file
@ -0,0 +1,13 @@
|
||||
from json import dumps, load
|
||||
from time import sleep
|
||||
import os, re
|
||||
|
||||
# Path of the behaviors.json file that sits in the same directory as this
# module. os.path.dirname/os.path.join are used instead of splitting on
# os.path.sep by hand so that every separator the platform accepts (for
# example both "\\" and "/" on Windows) is handled.
behaviors_file = os.path.join(os.path.dirname(__file__), 'behaviors.json')
|
||||
def execute(url, ws, command_id):
    """Send the behavior scripts configured for *url* over *ws*.

    Sleeps for five seconds, loads the behavior definitions from
    ``behaviors_file`` and, for every definition whose ``site`` regular
    expression matches the beginning of *url* (``re.match``), sends one
    ``Runtime.evaluate`` message per entry of its ``scripts`` list.

    url -- address the ``site`` patterns are matched against
    ws -- object whose ``send`` method receives each JSON-encoded message
    command_id -- iterator; ``next(command_id)`` supplies each message ``id``
    """
    sleep(5)
    with open(behaviors_file) as behaviors_json:
        for entry in load(behaviors_json):
            if not re.match(entry['site'], url):
                continue
            for expression in entry['scripts']:
                request = {
                    "method": "Runtime.evaluate",
                    "params": {"expression": expression},
                    "id": next(command_id),
                }
                ws.send(dumps(request))
|
@ -24,6 +24,8 @@ class UmbraWorker:
|
||||
self.chrome_exe = chrome_exe
|
||||
self.chrome_wait = chrome_wait
|
||||
self.client_id = client_id
|
||||
self.page_done = threading.Event()
|
||||
self.idle_timer = None
|
||||
|
||||
def browse_page(self, url, url_metadata):
|
||||
with self.lock:
|
||||
@ -35,10 +37,19 @@ class UmbraWorker:
|
||||
websock_thread = threading.Thread(target=websock.run_forever)
|
||||
websock_thread.start()
|
||||
|
||||
# XXX more logic goes here
|
||||
time.sleep(10)
|
||||
self.page_done.clear()
|
||||
self._reset_idle_timer()
|
||||
while not self.page_done.is_set():
|
||||
time.sleep(0.5)
|
||||
|
||||
websock.close()
|
||||
self.idle_timer = None
|
||||
|
||||
def _reset_idle_timer(self):
    """Restart the idle countdown that marks the current page as done.

    Cancels the timer currently stored in ``self.idle_timer`` (if any), then
    stores and starts a new ``threading.Timer`` that calls
    ``self.page_done.set`` after 10 seconds.
    """
    # Read the attribute once: browse_page assigns None to self.idle_timer
    # when it finishes with a page, and this method is also called from
    # handle_message (presumably on the websocket thread). Checking and then
    # re-reading the attribute could therefore call .cancel() on None.
    previous = self.idle_timer
    if previous:
        previous.cancel()
    # Start the timer through the local name for the same reason: the
    # attribute may have been replaced by the time we get here.
    countdown = threading.Timer(10, self.page_done.set)
    self.idle_timer = countdown
    countdown.start()
|
||||
|
||||
def visit_page(self, websock):
|
||||
msg = dumps(dict(method="Network.enable", id=next(self.command_id)))
|
||||
@ -49,10 +60,14 @@ class UmbraWorker:
|
||||
self.logger.debug('sending message to {}: {}'.format(websock, msg))
|
||||
websock.send(msg)
|
||||
|
||||
from umbra import behaviors
|
||||
behaviors.execute(self.url, websock, self.command_id)
|
||||
|
||||
def handle_message(self, websock, message):
|
||||
# self.logger.debug("handling message from websocket {} - {}".format(websock, message))
|
||||
message = loads(message)
|
||||
if "method" in message.keys() and message["method"] == "Network.requestWillBeSent":
|
||||
self._reset_idle_timer()
|
||||
payload = message['params']['request']
|
||||
payload['parentUrl'] = self.url
|
||||
payload['parentUrlMetadata'] = self.url_metadata
|
||||
@ -71,6 +86,22 @@ class UmbraWorker:
|
||||
routing_key=self.client_id,
|
||||
exchange=self.umbra.umbra_exchange)
|
||||
|
||||
def get_message_handler(self, url, url_metadata, command_id):
    """Return a websocket message callback bound to one page visit.

    A watchdog is obtained from ``self.watchdog(command_id)`` and the
    websocket is sent to it on every incoming message. Messages whose
    ``method`` is ``Network.requestWillBeSent`` have their
    ``params.request`` mapping copied, extended with ``parentUrl`` (*url*)
    and ``parentUrlMetadata`` (*url_metadata*), and published through
    ``self.producer`` with routing key ``request`` on
    ``self.umbra_exchange`` while ``self.producer_lock`` is held. Any other
    message is ignored after the watchdog has been notified.
    """
    this_watchdog = self.watchdog(command_id)

    def handle_message(ws, message):
        # The watchdog is notified of every message, not only the requests.
        this_watchdog.send(ws)
        event = loads(message)
        if event.get("method") != "Network.requestWillBeSent":
            return
        outgoing = dict(event['params']['request'])
        outgoing['parentUrl'] = url
        outgoing['parentUrlMetadata'] = url_metadata
        self.logger.debug('sending to amqp: {}'.format(outgoing))
        with self.producer_lock:
            self.producer.publish(
                outgoing,
                routing_key='request',
                exchange=self.umbra_exchange,
            )

    return handle_message
|
||||
|
||||
class Umbra:
|
||||
logger = logging.getLogger('umbra.Umbra')
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user