mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-23 16:19:49 -05:00
First few behaviors
This commit is contained in:
parent
fe1c68af90
commit
445288d5e7
20
umbra/behaviors.json
Normal file
20
umbra/behaviors.json
Normal file
@ -0,0 +1,20 @@
|
||||
[
|
||||
{
|
||||
"scripts": [
|
||||
"setInterval(function() { window.scrollBy(10000,10000); }, 1000);"
|
||||
],
|
||||
"site": ".*"
|
||||
},
|
||||
{
|
||||
"scripts": [
|
||||
"setInterval(function() { a = document.evaluate( '//a[(@href = \"#\" and @role = \"button\" and contains(.,\"more comments\")) or starts-with(@href, \"/browse/likes\")]', document, null, XPathResult.UNORDERED_NODE_ITERATOR_TYPE, null ); f = a.iterateNext(); f.click();}, 1000);"
|
||||
],
|
||||
"site": ".*facebook.com.*"
|
||||
},
|
||||
{
|
||||
"scripts": [
|
||||
""
|
||||
],
|
||||
"site": ".*flickr.com.*"
|
||||
}
|
||||
]
|
13
umbra/behaviors.py
Normal file
13
umbra/behaviors.py
Normal file
@ -0,0 +1,13 @@
|
||||
from json import dumps, load
|
||||
from time import sleep
|
||||
import os, re
|
||||
|
||||
# Absolute path of the behaviors.json that sits next to this module.  Resolved
# once at import time so a later chdir() cannot change which file is read.
behaviors_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'behaviors.json')


def execute(url, ws, command_id, delay=5):
    """Send every behavior script whose site pattern matches *url*.

    The behaviors file is a JSON list of objects, each with a ``site``
    regular expression and a list of ``scripts`` (JavaScript source
    strings).  For each object whose ``site`` matches *url* (``re.match``,
    i.e. anchored at the start of the URL), every non-blank script is sent
    over *ws* as a ``Runtime.evaluate`` command.

    Args:
        url: URL of the page being visited.
        ws: object with a ``send(str)`` method; each command is sent as a
            JSON-encoded string.
        command_id: iterator yielding the ``id`` for each command sent.
        delay: seconds to sleep before sending anything (default 5, the
            previously hard-coded value).  Presumably this gives the page
            time to load -- TODO confirm.  Pass 0 to skip the wait.
    """
    if delay:
        sleep(delay)

    # The file is re-read on every call rather than cached at import time.
    with open(behaviors_file) as js:
        behaviors = load(js)

    for behavior in behaviors:
        if not re.match(behavior['site'], url):
            continue
        for script in behavior['scripts']:
            # Placeholder entries (e.g. "") carry nothing to evaluate; skip
            # them so they neither hit the browser nor consume a command id.
            if not script.strip():
                continue
            ws.send(dumps(dict(method="Runtime.evaluate", params={"expression": script}, id=next(command_id))))
|
@ -20,10 +20,30 @@ class Umbra:
|
||||
self.producer_lock = threading.Lock()
|
||||
self.consume_amqp()
|
||||
|
||||
def get_message_handler(self, url, url_metadata):
|
||||
def watchdog(self, command_id, timeout=10):
    """Return a primed generator that closes the page after a quiet period.

    Each ``send(ws)`` on the returned generator cancels the timer started
    by the previous ``send`` (if any) and starts a new one.  When a timer
    expires without being cancelled, a ``Runtime.evaluate`` command that
    runs ``window.close()`` in the page is sent over that websocket and
    the websocket is then closed.

    Args:
        command_id: iterator yielding the ``id`` for the close command.
        timeout: seconds to wait after the most recent ``send`` before
            closing (default 10, the previously hard-coded value).

    Returns:
        A generator already advanced to its first ``yield``, so callers
        can call ``send(ws)`` on it immediately.
    """
    def close_page(ws):
        # Runs on the timer thread.  The websocket is passed in explicitly
        # instead of being read from the generator's loop variable.
        close_exp = "window.open('', '_self', ''); window.close(); "
        try:
            ws.send(dumps(dict(method="Runtime.evaluate", params={"expression": close_exp}, id=next(command_id))))
            self.logger.info("Going")
        finally:
            # Close the socket even if the send above failed.
            ws.close()

    def wrapped():
        timer = None
        while True:
            ws = yield
            if timer:
                self.logger.info("Cancelling")
                timer.cancel()
            timer = threading.Timer(timeout, close_page, args=(ws,))
            timer.start()

    result = wrapped()
    # Prime the generator so the first send() is accepted.
    next(result)
    return result
|
||||
|
||||
def get_message_handler(self, url, url_metadata, command_id):
|
||||
this_watchdog = self.watchdog(command_id)
|
||||
def handle_message(ws, message):
|
||||
this_watchdog.send(ws)
|
||||
message = loads(message)
|
||||
self.logger.info(message)
|
||||
if "method" in message.keys() and message["method"] == "Network.requestWillBeSent":
|
||||
to_send = {}
|
||||
to_send.update(message['params']['request'])
|
||||
@ -47,19 +67,19 @@ class Umbra:
|
||||
|
||||
def fetch_url(self, body, message):
    """Load ``body['url']`` in a browser and drive it over its websocket.

    Args:
        body: mapping with ``'url'`` and ``'metadata'`` keys.
        message: object whose ``ack()`` is called once the initial
            commands and behaviors have been sent.
    """
    url, metadata = body['url'], body['metadata']

    # A single id sequence shared by the on_open commands, the behaviors
    # and the message handler, so no two commands on this websocket reuse
    # an id.  It must not be re-bound inside the nested function.
    command_id = count(1)

    def send_websocket_commands(ws):
        ws.send(dumps(dict(method="Network.enable", id=next(command_id))))
        ws.send(dumps(dict(method="Page.navigate", id=next(command_id), params={"url": url})))

        from umbra import behaviors
        behaviors.execute(url, ws, command_id)

        # NOTE(review): ack placement assumed to be inside the on_open
        # callback, after the behaviors were sent -- confirm.
        message.ack()

    with Chrome(*self.chrome_args) as websocket_url:
        websock = websocket.WebSocketApp(websocket_url)
        websock.on_message = self.get_message_handler(url, metadata, command_id)
        websock.on_open = send_websocket_commands
        websock.run_forever()
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user