mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-24 00:29:53 -05:00
First few behaviors
This commit is contained in:
parent
fe1c68af90
commit
445288d5e7
20
umbra/behaviors.json
Normal file
20
umbra/behaviors.json
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"scripts": [
|
||||||
|
"setInterval(function() { window.scrollBy(10000,10000); }, 1000);"
|
||||||
|
],
|
||||||
|
"site": ".*"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"scripts": [
|
||||||
|
"setInterval(function() { a = document.evaluate( '//a[(@href = \"#\" and @role = \"button\" and contains(.,\"more comments\")) or starts-with(@href, \"/browse/likes\")]', document, null, XPathResult.UNORDERED_NODE_ITERATOR_TYPE, null ); f = a.iterateNext(); f.click();}, 1000);"
|
||||||
|
],
|
||||||
|
"site": ".*facebook.com.*"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"scripts": [
|
||||||
|
""
|
||||||
|
],
|
||||||
|
"site": ".*flickr.com.*"
|
||||||
|
}
|
||||||
|
]
|
13
umbra/behaviors.py
Normal file
13
umbra/behaviors.py
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
from json import dumps, load
|
||||||
|
from time import sleep
|
||||||
|
import os, re
|
||||||
|
|
||||||
|
# Absolute or relative path of behaviors.json, which lives next to this module.
# os.path.dirname handles every separator the platform accepts, unlike a
# manual split on os.path.sep.
behaviors_file = os.path.join(os.path.dirname(__file__), 'behaviors.json')
|
||||||
|
def execute(url, ws, command_id, delay=5, behaviors_path=None):
    """Send every behavior script whose site pattern matches ``url``.

    The behaviors file is a JSON list of objects, each with a ``"site"``
    regular expression and a ``"scripts"`` list of JavaScript snippets. For
    every entry whose pattern matches the start of ``url`` (``re.match``),
    each non-blank script is sent over ``ws`` as a ``Runtime.evaluate``
    command.

    Args:
        url: URL of the page being visited.
        ws: Object with a ``send(str)`` method (the websocket connection).
        command_id: Iterator yielding command ids; advanced once per
            script that is actually sent.
        delay: Seconds to sleep before doing anything. Defaults to 5.
            NOTE(review): presumably gives the page time to load -- confirm.
        behaviors_path: Path of the behaviors JSON file. Defaults to the
            module-level ``behaviors_file``.
    """
    sleep(delay)
    if behaviors_path is None:
        behaviors_path = behaviors_file

    # Load the definitions and release the file before talking to the socket.
    with open(behaviors_path) as js:
        behaviors = load(js)

    for behavior in behaviors:
        if not re.match(behavior['site'], url):
            continue
        for script in behavior['scripts']:
            # Placeholder entries (e.g. "") carry nothing to evaluate; do not
            # spend a command id or a round trip on them.
            if not script.strip():
                continue
            ws.send(dumps(dict(method="Runtime.evaluate", params={"expression": script}, id=next(command_id))))
|
@ -20,10 +20,30 @@ class Umbra:
|
|||||||
self.producer_lock = threading.Lock()
|
self.producer_lock = threading.Lock()
|
||||||
self.consume_amqp()
|
self.consume_amqp()
|
||||||
|
|
||||||
def get_message_handler(self, url, url_metadata):
|
def watchdog(self, command_id, timeout=10):
    """Create a primed generator that closes a websocket after inactivity.

    Each ``send(ws)`` on the returned generator cancels the pending timer,
    if there is one, and starts a new one for ``ws``. When a timer fires
    without having been cancelled, it sends a ``Runtime.evaluate`` command
    that asks the page to close itself and then closes ``ws``.

    Args:
        command_id: Iterator yielding command ids; advanced once when a
            timer fires.
        timeout: Seconds without a ``send`` before the websocket is
            closed. Defaults to 10.

    Returns:
        The generator, already advanced to its first ``yield`` so callers
        can ``send`` to it immediately.
    """
    def close_page(sock):
        # Runs on the timer thread once the inactivity window has elapsed.
        close_exp = "window.open('', '_self', ''); window.close(); "
        sock.send(dumps(dict(method="Runtime.evaluate", params={"expression": close_exp}, id=next(command_id))))
        self.logger.info("Going")
        sock.close()

    def wrapped():
        timer = None
        while True:
            ws = yield
            if timer is not None:
                self.logger.info("Cancelling")
                timer.cancel()
            # Pass the socket explicitly so the callback does not depend on
            # a variable that is rebound on every iteration.
            timer = threading.Timer(timeout, close_page, args=(ws,))
            timer.start()

    result = wrapped()
    next(result)  # prime the generator so the first send() is accepted
    return result
|
||||||
|
|
||||||
|
def get_message_handler(self, url, url_metadata, command_id):
|
||||||
|
this_watchdog = self.watchdog(command_id)
|
||||||
def handle_message(ws, message):
|
def handle_message(ws, message):
|
||||||
|
this_watchdog.send(ws)
|
||||||
message = loads(message)
|
message = loads(message)
|
||||||
self.logger.info(message)
|
|
||||||
if "method" in message.keys() and message["method"] == "Network.requestWillBeSent":
|
if "method" in message.keys() and message["method"] == "Network.requestWillBeSent":
|
||||||
to_send = {}
|
to_send = {}
|
||||||
to_send.update(message['params']['request'])
|
to_send.update(message['params']['request'])
|
||||||
@ -47,19 +67,19 @@ class Umbra:
|
|||||||
|
|
||||||
def fetch_url(self, body, message):
|
def fetch_url(self, body, message):
|
||||||
url, metadata = body['url'], body['metadata']
|
url, metadata = body['url'], body['metadata']
|
||||||
|
command_id = count(1)
|
||||||
def send_websocket_commands(ws):
|
def send_websocket_commands(ws):
|
||||||
command_id = count(1)
|
|
||||||
ws.send(dumps(dict(method="Network.enable", id=next(command_id))))
|
ws.send(dumps(dict(method="Network.enable", id=next(command_id))))
|
||||||
ws.send(dumps(dict(method="Page.navigate", id=next(command_id), params={"url": url})))
|
ws.send(dumps(dict(method="Page.navigate", id=next(command_id), params={"url": url})))
|
||||||
|
|
||||||
#from umbra import behaviors
|
from umbra import behaviors
|
||||||
#behaviors.execute(url, ws, command_id)
|
behaviors.execute(url, ws, command_id)
|
||||||
|
|
||||||
message.ack()
|
message.ack()
|
||||||
|
|
||||||
with Chrome(*self.chrome_args) as websocket_url:
|
with Chrome(*self.chrome_args) as websocket_url:
|
||||||
websock = websocket.WebSocketApp(websocket_url)
|
websock = websocket.WebSocketApp(websocket_url)
|
||||||
websock.on_message = self.get_message_handler(url, metadata)
|
websock.on_message = self.get_message_handler(url, metadata, command_id)
|
||||||
websock.on_open = send_websocket_commands
|
websock.on_open = send_websocket_commands
|
||||||
websock.run_forever()
|
websock.run_forever()
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user