mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-09-23 06:04:47 -04:00
commit
a0f8474a73
5 changed files with 28 additions and 15 deletions
|
@ -1,4 +1,5 @@
|
||||||
//.*facebook.com.*
|
//^https?://(?:www\.)?facebook.com/.*$
|
||||||
|
|
||||||
var isOnScreen = function(e) {
|
var isOnScreen = function(e) {
|
||||||
var eTop = e.getBoundingClientRect().top;
|
var eTop = e.getBoundingClientRect().top;
|
||||||
return eTop > window.scrollY && eTop < window.scrollY + window.innerHeight;
|
return eTop > window.scrollY && eTop < window.scrollY + window.innerHeight;
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
//.*flickr.com.*
|
//^https?://(?:www\.)?flickr.com/.*$
|
||||||
|
|
||||||
setInterval(function() { window.scrollBy(0,50); }, 100);
|
setInterval(function() { window.scrollBy(0,50); }, 100);
|
||||||
|
|
||||||
setTimeout(function() {
|
setTimeout(function() {
|
||||||
|
|
7
umbra/behaviors.d/vimeo.js
Normal file
7
umbra/behaviors.d/vimeo.js
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
//^https?://(?:www\.)?vimeo.com/.*$
|
||||||
|
|
||||||
|
var videoElements = document.getElementsByTagName('video');
|
||||||
|
for (var i = 0; i < videoElements.length; i++) {
|
||||||
|
videoElements[i].play();
|
||||||
|
}
|
||||||
|
|
|
@ -6,21 +6,20 @@ from itertools import chain
|
||||||
import os, re
|
import os, re
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
|
logger = logging.getLogger('behaviors')
|
||||||
behaviors_directory = os.path.sep.join(__file__.split(os.path.sep)[:-1] + ['behaviors.d'])
|
behaviors_directory = os.path.sep.join(__file__.split(os.path.sep)[:-1] + ['behaviors.d'])
|
||||||
behavior_files = chain(*[[dir + os.path.sep + file for file in files] for dir, dirs, files in os.walk(behaviors_directory)])
|
behavior_files = chain(*[[os.path.join(dir, file) for file in files if file.endswith('.js')] for dir, dirs, files in os.walk(behaviors_directory)])
|
||||||
behaviors = []
|
behaviors = []
|
||||||
for file_name in behavior_files:
|
for file_name in behavior_files:
|
||||||
|
logger.debug("reading behavior file {}".format(file_name))
|
||||||
lines = open(file_name).readlines()
|
lines = open(file_name).readlines()
|
||||||
pattern, script = lines[0][2:].strip(), ''.join(lines[1:])
|
pattern, script = lines[0][2:].strip(), ''.join(lines[1:])
|
||||||
behaviors.append({'site' : pattern, 'script': script})
|
behaviors.append({'url_regex': pattern, 'script': script, 'file': file_name})
|
||||||
|
logger.info("will run behaviors from {} to urls matching {}".format(file_name, pattern))
|
||||||
|
|
||||||
print(behaviors)
|
|
||||||
def execute(url, websock, command_id):
|
def execute(url, websock, command_id):
|
||||||
logger = logging.getLogger('behaviors')
|
|
||||||
print(behaviors)
|
|
||||||
for behavior in behaviors:
|
for behavior in behaviors:
|
||||||
print("Comparing %s and %s" %(behavior['site'], url))
|
if re.match(behavior['url_regex'], url):
|
||||||
if re.match(behavior['site'], url):
|
|
||||||
msg = dumps(dict(method="Runtime.evaluate", params={"expression": behavior['script']}, id=next(command_id)))
|
msg = dumps(dict(method="Runtime.evaluate", params={"expression": behavior['script']}, id=next(command_id)))
|
||||||
logger.debug('sending message to {}: {}'.format(websock, msg))
|
logger.debug('sending message to {}: {}'.format(websock, msg))
|
||||||
websock.send(msg)
|
websock.send(msg)
|
||||||
|
|
|
@ -1,13 +1,18 @@
|
||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# vim: set sw=4 et:
|
# vim: set sw=4 et:
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import os, sys, argparse
|
||||||
|
# logging.basicConfig(stream=sys.stdout, level=logging.INFO,
|
||||||
|
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG,
|
||||||
|
format='%(asctime)s %(process)d %(levelname)s %(threadName)s %(name)s.%(funcName)s(%(filename)s:%(lineno)d) %(message)s')
|
||||||
|
|
||||||
from json import dumps, loads
|
from json import dumps, loads
|
||||||
|
import urllib.request, urllib.error, urllib.parse
|
||||||
from itertools import count
|
from itertools import count
|
||||||
import os,sys,argparse, urllib.request, urllib.error, urllib.parse
|
|
||||||
import websocket
|
import websocket
|
||||||
import time
|
import time
|
||||||
import uuid
|
import uuid
|
||||||
import logging
|
|
||||||
import threading
|
import threading
|
||||||
import subprocess
|
import subprocess
|
||||||
import signal
|
import signal
|
||||||
|
@ -53,9 +58,10 @@ class UmbraWorker:
|
||||||
if self.idle_timer:
|
if self.idle_timer:
|
||||||
self.idle_timer.cancel()
|
self.idle_timer.cancel()
|
||||||
self.idle_timer = threading.Timer(10, self.page_done.set)
|
self.idle_timer = threading.Timer(10, self.page_done.set)
|
||||||
|
self.idle_timer.start()
|
||||||
if not self.hard_stop_timer: #10 minutes is as long as we should give 1 page
|
if not self.hard_stop_timer: #10 minutes is as long as we should give 1 page
|
||||||
self.hard_stop_timer = threading.Timer(600, self.page_done.set)
|
self.hard_stop_timer = threading.Timer(600, self.page_done.set)
|
||||||
self.idle_timer.start()
|
self.hard_stop_timer.start()
|
||||||
|
|
||||||
def visit_page(self, websock):
|
def visit_page(self, websock):
|
||||||
msg = dumps(dict(method="Network.enable", id=next(self.command_id)))
|
msg = dumps(dict(method="Network.enable", id=next(self.command_id)))
|
||||||
|
@ -81,10 +87,10 @@ class UmbraWorker:
|
||||||
routing_key=self.client_id)
|
routing_key=self.client_id)
|
||||||
|
|
||||||
def handle_message(self, websock, message):
|
def handle_message(self, websock, message):
|
||||||
self.logger.debug("handling message from websocket {} - {}".format(websock, message[:95]))
|
# self.logger.debug("handling message from websocket {} - {}".format(websock, message[:95]))
|
||||||
self._reset_idle_timer()
|
|
||||||
message = loads(message)
|
message = loads(message)
|
||||||
if "method" in message.keys() and message["method"] == "Network.requestWillBeSent":
|
if "method" in message.keys() and message["method"] == "Network.requestWillBeSent":
|
||||||
|
self._reset_idle_timer()
|
||||||
self.send_request_to_amqp(message)
|
self.send_request_to_amqp(message)
|
||||||
elif "method" in message.keys() and message["method"] == "Page.loadEventFired":
|
elif "method" in message.keys() and message["method"] == "Page.loadEventFired":
|
||||||
self.logger.debug("got Page.loadEventFired, starting behaviors for {}".format(self.url))
|
self.logger.debug("got Page.loadEventFired, starting behaviors for {}".format(self.url))
|
||||||
|
@ -199,7 +205,6 @@ class Chrome:
|
||||||
self.chrome_process.wait()
|
self.chrome_process.wait()
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
|
||||||
arg_parser = argparse.ArgumentParser(prog=os.path.basename(sys.argv[0]),
|
arg_parser = argparse.ArgumentParser(prog=os.path.basename(sys.argv[0]),
|
||||||
description='umbra - Browser automation tool',
|
description='umbra - Browser automation tool',
|
||||||
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue