mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-04-19 23:35:54 -04:00
commit
a0f8474a73
@ -1,4 +1,5 @@
|
||||
//.*facebook.com.*
|
||||
//^https?://(?:www\.)?facebook.com/.*$
|
||||
|
||||
var isOnScreen = function(e) {
|
||||
var eTop = e.getBoundingClientRect().top;
|
||||
return eTop > window.scrollY && eTop < window.scrollY + window.innerHeight;
|
||||
|
@ -1,4 +1,5 @@
|
||||
//.*flickr.com.*
|
||||
//^https?://(?:www\.)?flickr.com/.*$
|
||||
|
||||
setInterval(function() { window.scrollBy(0,50); }, 100);
|
||||
|
||||
setTimeout(function() {
|
||||
|
7
umbra/behaviors.d/vimeo.js
Normal file
7
umbra/behaviors.d/vimeo.js
Normal file
@ -0,0 +1,7 @@
|
||||
//^https?://(?:www\.)?vimeo.com/.*$
|
||||
|
||||
var videoElements = document.getElementsByTagName('video');
|
||||
for (var i = 0; i < videoElements.length; i++) {
|
||||
videoElements[i].play();
|
||||
}
|
||||
|
@ -6,21 +6,20 @@ from itertools import chain
|
||||
import os, re
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger('behaviors')
|
||||
behaviors_directory = os.path.sep.join(__file__.split(os.path.sep)[:-1] + ['behaviors.d'])
|
||||
behavior_files = chain(*[[dir + os.path.sep + file for file in files] for dir, dirs, files in os.walk(behaviors_directory)])
|
||||
behavior_files = chain(*[[os.path.join(dir, file) for file in files if file.endswith('.js')] for dir, dirs, files in os.walk(behaviors_directory)])
|
||||
behaviors = []
|
||||
for file_name in behavior_files:
|
||||
logger.debug("reading behavior file {}".format(file_name))
|
||||
lines = open(file_name).readlines()
|
||||
pattern, script = lines[0][2:].strip(), ''.join(lines[1:])
|
||||
behaviors.append({'site' : pattern, 'script': script})
|
||||
behaviors.append({'url_regex': pattern, 'script': script, 'file': file_name})
|
||||
logger.info("will run behaviors from {} to urls matching {}".format(file_name, pattern))
|
||||
|
||||
print(behaviors)
|
||||
def execute(url, websock, command_id):
|
||||
logger = logging.getLogger('behaviors')
|
||||
print(behaviors)
|
||||
for behavior in behaviors:
|
||||
print("Comparing %s and %s" %(behavior['site'], url))
|
||||
if re.match(behavior['site'], url):
|
||||
if re.match(behavior['url_regex'], url):
|
||||
msg = dumps(dict(method="Runtime.evaluate", params={"expression": behavior['script']}, id=next(command_id)))
|
||||
logger.debug('sending message to {}: {}'.format(websock, msg))
|
||||
websock.send(msg)
|
||||
|
@ -1,13 +1,18 @@
|
||||
#!/usr/bin/env python
|
||||
# vim: set sw=4 et:
|
||||
|
||||
import logging
|
||||
import os, sys, argparse
|
||||
# logging.basicConfig(stream=sys.stdout, level=logging.INFO,
|
||||
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG,
|
||||
format='%(asctime)s %(process)d %(levelname)s %(threadName)s %(name)s.%(funcName)s(%(filename)s:%(lineno)d) %(message)s')
|
||||
|
||||
from json import dumps, loads
|
||||
import urllib.request, urllib.error, urllib.parse
|
||||
from itertools import count
|
||||
import os,sys,argparse, urllib.request, urllib.error, urllib.parse
|
||||
import websocket
|
||||
import time
|
||||
import uuid
|
||||
import logging
|
||||
import threading
|
||||
import subprocess
|
||||
import signal
|
||||
@ -53,9 +58,10 @@ class UmbraWorker:
|
||||
if self.idle_timer:
|
||||
self.idle_timer.cancel()
|
||||
self.idle_timer = threading.Timer(10, self.page_done.set)
|
||||
self.idle_timer.start()
|
||||
if not self.hard_stop_timer: #10 minutes is as long as we should give 1 page
|
||||
self.hard_stop_timer = threading.Timer(600, self.page_done.set)
|
||||
self.idle_timer.start()
|
||||
self.hard_stop_timer.start()
|
||||
|
||||
def visit_page(self, websock):
|
||||
msg = dumps(dict(method="Network.enable", id=next(self.command_id)))
|
||||
@ -81,10 +87,10 @@ class UmbraWorker:
|
||||
routing_key=self.client_id)
|
||||
|
||||
def handle_message(self, websock, message):
|
||||
self.logger.debug("handling message from websocket {} - {}".format(websock, message[:95]))
|
||||
self._reset_idle_timer()
|
||||
# self.logger.debug("handling message from websocket {} - {}".format(websock, message[:95]))
|
||||
message = loads(message)
|
||||
if "method" in message.keys() and message["method"] == "Network.requestWillBeSent":
|
||||
self._reset_idle_timer()
|
||||
self.send_request_to_amqp(message)
|
||||
elif "method" in message.keys() and message["method"] == "Page.loadEventFired":
|
||||
self.logger.debug("got Page.loadEventFired, starting behaviors for {}".format(self.url))
|
||||
@ -199,7 +205,6 @@ class Chrome:
|
||||
self.chrome_process.wait()
|
||||
|
||||
def main():
|
||||
|
||||
arg_parser = argparse.ArgumentParser(prog=os.path.basename(sys.argv[0]),
|
||||
description='umbra - Browser automation tool',
|
||||
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
||||
|
Loading…
x
Reference in New Issue
Block a user