Merge pull request #88 from nlevitt/block-urls

Block google analytics URLs using new Network.setBlockedURLs API
This commit is contained in:
Noah Levitt 2018-02-06 16:42:24 -08:00 committed by GitHub
commit 057284c2a7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 38 additions and 25 deletions

View File

@ -22,7 +22,7 @@ Requirements
- Python 3.4 or later
- RethinkDB deployment
- Chromium or Google Chrome browser
- Chromium or Google Chrome >= version 64
Note that the browser requires a graphical environment to run. You
already have this on your laptop, but on a server it will probably require

View File

@ -200,21 +200,6 @@ class WebsockReceiverThread(threading.Thread):
'uncaught exception in _handle_message message=%s',
message, exc_info=True)
def _debugger_paused(self, message):
    '''
    Handles a `Debugger.paused` event by neutralizing the paused script.

    Looks up the script id of the topmost call frame in `message`, asks the
    browser to replace that script's source with a harmless `console.log`
    stub, then tells the debugger to resume. Both commands are sent over
    `self.websock` as JSON with `id=0`.
    '''
    # the pause comes from the google analytics breakpoint set in start()
    self.logger.debug('debugger paused! message=%s', message)

    top_frame = message['params']['callFrames'][0]
    script_id = top_frame['location']['scriptId']

    # swap the script's source for a stub
    replace_command = {
        'id': 0,
        'method': 'Debugger.setScriptSource',
        'params': {
            'scriptId': script_id,
            'scriptSource': 'console.log("google analytics is no more!");',
        },
    }
    self.websock.send(json.dumps(replace_command))

    # let the page carry on
    resume_command = {'id': 0, 'method': 'Debugger.resume'}
    self.websock.send(json.dumps(resume_command))
def _network_response_received(self, message):
if (message['params']['response']['status'] == 420
and 'Warcprox-Meta' in CaseInsensitiveDict(
@ -255,8 +240,6 @@ class WebsockReceiverThread(threading.Thread):
elif message['method'] == 'Network.requestWillBeSent':
if self.on_request:
self.on_request(message)
elif message['method'] == 'Debugger.paused':
self._debugger_paused(message)
elif message['method'] == 'Page.interstitialShown':
# for AITFIVE-1529: handle http auth
# for now, we should consider killing the browser when we receive Page.interstitialShown and
@ -358,16 +341,14 @@ class Browser:
self.send_to_chrome(method='Network.enable')
self.send_to_chrome(method='Page.enable')
self.send_to_chrome(method='Console.enable')
self.send_to_chrome(method='Debugger.enable')
self.send_to_chrome(method='Runtime.enable')
# disable google analytics, see _handle_message() where breakpoint
# is caught Debugger.paused
# disable google analytics
self.send_to_chrome(
method='Debugger.setBreakpointByUrl',
params={
'lineNumber': 1,
'urlRegex': 'https?://www.google-analytics.com/analytics.js'})
method='Network.setBlockedURLs',
params={'urls': ['*google-analytics.com/analytics.js',
'*google-analytics.com/ga.js']}
)
def stop(self):
'''

View File

@ -29,6 +29,35 @@ import signal
import sqlite3
import json
import tempfile
import sys
def check_version(chrome_exe):
    '''
    Raises SystemExit if `chrome_exe` is not a supported browser version.

    Runs `chrome_exe --version`, parses the version number out of the output
    and requires the major version to be at least 64.

    Must run in the main thread to have the desired effect.

    Args:
        chrome_exe: path or command name of the browser executable

    Raises:
        SystemExit: if the command cannot be run (missing executable,
            non-zero exit status, timeout), if no version number can be
            parsed from its output, or if the major version is below 64
    '''
    # mac$ /Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome --version
    # Google Chrome 64.0.3282.140
    # mac$ /Applications/Google\ Chrome\ Canary.app/Contents/MacOS/Google\ Chrome\ Canary --version
    # Google Chrome 66.0.3341.0 canary
    # linux$ chromium-browser --version
    # Using PPAPI flash.
    # --ppapi-flash-path=/usr/lib/adobe-flashplugin/libpepflashplayer.so --ppapi-flash-version=
    # Chromium 61.0.3163.100 Built on Ubuntu , running on Ubuntu 16.04
    cmd = [chrome_exe, '--version']
    try:
        out = subprocess.check_output(cmd, timeout=60)
    except (OSError, subprocess.SubprocessError) as e:
        # missing/non-executable binary, non-zero exit status or timeout:
        # exit with a readable message instead of an unhandled traceback
        sys.exit(
                'unable to determine browser version, running %r '
                'failed: %s' % (subprocess.list2cmdline(cmd), e))

    # the version must start with a digit so that the major version parsed
    # below is never an empty string
    m = re.search(br'(Chromium|Google Chrome) (\d[\d.]*)', out)
    if not m:
        sys.exit(
                'unable to parse browser version from output of '
                '%r: %r' % (subprocess.list2cmdline(cmd), out))
    version_str = m.group(2).decode()
    major_version = int(version_str.split('.')[0])
    if major_version < 64:
        sys.exit('brozzler requires chrome/chromium version 64 or '
                 'later but %s reports version %s' % (
                     chrome_exe, version_str))
class Chrome:
logger = logging.getLogger(__module__ + '.' + __qualname__)

View File

@ -167,6 +167,7 @@ def brozzle_page(argv=None):
args = arg_parser.parse_args(args=argv[1:])
configure_logging(args)
brozzler.chrome.check_version(args.chrome_exe)
behavior_parameters = {}
if args.behavior_parameters:
@ -325,6 +326,7 @@ def brozzler_worker(argv=None):
args = arg_parser.parse_args(args=argv[1:])
configure_logging(args)
brozzler.chrome.check_version(args.chrome_exe)
def dump_state(signum, frame):
signal.signal(signal.SIGQUIT, signal.SIG_IGN)

View File

@ -268,6 +268,7 @@ def main(argv=None):
arg_parser = _build_arg_parser(argv)
args = arg_parser.parse_args(args=argv[1:])
brozzler.cli.configure_logging(args)
brozzler.chrome.check_version(args.chrome_exe)
controller = BrozzlerEasyController(args)
signal.signal(signal.SIGTERM, lambda a,b: controller.stop.set())