diff --git a/README.rst b/README.rst index 6dee908..2e6a3a9 100644 --- a/README.rst +++ b/README.rst @@ -22,7 +22,7 @@ Requirements - Python 3.4 or later - RethinkDB deployment -- Chromium or Google Chrome browser +- Chromium or Google Chrome >= version 64 Worth noting is that the browser requires a graphical environment to run. You already have this on your laptop, but on a server it will probably require diff --git a/brozzler/browser.py b/brozzler/browser.py index 18959b3..e5f8afd 100644 --- a/brozzler/browser.py +++ b/brozzler/browser.py @@ -200,21 +200,6 @@ class WebsockReceiverThread(threading.Thread): 'uncaught exception in _handle_message message=%s', message, exc_info=True) - def _debugger_paused(self, message): - # we hit the breakpoint set in start(), get rid of google analytics - self.logger.debug('debugger paused! message=%s', message) - scriptId = message['params']['callFrames'][0]['location']['scriptId'] - - # replace script - self.websock.send( - json.dumps(dict( - id=0, method='Debugger.setScriptSource', - params={'scriptId': scriptId, - 'scriptSource': 'console.log("google analytics is no more!");'}))) - - # resume execution - self.websock.send(json.dumps(dict(id=0, method='Debugger.resume'))) - def _network_response_received(self, message): if (message['params']['response']['status'] == 420 and 'Warcprox-Meta' in CaseInsensitiveDict( @@ -255,8 +240,6 @@ class WebsockReceiverThread(threading.Thread): elif message['method'] == 'Network.requestWillBeSent': if self.on_request: self.on_request(message) - elif message['method'] == 'Debugger.paused': - self._debugger_paused(message) elif message['method'] == 'Page.interstitialShown': # for AITFIVE-1529: handle http auth # for now, we should consider killing the browser when we receive Page.interstitialShown and @@ -358,16 +341,14 @@ class Browser: self.send_to_chrome(method='Network.enable') self.send_to_chrome(method='Page.enable') self.send_to_chrome(method='Console.enable') - 
def check_version(chrome_exe):
    '''
    Exits the program unless `chrome_exe` is a supported browser version.

    Runs `chrome_exe --version`, parses the reported version and requires
    the major version to be 64 or later.

    Must run in the main thread to have the desired effect.

    Args:
        chrome_exe: path or command name of the chrome/chromium executable

    Raises:
        SystemExit: if the executable cannot be run (missing, not
            executable, non-zero exit status, no answer within 60 seconds),
            if its output contains no recognizable version, or if the major
            version is lower than 64
    '''
    # mac$ /Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome --version
    # Google Chrome 64.0.3282.140
    # mac$ /Applications/Google\ Chrome\ Canary.app/Contents/MacOS/Google\ Chrome\ Canary --version
    # Google Chrome 66.0.3341.0 canary
    # linux$ chromium-browser --version
    # Using PPAPI flash.
    #  --ppapi-flash-path=/usr/lib/adobe-flashplugin/libpepflashplayer.so --ppapi-flash-version=
    # Chromium 61.0.3163.100 Built on Ubuntu , running on Ubuntu 16.04
    cmd = [chrome_exe, '--version']
    try:
        out = subprocess.check_output(cmd, timeout=60)
    except (OSError, subprocess.SubprocessError) as e:
        # a missing/broken executable should produce the same kind of clean
        # exit as an unsupported version, not a traceback
        sys.exit(
                'unable to determine browser version by running '
                '%r: %s' % (subprocess.list2cmdline(cmd), e))

    # require at least one leading digit, so that a dots-only token can
    # never reach int() below
    m = re.search(br'(Chromium|Google Chrome) ((\d+)[\d.]*)', out)
    if not m:
        sys.exit(
                'unable to parse browser version from output of '
                '%r: %r' % (subprocess.list2cmdline(cmd), out))
    version_str = m.group(2).decode('ascii')
    major_version = int(m.group(3))
    if major_version < 64:
        sys.exit('brozzler requires chrome/chromium version 64 or '
                 'later but %s reports version %s' % (
                     chrome_exe, version_str))
+ __qualname__) diff --git a/brozzler/cli.py b/brozzler/cli.py index c4b4ced..39314fc 100644 --- a/brozzler/cli.py +++ b/brozzler/cli.py @@ -167,6 +167,7 @@ def brozzle_page(argv=None): args = arg_parser.parse_args(args=argv[1:]) configure_logging(args) + brozzler.chrome.check_version(args.chrome_exe) behavior_parameters = {} if args.behavior_parameters: @@ -325,6 +326,7 @@ def brozzler_worker(argv=None): args = arg_parser.parse_args(args=argv[1:]) configure_logging(args) + brozzler.chrome.check_version(args.chrome_exe) def dump_state(signum, frame): signal.signal(signal.SIGQUIT, signal.SIG_IGN) diff --git a/brozzler/easy.py b/brozzler/easy.py index c9480a1..83cf1ba 100644 --- a/brozzler/easy.py +++ b/brozzler/easy.py @@ -268,6 +268,7 @@ def main(argv=None): arg_parser = _build_arg_parser(argv) args = arg_parser.parse_args(args=argv[1:]) brozzler.cli.configure_logging(args) + brozzler.chrome.check_version(args.chrome_exe) controller = BrozzlerEasyController(args) signal.signal(signal.SIGTERM, lambda a,b: controller.stop.set())