From f7427219cf19862f7a14ea319812376a0199ec89 Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Wed, 21 Dec 2016 14:21:20 -0800 Subject: [PATCH] restore handling of "aw snap" or "he's dead jim" --- brozzler/__init__.py | 3 ++- brozzler/browser.py | 4 ++++ setup.py | 2 +- tests/test_brozzling.py | 27 +++++++++++++++++++++++++++ 4 files changed, 34 insertions(+), 2 deletions(-) create mode 100644 tests/test_brozzling.py diff --git a/brozzler/__init__.py b/brozzler/__init__.py index 6853c40..9e5d5fc 100644 --- a/brozzler/__init__.py +++ b/brozzler/__init__.py @@ -164,6 +164,7 @@ from brozzler.site import Page, Site from brozzler.worker import BrozzlerWorker from brozzler.robots import is_permitted_by_robots from brozzler.frontier import RethinkDbFrontier -from brozzler.browser import Browser, BrowserPool +from brozzler.browser import Browser, BrowserPool, BrowsingException from brozzler.job import new_job, new_site, Job +from brozzler.cli import suggest_default_chrome_exe diff --git a/brozzler/browser.py b/brozzler/browser.py index 5595e7a..a947044 100644 --- a/brozzler/browser.py +++ b/brozzler/browser.py @@ -185,6 +185,10 @@ class WebsockReceiverThread(threading.Thread): self.got_page_load_event = datetime.datetime.utcnow() elif message['method'] == 'Debugger.paused': self._debugger_paused(message) + elif message["method"] == "Inspector.targetCrashed": + self.logger.error( + '''chrome tab went "aw snap" or "he's dead jim"!''') + brozzler.thread_raise(self.calling_thread, BrowsingException) elif message['method'] == 'Console.messageAdded': self.logger.debug( '%s console.%s %s', self.websock.url, diff --git a/setup.py b/setup.py index 3422c97..96cbd90 100644 --- a/setup.py +++ b/setup.py @@ -32,7 +32,7 @@ def find_package_data(package): setuptools.setup( name='brozzler', - version='1.1b9.dev152', + version='1.1b9.dev153', description='Distributed web crawling with browsers', url='https://github.com/internetarchive/brozzler', author='Noah Levitt', diff --git 
a/tests/test_brozzling.py b/tests/test_brozzling.py new file mode 100644 index 0000000..b41278d --- /dev/null +++ b/tests/test_brozzling.py @@ -0,0 +1,27 @@ +#!/usr/bin/env python +''' +test_brozzling.py - tests that a crashed chrome tab raises BrowsingException + +Copyright (C) 2016 Internet Archive + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +''' + +import pytest +import brozzler + +def test_aw_snap_hes_dead_jim(): + chrome_exe = brozzler.suggest_default_chrome_exe() + with brozzler.Browser(chrome_exe=chrome_exe) as browser: + with pytest.raises(brozzler.BrowsingException): + browser.browse_page('chrome://crash')