diff --git a/brozzler/browser.py b/brozzler/browser.py index 6e85dfa..41d9ae7 100644 --- a/brozzler/browser.py +++ b/brozzler/browser.py @@ -1,7 +1,7 @@ ''' brozzler/browser.py - manages the browsers for brozzler -Copyright (C) 2014-2016 Internet Archive +Copyright (C) 2014-2017 Internet Archive Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -485,9 +485,17 @@ class Browser: lambda: self.websock_thread.received_result(msg_id), timeout=timeout) message = self.websock_thread.pop_result(msg_id) - if message['result']['result']['value']: - return frozenset(message['result']['result']['value'].split('\n')) + if ('result' in message and 'result' in message['result'] + and 'value' in message['result']['result']): + if message['result']['result']['value']: + return frozenset( + message['result']['result']['value'].split('\n')) + else: + # no links found + return frozenset() else: + self.logger.error( + 'problem extracting outlinks, result message: %s', message) return frozenset() def screenshot(self, timeout=30): diff --git a/brozzler/js-templates/extract-outlinks.js b/brozzler/js-templates/extract-outlinks.js index 3be0dfc..65c4098 100644 --- a/brozzler/js-templates/extract-outlinks.js +++ b/brozzler/js-templates/extract-outlinks.js @@ -1,3 +1,5 @@ +// we have problems if the page has changed the definition of Set or Array +// http://www.polyvore.com/ does this for example var __brzl_framesDone = new Set(); var __brzl_compileOutlinks = function(frame) { __brzl_framesDone.add(frame); diff --git a/setup.py b/setup.py index 7cc050e..7869965 100644 --- a/setup.py +++ b/setup.py @@ -32,7 +32,7 @@ def find_package_data(package): setuptools.setup( name='brozzler', - version='1.1b9.dev191', + version='1.1b9.dev192', description='Distributed web crawling with browsers', url='https://github.com/internetarchive/brozzler', author='Noah Levitt',