mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-06-20 12:54:23 -04:00
handle errors from extract-outlinks.js, which happen on polyvore.com because that site changes the definition of Set 😭
This commit is contained in:
parent
0d0da22613
commit
3c4ab834da
3 changed files with 14 additions and 4 deletions
|
@ -1,7 +1,7 @@
|
||||||
'''
|
'''
|
||||||
brozzler/browser.py - manages the browsers for brozzler
|
brozzler/browser.py - manages the browsers for brozzler
|
||||||
|
|
||||||
Copyright (C) 2014-2016 Internet Archive
|
Copyright (C) 2014-2017 Internet Archive
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
you may not use this file except in compliance with the License.
|
you may not use this file except in compliance with the License.
|
||||||
|
@ -485,9 +485,17 @@ class Browser:
|
||||||
lambda: self.websock_thread.received_result(msg_id),
|
lambda: self.websock_thread.received_result(msg_id),
|
||||||
timeout=timeout)
|
timeout=timeout)
|
||||||
message = self.websock_thread.pop_result(msg_id)
|
message = self.websock_thread.pop_result(msg_id)
|
||||||
|
if ('result' in message and 'result' in message['result']
|
||||||
|
and 'value' in message['result']['result']):
|
||||||
if message['result']['result']['value']:
|
if message['result']['result']['value']:
|
||||||
return frozenset(message['result']['result']['value'].split('\n'))
|
return frozenset(
|
||||||
|
message['result']['result']['value'].split('\n'))
|
||||||
else:
|
else:
|
||||||
|
# no links found
|
||||||
|
return frozenset()
|
||||||
|
else:
|
||||||
|
self.logger.error(
|
||||||
|
'problem extracting outlinks, result message: %s', message)
|
||||||
return frozenset()
|
return frozenset()
|
||||||
|
|
||||||
def screenshot(self, timeout=30):
|
def screenshot(self, timeout=30):
|
||||||
|
|
|
@ -1,3 +1,5 @@
|
||||||
|
// we have problems if the page has changed the definition of Set or Array
|
||||||
|
// http://www.polyvore.com/ does this for example
|
||||||
var __brzl_framesDone = new Set();
|
var __brzl_framesDone = new Set();
|
||||||
var __brzl_compileOutlinks = function(frame) {
|
var __brzl_compileOutlinks = function(frame) {
|
||||||
__brzl_framesDone.add(frame);
|
__brzl_framesDone.add(frame);
|
||||||
|
|
2
setup.py
2
setup.py
|
@ -32,7 +32,7 @@ def find_package_data(package):
|
||||||
|
|
||||||
setuptools.setup(
|
setuptools.setup(
|
||||||
name='brozzler',
|
name='brozzler',
|
||||||
version='1.1b9.dev191',
|
version='1.1b9.dev192',
|
||||||
description='Distributed web crawling with browsers',
|
description='Distributed web crawling with browsers',
|
||||||
url='https://github.com/internetarchive/brozzler',
|
url='https://github.com/internetarchive/brozzler',
|
||||||
author='Noah Levitt',
|
author='Noah Levitt',
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue