mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-24 08:39:59 -05:00
Merge branch 'master' into fix-travis
* master: use \n to delimit outlinks, because URLs can contain spaces (and any other character except [\n\t\0]) in the fragment part, even after browser canonicalization
This commit is contained in:
commit
3ac2dabb74
@ -302,7 +302,7 @@ class Browser:
|
|||||||
|
|
||||||
self._behavior = None
|
self._behavior = None
|
||||||
|
|
||||||
OUTLINKS_JS = """
|
OUTLINKS_JS = r"""
|
||||||
var __brzl_framesDone = new Set();
|
var __brzl_framesDone = new Set();
|
||||||
var __brzl_compileOutlinks = function(frame) {
|
var __brzl_compileOutlinks = function(frame) {
|
||||||
__brzl_framesDone.add(frame);
|
__brzl_framesDone.add(frame);
|
||||||
@ -317,7 +317,7 @@ var __brzl_compileOutlinks = function(frame) {
|
|||||||
}
|
}
|
||||||
return outlinks;
|
return outlinks;
|
||||||
}
|
}
|
||||||
__brzl_compileOutlinks(window).join(' ');
|
__brzl_compileOutlinks(window).join('\n');
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def _chain_chrome_messages(self, chain):
|
def _chain_chrome_messages(self, chain):
|
||||||
@ -386,7 +386,7 @@ __brzl_compileOutlinks(window).join(' ');
|
|||||||
|
|
||||||
def set_outlinks(message):
|
def set_outlinks(message):
|
||||||
self._outlinks = frozenset(
|
self._outlinks = frozenset(
|
||||||
message["result"]["result"]["value"].split())
|
message["result"]["result"]["value"].split("\n"))
|
||||||
|
|
||||||
chain.append({
|
chain.append({
|
||||||
"info": "retrieving outlinks",
|
"info": "retrieving outlinks",
|
||||||
|
2
setup.py
2
setup.py
@ -32,7 +32,7 @@ def find_package_data(package):
|
|||||||
|
|
||||||
setuptools.setup(
|
setuptools.setup(
|
||||||
name='brozzler',
|
name='brozzler',
|
||||||
version='1.1b7.dev121',
|
version='1.1b7.dev122',
|
||||||
description='Distributed web crawling with browsers',
|
description='Distributed web crawling with browsers',
|
||||||
url='https://github.com/internetarchive/brozzler',
|
url='https://github.com/internetarchive/brozzler',
|
||||||
author='Noah Levitt',
|
author='Noah Levitt',
|
||||||
|
Loading…
x
Reference in New Issue
Block a user