mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-07-17 12:08:53 -04:00
Merge branch 'master' into fix-travis
* master: use \n to delimit outlinks because urls can contain spaces (and anything else except [\n\t\0]) in the fragment part even after browser canonicalization
This commit is contained in:
commit
3ac2dabb74
2 changed files with 4 additions and 4 deletions
|
@@ -302,7 +302,7 @@ class Browser:
|
|||
|
||||
self._behavior = None
|
||||
|
||||
OUTLINKS_JS = """
|
||||
OUTLINKS_JS = r"""
|
||||
var __brzl_framesDone = new Set();
|
||||
var __brzl_compileOutlinks = function(frame) {
|
||||
__brzl_framesDone.add(frame);
|
||||
|
@@ -317,7 +317,7 @@ var __brzl_compileOutlinks = function(frame) {
|
|||
}
|
||||
return outlinks;
|
||||
}
|
||||
__brzl_compileOutlinks(window).join(' ');
|
||||
__brzl_compileOutlinks(window).join('\n');
|
||||
"""
|
||||
|
||||
def _chain_chrome_messages(self, chain):
|
||||
|
@@ -386,7 +386,7 @@ __brzl_compileOutlinks(window).join(' ');
|
|||
|
||||
def set_outlinks(message):
|
||||
self._outlinks = frozenset(
|
||||
message["result"]["result"]["value"].split())
|
||||
message["result"]["result"]["value"].split("\n"))
|
||||
|
||||
chain.append({
|
||||
"info": "retrieving outlinks",
|
||||
|
|
2
setup.py
2
setup.py
|
@@ -32,7 +32,7 @@ def find_package_data(package):
|
|||
|
||||
setuptools.setup(
|
||||
name='brozzler',
|
||||
version='1.1b7.dev121',
|
||||
version='1.1b7.dev122',
|
||||
description='Distributed web crawling with browsers',
|
||||
url='https://github.com/internetarchive/brozzler',
|
||||
author='Noah Levitt',
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue