Merge branch 'master' into fix-travis

* master:
  use \n to delimit outlinks because urls can contain spaces (and anything else except [\n\t\0]) in the fragment part even after browser canonicalization
This commit is contained in:
Noah Levitt 2016-11-11 14:15:52 -08:00
commit 3ac2dabb74
2 changed files with 4 additions and 4 deletions

View File

@@ -302,7 +302,7 @@ class Browser:
self._behavior = None
-OUTLINKS_JS = """
+OUTLINKS_JS = r"""
var __brzl_framesDone = new Set();
var __brzl_compileOutlinks = function(frame) {
__brzl_framesDone.add(frame);
@@ -317,7 +317,7 @@ var __brzl_compileOutlinks = function(frame) {
}
return outlinks;
}
-__brzl_compileOutlinks(window).join(' ');
+__brzl_compileOutlinks(window).join('\n');
"""
def _chain_chrome_messages(self, chain):
@@ -386,7 +386,7 @@ __brzl_compileOutlinks(window).join(' ');
def set_outlinks(message):
self._outlinks = frozenset(
-message["result"]["result"]["value"].split())
+message["result"]["result"]["value"].split("\n"))
chain.append({
"info": "retrieving outlinks",

View File

@@ -32,7 +32,7 @@ def find_package_data(package):
setuptools.setup(
name='brozzler',
-version='1.1b7.dev121',
+version='1.1b7.dev122',
description='Distributed web crawling with browsers',
url='https://github.com/internetarchive/brozzler',
author='Noah Levitt',