Merge branch 'ARI-5294' into qa

This commit is contained in:
Barbara Miller 2018-01-23 11:47:57 -08:00
commit 455014a631
3 changed files with 5 additions and 6 deletions

View File

@@ -97,11 +97,10 @@
 click_until_hard_timeout: False
 - # https://webarchive.jira.com/browse/ARI-5294
 url_regex: '^https?://citymedfordwi\.civicweb\.net/.*$'
-behavior_js_template: simpleclicks.js.j2
+behavior_js_template: umbraBehavior.js.j2
 default_parameters:
-click_css_selector: div.meeting-document-type-buttons button.button-small
-click_until_hard_timeout: False
-request_idle_timeout_sec: 10
+actions:
+- selector: div.meeting-document-type-buttons button.button-small
 - # https://webarchive.jira.com/browse/ARI-5409
 url_regex: '^https?://(?:www\.)?tuebingen.de/.*$'
 behavior_js_template: simpleclicks.js.j2

View File

@@ -296,7 +296,7 @@ class BrozzlerWorker:
 # we do whatwg canonicalization here to avoid "<urlopen error
 # no host given>" resulting in ProxyError
 # needs automated test
-info = ydl.extract_info(urlcanon.whatwg(page.url))
+info = ydl.extract_info(str(urlcanon.whatwg(page.url)))
 self._remember_videos(page, ydl.brozzler_spy)
 # logging.info('XXX %s', json.dumps(info))
 if self._using_warcprox(site):

2
setup.py Normal file → Executable file
View File

@@ -32,7 +32,7 @@ def find_package_data(package):
 setuptools.setup(
 name='brozzler',
-version='1.1b12.dev278',
+version='1.1b12.dev279',
 description='Distributed web crawling with browsers',
 url='https://github.com/internetarchive/brozzler',
 author='Noah Levitt',