mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-08-08 06:22:23 -04:00
Merge branch 'ARI-5294' into qa
This commit is contained in:
commit
455014a631
3 changed files with 5 additions and 6 deletions
|
@ -97,11 +97,10 @@
|
||||||
click_until_hard_timeout: False
|
click_until_hard_timeout: False
|
||||||
- # https://webarchive.jira.com/browse/ARI-5294
|
- # https://webarchive.jira.com/browse/ARI-5294
|
||||||
url_regex: '^https?://citymedfordwi\.civicweb\.net/.*$'
|
url_regex: '^https?://citymedfordwi\.civicweb\.net/.*$'
|
||||||
behavior_js_template: simpleclicks.js.j2
|
behavior_js_template: umbraBehavior.js.j2
|
||||||
default_parameters:
|
default_parameters:
|
||||||
click_css_selector: div.meeting-document-type-buttons button.button-small
|
actions:
|
||||||
click_until_hard_timeout: False
|
- selector: div.meeting-document-type-buttons button.button-small
|
||||||
request_idle_timeout_sec: 10
|
|
||||||
- # https://webarchive.jira.com/browse/ARI-5409
|
- # https://webarchive.jira.com/browse/ARI-5409
|
||||||
url_regex: '^https?://(?:www\.)?tuebingen.de/.*$'
|
url_regex: '^https?://(?:www\.)?tuebingen.de/.*$'
|
||||||
behavior_js_template: simpleclicks.js.j2
|
behavior_js_template: simpleclicks.js.j2
|
||||||
|
|
|
@ -296,7 +296,7 @@ class BrozzlerWorker:
|
||||||
# we do whatwg canonicalization here to avoid "<urlopen error
|
# we do whatwg canonicalization here to avoid "<urlopen error
|
||||||
# no host given>" resulting in ProxyError
|
# no host given>" resulting in ProxyError
|
||||||
# needs automated test
|
# needs automated test
|
||||||
info = ydl.extract_info(urlcanon.whatwg(page.url))
|
info = ydl.extract_info(str(urlcanon.whatwg(page.url)))
|
||||||
self._remember_videos(page, ydl.brozzler_spy)
|
self._remember_videos(page, ydl.brozzler_spy)
|
||||||
# logging.info('XXX %s', json.dumps(info))
|
# logging.info('XXX %s', json.dumps(info))
|
||||||
if self._using_warcprox(site):
|
if self._using_warcprox(site):
|
||||||
|
|
2
setup.py
Normal file → Executable file
2
setup.py
Normal file → Executable file
|
@ -32,7 +32,7 @@ def find_package_data(package):
|
||||||
|
|
||||||
setuptools.setup(
|
setuptools.setup(
|
||||||
name='brozzler',
|
name='brozzler',
|
||||||
version='1.1b12.dev278',
|
version='1.1b12.dev279',
|
||||||
description='Distributed web crawling with browsers',
|
description='Distributed web crawling with browsers',
|
||||||
url='https://github.com/internetarchive/brozzler',
|
url='https://github.com/internetarchive/brozzler',
|
||||||
author='Noah Levitt',
|
author='Noah Levitt',
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue