diff --git a/brozzler/js-templates/extract-outlinks.js b/brozzler/js-templates/extract-outlinks.js
index 65c4098..e9e8a47 100644
--- a/brozzler/js-templates/extract-outlinks.js
+++ b/brozzler/js-templates/extract-outlinks.js
@@ -5,7 +5,7 @@ var __brzl_compileOutlinks = function(frame) {
__brzl_framesDone.add(frame);
if (frame && frame.document) {
var outlinks = Array.prototype.slice.call(
- frame.document.querySelectorAll('a[href]'));
+ frame.document.querySelectorAll('a[href], area[href]'));
for (var i = 0; i < frame.frames.length; i++) {
if (frame.frames[i] && !__brzl_framesDone.has(frame.frames[i])) {
outlinks = outlinks.concat(
diff --git a/setup.py b/setup.py
index 66d9b61..2182db2 100644
--- a/setup.py
+++ b/setup.py
@@ -32,7 +32,7 @@ def find_package_data(package):
setuptools.setup(
name='brozzler',
- version='1.1b11.dev225',
+ version='1.1b11.dev226',
description='Distributed web crawling with browsers',
url='https://github.com/internetarchive/brozzler',
author='Noah Levitt',
diff --git a/tests/htdocs/site8/index.html b/tests/htdocs/site8/index.html
new file mode 100644
index 0000000..0bf7d6e
--- /dev/null
+++ b/tests/htdocs/site8/index.html
@@ -0,0 +1,14 @@
+
+
+ outlinks
+
+
+ baz/quux/../zuh
+ fdjisapofdjisap#yessss
+
+
+
diff --git a/tests/test_brozzling.py b/tests/test_brozzling.py
index b033cfd..c43c2f1 100644
--- a/tests/test_brozzling.py
+++ b/tests/test_brozzling.py
@@ -2,7 +2,7 @@
'''
test_brozzling.py - XXX explain
-Copyright (C) 2016 Internet Archive
+Copyright (C) 2016-2017 Internet Archive
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -147,7 +147,6 @@ def test_page_videos(httpd):
# to be adjusted on youtube-dl or chromium updates
chrome_exe = brozzler.suggest_default_chrome_exe()
worker = brozzler.BrozzlerWorker(None)
- chrome_exe = brozzler.suggest_default_chrome_exe()
site = brozzler.Site(None, {})
page = brozzler.Page(None, {
'url':'http://localhost:%s/site6/' % httpd.server_port})
@@ -172,3 +171,18 @@ def test_page_videos(httpd):
'url': 'http://localhost:%s/site6/small.webm' % httpd.server_port,
}
+def test_extract_outlinks(httpd):
+ chrome_exe = brozzler.suggest_default_chrome_exe()
+ worker = brozzler.BrozzlerWorker(None)
+ site = brozzler.Site(None, {})
+ page = brozzler.Page(None, {
+ 'url':'http://localhost:%s/site8/' % httpd.server_port})
+ with brozzler.Browser(chrome_exe=chrome_exe) as browser:
+ outlinks = worker.brozzle_page(browser, site, page)
+ assert outlinks == {
+ 'http://example.com/offsite',
+ 'http://localhost:%s/site8/baz/zuh' % httpd.server_port,
+ 'http://localhost:%s/site8/fdjisapofdjisap#1' % httpd.server_port,
+ 'http://localhost:%s/site8/fdjisapofdjisap#2' % httpd.server_port
+ }
+