Handle exceptions while extracting outlinks from frames.

In extract-outlinks.js the frame traversal is wrapped in try/catch: outlinks
starts as an empty array, an exception raised while inspecting a frame is
reported with console.log, and whatever was collected is still returned.
In setup.py the version moves from 1.5.dev311 to 1.5.dev312.

NOTE(review): line breaks, indentation and blank-line placement were restored
from a whitespace-collapsed copy of this patch; confirm them against the
upstream commit before applying.

diff --git a/brozzler/js-templates/extract-outlinks.js b/brozzler/js-templates/extract-outlinks.js
index e9e8a47..e3a04ca 100644
--- a/brozzler/js-templates/extract-outlinks.js
+++ b/brozzler/js-templates/extract-outlinks.js
@@ -3,16 +3,22 @@
 var __brzl_framesDone = new Set();
 var __brzl_compileOutlinks = function(frame) {
     __brzl_framesDone.add(frame);
-    if (frame && frame.document) {
-        var outlinks = Array.prototype.slice.call(
+    var outlinks = [];
+    try {
+        if (frame && frame.document) {
+            outlinks = Array.prototype.slice.call(
                 frame.document.querySelectorAll('a[href], area[href]'));
-        for (var i = 0; i < frame.frames.length; i++) {
-            if (frame.frames[i] && !__brzl_framesDone.has(frame.frames[i])) {
-                outlinks = outlinks.concat(
-                        __brzl_compileOutlinks(frame.frames[i]));
+            for (var i = 0; i < frame.frames.length; i++) {
+                if (frame.frames[i] && !__brzl_framesDone.has(frame.frames[i])) {
+                    outlinks = outlinks.concat(
+                            __brzl_compileOutlinks(frame.frames[i]));
+                }
             }
         }
+    } catch (e) {
+        console.log("exception looking at frame" + frame + ": " + e);
     }
+
     return outlinks;
 }
 __brzl_compileOutlinks(window).join('\n');
diff --git a/setup.py b/setup.py
index d0f8cff..948bb4f 100644
--- a/setup.py
+++ b/setup.py
@@ -32,7 +32,7 @@ def find_package_data(package):
 
 setuptools.setup(
         name='brozzler',
-        version='1.5.dev311',
+        version='1.5.dev312',
         description='Distributed web crawling with browsers',
         url='https://github.com/internetarchive/brozzler',
         author='Noah Levitt',