Handle exceptions when extracting links

like this one:
Uncaught DOMException: Blocked a frame with origin "https://www.youtube.com" from accessing a cross-origin frame.
    at __brzl_compileOutlinks (<anonymous>:4:24)
    at __brzl_compileOutlinks (<anonymous>:10:29)
    at <anonymous>:16:1
__brzl_compileOutlinks @ VM194:4
__brzl_compileOutlinks @ VM194:10

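Not sure exactly why this happens (presumably the browser's same-origin policy blocking script access to a cross-origin frame's document), but we just have to handle it.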
This commit is contained in:
Noah Levitt 2018-10-29 17:42:25 -07:00
parent af85f28908
commit 1073431f76
2 changed files with 13 additions and 7 deletions

View File

@ -3,8 +3,10 @@
var __brzl_framesDone = new Set(); var __brzl_framesDone = new Set();
var __brzl_compileOutlinks = function(frame) { var __brzl_compileOutlinks = function(frame) {
__brzl_framesDone.add(frame); __brzl_framesDone.add(frame);
var outlinks = [];
try {
if (frame && frame.document) { if (frame && frame.document) {
var outlinks = Array.prototype.slice.call( outlinks = Array.prototype.slice.call(
frame.document.querySelectorAll('a[href], area[href]')); frame.document.querySelectorAll('a[href], area[href]'));
for (var i = 0; i < frame.frames.length; i++) { for (var i = 0; i < frame.frames.length; i++) {
if (frame.frames[i] && !__brzl_framesDone.has(frame.frames[i])) { if (frame.frames[i] && !__brzl_framesDone.has(frame.frames[i])) {
@ -13,6 +15,10 @@ var __brzl_compileOutlinks = function(frame) {
} }
} }
} }
} catch (e) {
console.log("exception looking at frame" + frame + ": " + e);
}
return outlinks; return outlinks;
} }
__brzl_compileOutlinks(window).join('\n'); __brzl_compileOutlinks(window).join('\n');

View File

@ -32,7 +32,7 @@ def find_package_data(package):
setuptools.setup( setuptools.setup(
name='brozzler', name='brozzler',
version='1.5.dev311', version='1.5.dev312',
description='Distributed web crawling with browsers', description='Distributed web crawling with browsers',
url='https://github.com/internetarchive/brozzler', url='https://github.com/internetarchive/brozzler',
author='Noah Levitt', author='Noah Levitt',