Handle exceptions when extracting links

like this one:
Uncaught DOMException: Blocked a frame with origin "https://www.youtube.com" from accessing a cross-origin frame.
    at __brzl_compileOutlinks (<anonymous>:4:24)
    at __brzl_compileOutlinks (<anonymous>:10:29)
    at <anonymous>:16:1
__brzl_compileOutlinks @ VM194:4
__brzl_compileOutlinks @ VM194:10

It's not clear exactly why this happens, but we just have to handle it.
This commit is contained in:
Noah Levitt 2018-10-29 17:42:25 -07:00
parent af85f28908
commit 1073431f76
2 changed files with 13 additions and 7 deletions

View File

@ -3,16 +3,22 @@
// NOTE(review): this span looks like a diff hunk rendered without +/- markers,
// so superseded and replacement lines appear side by side (e.g. the two
// `var outlinks` declarations and the duplicated recursion loop) — confirm
// against the repository before treating it as runnable code.

// Frames that have already been handed to __brzl_compileOutlinks; used to
// skip frames that were visited before.
var __brzl_framesDone = new Set();
// Collects the `a[href]` and `area[href]` elements found in `frame`'s
// document and, recursively, in each of its child frames that has not been
// visited yet. Returns them as a single array.
var __brzl_compileOutlinks = function(frame) {
// Mark the frame as visited before doing anything else, so the recursion
// below never re-enters it.
__brzl_framesDone.add(frame);
if (frame && frame.document) {
var outlinks = Array.prototype.slice.call(
var outlinks = [];
// Looking at a frame can throw, e.g. `DOMException: Blocked a frame with
// origin ... from accessing a cross-origin frame`. Starting from an empty
// array and wrapping the frame access in try/catch means a failure still
// returns whatever was assigned to `outlinks` before the exception.
try {
if (frame && frame.document) {
// Copy the NodeList returned by querySelectorAll into a real Array so that
// concat() can be used on it below.
outlinks = Array.prototype.slice.call(
frame.document.querySelectorAll('a[href], area[href]'));
for (var i = 0; i < frame.frames.length; i++) {
if (frame.frames[i] && !__brzl_framesDone.has(frame.frames[i])) {
outlinks = outlinks.concat(
__brzl_compileOutlinks(frame.frames[i]));
// Recurse into each child frame that exists and has not been visited yet,
// appending its links to this frame's list.
for (var i = 0; i < frame.frames.length; i++) {
if (frame.frames[i] && !__brzl_framesDone.has(frame.frames[i])) {
outlinks = outlinks.concat(
__brzl_compileOutlinks(frame.frames[i]));
}
}
}
} catch (e) {
// Log the exception and carry on rather than letting it propagate to the
// caller.
console.log("exception looking at frame" + frame + ": " + e);
}
return outlinks;
}
// Entry point: walk the frame tree starting at the top-level window and join
// the collected elements with newlines; each element is converted to a
// string by join().
__brzl_compileOutlinks(window).join('\n');

View File

@ -32,7 +32,7 @@ def find_package_data(package):
setuptools.setup(
name='brozzler',
version='1.5.dev311',
version='1.5.dev312',
description='Distributed web crawling with browsers',
url='https://github.com/internetarchive/brozzler',
author='Noah Levitt',