mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-24 08:39:59 -05:00
handle exceptions extracting links
like this one: Uncaught DOMException: Blocked a frame with origin "https://www.youtube.com" from accessing a cross-origin frame. at __brzl_compileOutlinks (<anonymous>:4:24) at __brzl_compileOutlinks (<anonymous>:10:29) at <anonymous>:16:1 __brzl_compileOutlinks @ VM194:4 __brzl_compileOutlinks @ VM194:10 — It is not clear exactly why this happens, but we just have to handle it.
This commit is contained in:
parent
af85f28908
commit
1073431f76
@ -3,8 +3,10 @@
|
|||||||
var __brzl_framesDone = new Set();
|
var __brzl_framesDone = new Set();
|
||||||
var __brzl_compileOutlinks = function(frame) {
|
var __brzl_compileOutlinks = function(frame) {
|
||||||
__brzl_framesDone.add(frame);
|
__brzl_framesDone.add(frame);
|
||||||
|
var outlinks = [];
|
||||||
|
try {
|
||||||
if (frame && frame.document) {
|
if (frame && frame.document) {
|
||||||
var outlinks = Array.prototype.slice.call(
|
outlinks = Array.prototype.slice.call(
|
||||||
frame.document.querySelectorAll('a[href], area[href]'));
|
frame.document.querySelectorAll('a[href], area[href]'));
|
||||||
for (var i = 0; i < frame.frames.length; i++) {
|
for (var i = 0; i < frame.frames.length; i++) {
|
||||||
if (frame.frames[i] && !__brzl_framesDone.has(frame.frames[i])) {
|
if (frame.frames[i] && !__brzl_framesDone.has(frame.frames[i])) {
|
||||||
@ -13,6 +15,10 @@ var __brzl_compileOutlinks = function(frame) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} catch (e) {
|
||||||
|
console.log("exception looking at frame" + frame + ": " + e);
|
||||||
|
}
|
||||||
|
|
||||||
return outlinks;
|
return outlinks;
|
||||||
}
|
}
|
||||||
__brzl_compileOutlinks(window).join('\n');
|
__brzl_compileOutlinks(window).join('\n');
|
||||||
|
2
setup.py
2
setup.py
@ -32,7 +32,7 @@ def find_package_data(package):
|
|||||||
|
|
||||||
setuptools.setup(
|
setuptools.setup(
|
||||||
name='brozzler',
|
name='brozzler',
|
||||||
version='1.5.dev311',
|
version='1.5.dev312',
|
||||||
description='Distributed web crawling with browsers',
|
description='Distributed web crawling with browsers',
|
||||||
url='https://github.com/internetarchive/brozzler',
|
url='https://github.com/internetarchive/brozzler',
|
||||||
author='Noah Levitt',
|
author='Noah Levitt',
|
||||||
|
Loading…
x
Reference in New Issue
Block a user