From 30cbd3b13d4757ded2437fa23ae98035688aba72 Mon Sep 17 00:00:00 2001
From: Barbara Miller
Date: Tue, 3 Sep 2019 14:47:48 -0700
Subject: [PATCH] add pop urls using regex for better match

---
Review notes (this section is ignored by `git am`):
 * `popups` and `m` are now declared with `var`; the original assigned
   them without a declaration, creating globals on the crawled page.
 * `String(...)` replaces `.onclick.toString()` so that an element whose
   onclick property is null does not throw and abort the scan.
 * The "." in `window.open` is escaped in the regex and the declaration
   is terminated with a semicolon.
 * The redundant `if (popups && popups.length > 0)` wrapper is dropped;
   the for loop already handles an empty array.
 * NOTE(review): leading whitespace of the context lines was
   reconstructed, and the blob ids on the `index` lines still refer to
   the original commit. Use `git apply --ignore-whitespace` if the hunks
   do not apply cleanly.

 brozzler/behaviors.yaml                   |  2 +-
 brozzler/js-templates/extract-outlinks.js | 13 +++++++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/brozzler/behaviors.yaml b/brozzler/behaviors.yaml
index e792e69..570dc70 100644
--- a/brozzler/behaviors.yaml
+++ b/brozzler/behaviors.yaml
@@ -182,4 +182,4 @@
   behavior_js_template: umbraBehavior.js.j2
   default_parameters:
     actions:
-      - selector: button.sc-button-play, .playButton, div.soundItem, .jwlist>a
+      - selector: button.sc-button-play, .playButton, div.soundItem, .jwlist>a, video
diff --git a/brozzler/js-templates/extract-outlinks.js b/brozzler/js-templates/extract-outlinks.js
index e3a04ca..af37c43 100644
--- a/brozzler/js-templates/extract-outlinks.js
+++ b/brozzler/js-templates/extract-outlinks.js
@@ -1,6 +1,8 @@
 // we have problems if the page has changed the definition of Set or Array
 // http://www.polyvore.com/ does this for example
 var __brzl_framesDone = new Set();
+// matches window.open( followed by a quoted string; group 2 is the quoted text
+var __brzl_popup_re = /window\.open\(\s*(['"])(.*?)\1/;
 var __brzl_compileOutlinks = function(frame) {
     __brzl_framesDone.add(frame);
     var outlinks = [];
@@ -8,6 +10,17 @@ var __brzl_compileOutlinks = function(frame) {
         if (frame && frame.document) {
             outlinks = Array.prototype.slice.call(
                     frame.document.querySelectorAll('a[href], area[href]'));
+            // also collect urls passed to window.open() by inline onclick handlers
+            var popups = Array.prototype.slice.call(
+                    frame.document.querySelectorAll('a[onclick]'));
+            for (var p = 0; p < popups.length; p++) {
+                // String() instead of .toString(): the onclick property can be
+                // null (e.g. the inline handler failed to compile)
+                var m = String(popups[p].onclick).match(__brzl_popup_re);
+                if (m) {
+                    outlinks.push(m[2]);
+                }
+            }
             for (var i = 0; i < frame.frames.length; i++) {
                 if (frame.frames[i] && !__brzl_framesDone.has(frame.frames[i])) {
                     outlinks = outlinks.concat(