From 30cbd3b13d4757ded2437fa23ae98035688aba72 Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Tue, 3 Sep 2019 14:47:48 -0700 Subject: [PATCH 1/4] add pop urls using regex for better match --- brozzler/behaviors.yaml | 2 +- brozzler/js-templates/extract-outlinks.js | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/brozzler/behaviors.yaml b/brozzler/behaviors.yaml index e792e69..570dc70 100644 --- a/brozzler/behaviors.yaml +++ b/brozzler/behaviors.yaml @@ -182,4 +182,4 @@ behavior_js_template: umbraBehavior.js.j2 default_parameters: actions: - - selector: button.sc-button-play, .playButton, div.soundItem, .jwlist>a + - selector: button.sc-button-play, .playButton, div.soundItem, .jwlist>a, video diff --git a/brozzler/js-templates/extract-outlinks.js b/brozzler/js-templates/extract-outlinks.js index e3a04ca..af37c43 100644 --- a/brozzler/js-templates/extract-outlinks.js +++ b/brozzler/js-templates/extract-outlinks.js @@ -1,6 +1,7 @@ // we have problems if the page has changed the definition of Set or Array // http://www.polyvore.com/ does this for example var __brzl_framesDone = new Set(); +var __brzl_popup_re = /window.open\(\s*(['"])(.*?)\1/ var __brzl_compileOutlinks = function(frame) { __brzl_framesDone.add(frame); var outlinks = []; @@ -8,6 +9,16 @@ var __brzl_compileOutlinks = function(frame) { if (frame && frame.document) { outlinks = Array.prototype.slice.call( frame.document.querySelectorAll('a[href], area[href]')); + popups = Array.prototype.slice.call( + frame.document.querySelectorAll('a[onclick]')); + if (popups && popups.length > 0) { + for (var p=0; p < popups.length; p++) { + m = popups[p].onclick.toString().match(__brzl_popup_re); + if (m) { + outlinks.push(m[2]); + } + } + } for (var i = 0; i < frame.frames.length; i++) { if (frame.frames[i] && !__brzl_framesDone.has(frame.frames[i])) { outlinks = outlinks.concat( From a3b70fcb27d56f5246ec46a1b27e6e0b6b0d9e96 Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Tue, 7 Apr 2020 11:27:32 -0700 Subject: [PATCH 2/4] audio, too --- brozzler/behaviors.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/brozzler/behaviors.yaml b/brozzler/behaviors.yaml index 570dc70..143c36c 100644 --- a/brozzler/behaviors.yaml +++ b/brozzler/behaviors.yaml @@ -182,4 +182,4 @@ behavior_js_template: umbraBehavior.js.j2 default_parameters: actions: - - selector: button.sc-button-play, .playButton, div.soundItem, .jwlist>a, video + - selector: button.sc-button-play, .playButton, div.soundItem, .jwlist>a, audio, video From ae7248fff048ba8cb897b2923720cf3b1af8638a Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Mon, 13 Apr 2020 19:38:18 -0700 Subject: [PATCH 3/4] add dblclick (and fix typo) --- brozzler/js-templates/extract-outlinks.js | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/brozzler/js-templates/extract-outlinks.js b/brozzler/js-templates/extract-outlinks.js index af37c43..f839ecf 100644 --- a/brozzler/js-templates/extract-outlinks.js +++ b/brozzler/js-templates/extract-outlinks.js @@ -1,7 +1,7 @@ // we have problems if the page has changed the definition of Set or Array // http://www.polyvore.com/ does this for example var __brzl_framesDone = new Set(); -var __brzl_popup_re = /window.open\(\s*(['"])(.*?)\1/ +var __brzl_popup_re = /window.open\(\s*(['"])(.*?)\1/; var __brzl_compileOutlinks = function(frame) { __brzl_framesDone.add(frame); var outlinks = []; @@ -10,10 +10,14 @@ var __brzl_compileOutlinks = function(frame) { outlinks = Array.prototype.slice.call( frame.document.querySelectorAll('a[href], area[href]')); popups = Array.prototype.slice.call( - frame.document.querySelectorAll('a[onclick]')); + frame.document.querySelectorAll('a[onclick], a[ondblclick]')); if (popups && popups.length > 0) { for (var p=0; p < popups.length; p++) { - m = popups[p].onclick.toString().match(__brzl_popup_re); + if (popups[p].onclick){ + m = popups[p].onclick.toString().match(__brzl_popup_re); + } else { + m = popups[p].ondblclick.toString().match(__brzl_popup_re); + } if (m) { outlinks.push(m[2]); } From 04fba79d34ad7480e23550eb5801959cca260714 Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Thu, 16 Apr 2020 18:09:03 -0700 Subject: [PATCH 4/4] faster regex match --- brozzler/js-templates/extract-outlinks.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/brozzler/js-templates/extract-outlinks.js b/brozzler/js-templates/extract-outlinks.js index f839ecf..4778df1 100644 --- a/brozzler/js-templates/extract-outlinks.js +++ b/brozzler/js-templates/extract-outlinks.js @@ -14,9 +14,9 @@ var __brzl_compileOutlinks = function(frame) { if (popups && popups.length > 0) { for (var p=0; p < popups.length; p++) { if (popups[p].onclick){ - m = popups[p].onclick.toString().match(__brzl_popup_re); + m = __brzl_popup_re[Symbol.match](popups[p].onclick.toString()); } else { - m = popups[p].ondblclick.toString().match(__brzl_popup_re); + m = __brzl_popup_re[Symbol.match](popups[p].ondblclick.toString()); } if (m) { outlinks.push(m[2]);