From ac9950a1ea3a66c1b15f359c98dd91e303976695 Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Tue, 24 Sep 2019 16:40:45 -0700 Subject: [PATCH] better regex, outlinks.push(m[2]) --- brozzler/js-templates/extract-outlinks.js | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/brozzler/js-templates/extract-outlinks.js b/brozzler/js-templates/extract-outlinks.js index a389484..0ecb788 100644 --- a/brozzler/js-templates/extract-outlinks.js +++ b/brozzler/js-templates/extract-outlinks.js @@ -1,7 +1,7 @@ // we have problems if the page has changed the definition of Set or Array // http://www.polyvore.com/ does this for example var __brzl_framesDone = new Set(); -var __brzl_popup_re = /window.open\(.*?(['"])(http.*?)\1,/ +var __brzl_popup_re = /window.open\(.*?(['"])((?:https?)?\/\/.*?)\1,/ var __brzl_compileOutlinks = function(frame) { __brzl_framesDone.add(frame); var outlinks = []; @@ -14,7 +14,9 @@ var __brzl_compileOutlinks = function(frame) { if (popups && popups.length > 0) { for (var p=0; p < popups.length; p++) { m = popups[p].onclick.toString().match(__brzl_popup_re); - popups[p] = m[2]; + if (m) { + outlinks.push(m[2]); + } } outlinks = outlinks.concat(popups); }