diff --git a/brozzler/js-templates/extract-outlinks.js b/brozzler/js-templates/extract-outlinks.js
index e3a04ca..a389484 100644
--- a/brozzler/js-templates/extract-outlinks.js
+++ b/brozzler/js-templates/extract-outlinks.js
@@ -1,6 +1,8 @@
 // we have problems if the page has changed the definition of Set or Array
 // http://www.polyvore.com/ does this for example
 var __brzl_framesDone = new Set();
+// matches window.open('http...' inside an onclick handler; group 2 is the url
+var __brzl_popup_re = /window\.open\(.*?(['"])(http.*?)\1\s*[,)]/;
 var __brzl_compileOutlinks = function(frame) {
     __brzl_framesDone.add(frame);
     var outlinks = [];
@@ -8,6 +10,18 @@ var __brzl_compileOutlinks = function(frame) {
         if (frame && frame.document) {
             outlinks = Array.prototype.slice.call(
                     frame.document.querySelectorAll('a[href], area[href]'));
+            // also collect urls opened by onclick="window.open('http...')"
+            // handlers; read the raw attribute text because the onclick
+            // property can be null, and skip handlers that do not match so
+            // that an unrelated onclick cannot throw and abort the collection
+            var popups = frame.document.querySelectorAll('a[onclick]');
+            for (var p = 0; p < popups.length; p++) {
+                var m = popups[p].getAttribute('onclick').match(
+                        __brzl_popup_re);
+                if (m) {
+                    outlinks.push(m[2]);
+                }
+            }
             for (var i = 0; i < frame.frames.length; i++) {
                 if (frame.frames[i] && !__brzl_framesDone.has(frame.frames[i])) {
                     outlinks = outlinks.concat(