From f23f49108bdfe0b804635a0dfc5250521e8ab624 Mon Sep 17 00:00:00 2001 From: Vangelis Banos Date: Fri, 11 Oct 2019 10:45:23 +0000 Subject: [PATCH] Block more google-analytics URLs After analysing capture logs, we see that we didn't block many google-analytics-related URLs which are used for web statistics. We add these to the blocked URLs. In addition, we improve existing block rules. We used to block `*google-analytics.com/analytics.js` but many sites used some kind of parameter at the end, so these URLs weren't blocked. We add `*` at the end of the existing rules to block these cases as well. --- brozzler/browser.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/brozzler/browser.py b/brozzler/browser.py index b7abd19..0e1f296 100644 --- a/brozzler/browser.py +++ b/brozzler/browser.py @@ -361,8 +361,15 @@ class Browser: # disable google analytics and amp analytics self.send_to_chrome( method='Network.setBlockedURLs', - params={'urls': ['*google-analytics.com/analytics.js', - '*google-analytics.com/ga.js', + params={'urls': ['*google-analytics.com/analytics.js*', + '*google-analytics.com/ga.js*', + '*google-analytics.com/ga_exp.js*', + '*google-analytics.com/urchin.js*', + '*google-analytics.com/collect*', + '*google-analytics.com/r/collect*', + '*google-analytics.com/__utm.gif*', + '*google-analytics.com/gtm/js?*', + '*google-analytics.com/cx/api.js*', '*cdn.ampproject.org/*/amp-analytics*.js']}) def stop(self):