From 33b7a7f564b8e3e3136fb2a87eeb17b334b19404 Mon Sep 17 00:00:00 2001 From: Vangelis Banos Date: Thu, 31 Oct 2019 21:27:55 +0000 Subject: [PATCH] Use urlcanon.whatwg in extracted outlinks The aim is to improve outlink quality. --- brozzler/browser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/brozzler/browser.py b/brozzler/browser.py index 0e1f296..33c1ef1 100644 --- a/brozzler/browser.py +++ b/brozzler/browser.py @@ -594,8 +594,8 @@ class Browser: if ('result' in message and 'result' in message['result'] and 'value' in message['result']['result']): if message['result']['result']['value']: - return frozenset( - message['result']['result']['value'].split('\n')) + return frozenset([str(urlcanon.whatwg(link)) for link in + message['result']['result']['value'].split('\n')]) else: # no links found return frozenset()