mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-24 08:39:59 -05:00
Use urlcanon.whatwg in extracted outlinks
The aim is to improve outlink quality.
This commit is contained in:
parent
8beb96817e
commit
33b7a7f564
@@ -594,8 +594,8 @@ class Browser:
|
||||
if ('result' in message and 'result' in message['result']
|
||||
and 'value' in message['result']['result']):
|
||||
if message['result']['result']['value']:
|
||||
return frozenset(
|
||||
message['result']['result']['value'].split('\n'))
|
||||
return frozenset([str(urlcanon.whatwg(link)) for link in
|
||||
message['result']['result']['value'].split('\n')])
|
||||
else:
|
||||
# no links found
|
||||
return frozenset()
|
||||
|
Loading…
x
Reference in New Issue
Block a user