mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-24 08:39:59 -05:00
Use urlcanon.whatwg in extracted outlinks
The aim is to improve outlink quality.
This commit is contained in:
parent
8beb96817e
commit
33b7a7f564
@@ -594,8 +594,8 @@ class Browser:
         if ('result' in message and 'result' in message['result']
                 and 'value' in message['result']['result']):
             if message['result']['result']['value']:
-                return frozenset(
-                        message['result']['result']['value'].split('\n'))
+                return frozenset([str(urlcanon.whatwg(link)) for link in
+                        message['result']['result']['value'].split('\n')])
             else:
                 # no links found
                 return frozenset()
|
Loading…
x
Reference in New Issue
Block a user