mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-24 00:29:53 -05:00
Merge pull request #223 from vbanos/fix-AddressValueError
Skip invalid outlink
This commit is contained in:
commit
b59c4395ed
@@ -28,6 +28,7 @@ import brozzler
|
|||||||
from requests.structures import CaseInsensitiveDict
|
from requests.structures import CaseInsensitiveDict
|
||||||
import datetime
|
import datetime
|
||||||
import base64
|
import base64
|
||||||
|
from ipaddress import AddressValueError
|
||||||
from brozzler.chrome import Chrome
|
from brozzler.chrome import Chrome
|
||||||
import socket
|
import socket
|
||||||
import urlcanon
|
import urlcanon
|
||||||
@@ -619,8 +620,13 @@ class Browser:
|
|||||||
if ('result' in message and 'result' in message['result']
|
if ('result' in message and 'result' in message['result']
|
||||||
and 'value' in message['result']['result']):
|
and 'value' in message['result']['result']):
|
||||||
if message['result']['result']['value']:
|
if message['result']['result']['value']:
|
||||||
return frozenset([str(urlcanon.whatwg(link)) for link in
|
out = []
|
||||||
message['result']['result']['value'].split('\n')])
|
for link in message['result']['result']['value'].split('\n'):
|
||||||
|
try:
|
||||||
|
out.append(str(urlcanon.whatwg(link)))
|
||||||
|
except AddressValueError:
|
||||||
|
self.logger.warning('skip invalid outlink: %s', link)
|
||||||
|
return frozenset(out)
|
||||||
else:
|
else:
|
||||||
# no links found
|
# no links found
|
||||||
return frozenset()
|
return frozenset()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user