From 7aabc5f65542cc5d6660407e6acfb5bc36d5f2f4 Mon Sep 17 00:00:00 2001 From: Vangelis Banos Date: Sun, 23 May 2021 11:31:47 +0000 Subject: [PATCH] Skip invalid outlink When one of the outlinks is `http://-1/`, `urlcanon.whatwg` raises an unhandled `ipaddress.AddressValueError` exception and the capture fails. We can skip the problematic outlink and keep the rest without crashing. --- brozzler/browser.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/brozzler/browser.py b/brozzler/browser.py index 1960b9e..8692e1d 100644 --- a/brozzler/browser.py +++ b/brozzler/browser.py @@ -28,6 +28,7 @@ import brozzler from requests.structures import CaseInsensitiveDict import datetime import base64 +from ipaddress import AddressValueError from brozzler.chrome import Chrome import socket import urlcanon @@ -619,8 +620,13 @@ class Browser: if ('result' in message and 'result' in message['result'] and 'value' in message['result']['result']): if message['result']['result']['value']: - return frozenset([str(urlcanon.whatwg(link)) for link in - message['result']['result']['value'].split('\n')]) + out = [] + for link in message['result']['result']['value'].split('\n'): + try: + out.append(str(urlcanon.whatwg(link))) + except AddressValueError: + self.logger.warning('skip invalid outlink: %s', link) + return frozenset(out) else: # no links found return frozenset()