From f2c89d1c18cb0e060549dac07bc7be9e489f6621 Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Wed, 17 Apr 2024 17:23:36 -0700 Subject: [PATCH] skip more exts, plus chrome-error --- brozzler/ydl.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/brozzler/ydl.py b/brozzler/ydl.py index 627da6d..635839b 100644 --- a/brozzler/ydl.py +++ b/brozzler/ydl.py @@ -34,7 +34,10 @@ thread_local = threading.local() def is_html_maybe(url): - skip_url_exts = ["pdf", "jpg", "jpeg", "png", "gif", "mp4", "mpeg"] + if "chrome-error:" in url: + return False + + skip_url_exts = ["pdf", "jpg", "jpeg", "png", "gif", "mp3", "mp4", "mpeg", "css", "js"] parsed_url = urlparse(url) base_url, ext = os.path.splitext(parsed_url.path)