Merge branch 'ytdlp_last' into qa

This commit is contained in:
Barbara Miller 2024-04-04 12:27:32 -07:00
commit ff8823d3d4

View file

@ -21,6 +21,7 @@ import yt_dlp
from yt_dlp.utils import match_filter_func from yt_dlp.utils import match_filter_func
import brozzler import brozzler
import urllib.request import urllib.request
from urllib.parse import urlparse
import tempfile import tempfile
import urlcanon import urlcanon
import os import os
@ -31,17 +32,24 @@ import threading
thread_local = threading.local() thread_local = threading.local()
def should_ytdlp(page): def is_html_maybe(url):
skip_url_types = ['pdf', 'jpg', 'jpeg', 'png', 'gif', 'mp4', 'mpeg'] skip_url_exts = ['pdf', 'jpg', 'jpeg', 'png', 'gif', 'mp4', 'mpeg']
if page.redirect_url:
ytdlp_url = page.redirect_url
else:
ytdlp_url = page.url
for t in skip_url_types: parsed_url = urlparse(url)
if t in ytdlp_url: base_url, ext = os.path.splitext(parsed_url.path)
logging.warning("skipping yt-dlp for %s due to unsupported guessed content type", ytdlp_url) ext = ext[1:]
for skip in skip_url_exts:
if ext.startswith(skip):
return False return False
return True
def should_ytdlp(page):
ytdlp_url = page.redirect_url if page.redirect_url else page.url
if not is_html_maybe(ytdlp_url):
logging.warning("skipping yt-dlp for %s due to unsupported extension", ytdlp_url)
return False
return True return True