ci: set up a yt-dlp test script

This runs every time we get a new yt-dlp version. The script tries to
download a set of videos we've defined, and the test passes if at
least 3/5 of them are captured. If it succeeds, we go ahead and
automatically merge the new yt-dlp version into the qa branch so that
we can test further.
This commit is contained in:
Misty De Méo 2025-05-22 11:23:04 -07:00 committed by Misty De Méo
parent b4d2726e54
commit 794f7dd98d
2 changed files with 85 additions and 0 deletions

View file

@ -9,6 +9,22 @@ jobs:
runs-on: ubuntu-latest
if: github.event.pull_request.user.login == 'dependabot[bot]' && github.repository == 'internetarchive/brozzler'
steps:
- uses: actions/checkout@v4
- name: Install uv
uses: astral-sh/setup-uv@v5
- name: Test new yt-dlp
  run: |
    set -euo pipefail
    uv sync --extra yt-dlp --extra rethinkdb --extra warcprox --python 3.12
    # Warcprox has to be running to get video capture results
    .venv/bin/warcprox &
    warcprox_pid=$!
    # Stop warcprox however this script ends. Because of `set -e`, a
    # failing test run exits immediately, so a trailing `kill` would
    # never be reached on the failure path. Cleanup is best-effort so
    # that the step's result is decided by the test script alone.
    trap 'kill "$warcprox_pid" 2>/dev/null || true' EXIT
    uv run scripts/ytdlp_test.py
- name: Dependabot metadata
id: metadata
uses: dependabot/fetch-metadata@d7267f607e9d3fb96fc2fbe83e0af444713e90b7

69
scripts/ytdlp_test.py Normal file
View file

@ -0,0 +1,69 @@
import math
import sys
import brozzler
# Chrome executable suggested by brozzler. Looked up once when the script
# is loaded and passed to every brozzler.Browser that brozzle_page() opens.
CHROME_EXE = brozzler.suggest_default_chrome_exe()
def brozzle_page(worker, page) -> bool:
    """Brozzle ``page`` in a fresh browser and report whether a video was captured.

    Args:
        worker: Object whose ``brozzle_page(browser, site, page)`` method
            performs the crawl; this script passes a
            ``brozzler.BrozzlerWorker``.
        page: The page to visit. After the crawl its ``videos`` entry is
            inspected to decide the result.

    Returns:
        True when the first entry of ``page.videos`` has a 2xx
        ``response_code`` and a ``content-length`` greater than zero.
        False when ``videos`` is absent or empty, or when the first entry
        fails either check.
    """
    # A Site built from an empty settings dict; it is only needed because
    # worker.brozzle_page() takes a site argument.
    blank_site = brozzler.Site(None, {})
    with brozzler.Browser(chrome_exe=CHROME_EXE) as chrome:
        worker.brozzle_page(chrome, blank_site, page)

    # "videos" gets assigned after a video is captured; if yt-dlp raised
    # an exception, it never gets assigned.
    if "videos" not in page:
        return False
    if len(page.videos) == 0:
        return False

    # Only the first recorded video decides the outcome.
    first_video = page.videos[0]
    status = first_video["response_code"]
    return 200 <= status < 300 and first_video["content-length"] > 0
# Worker that routes its traffic through the proxy at localhost:8000.
# NOTE(review): presumably this is the warcprox instance the CI workflow
# starts before running this script -- confirm the port matches.
worker = brozzler.BrozzlerWorker(None, proxy="localhost:8000")

# One representative URL per kind of source we want to keep capturing.
videos = [
    # Short YouTube video
    "https://www.youtube.com/watch?v=AdtZtvlFi9o",
    # Long YouTube video (former livestream we've had trouble capturing)
    "https://www.youtube.com/watch?v=v4f6InE9X_c",
    # YouTube Short
    "https://www.youtube.com/shorts/ee_lH4qlfzc",
    # Vimeo
    "https://vimeo.com/175568834",
    # Instagram
    "https://www.instagram.com/reel/DFZMmHONL8K/",
    # Audio in a webpage
    "https://www.woxx.lu/am-bistro-mat-der-woxx-308-grenzenlose-fitness/",
    # Video in a webpage
    "https://play.rtl.lu/shows/lb/eurovision/episodes/r/3414779",
    # TikTok
    "https://www.tiktok.com/@cbcnews/video/7498842317630033157",
    # Twitter
    "https://x.com/NationalZoo/status/690915532539838464",
    # Facebook
    "https://www.facebook.com/100064323443815/videos/1421958299004555",
]

# Require 75% of the captures (rounded down) to succeed, but never fewer
# than one, so a very short list cannot pass with zero captures.
min_successes = math.floor(len(videos) * 0.75) or 1

# Brozzle every URL in order; each True result counts as one success.
successes = sum(
    brozzle_page(worker, brozzler.Page(None, {"url": video_url}))
    for video_url in videos
)

if successes < min_successes:
    print(f"Failure: {successes}/{len(videos)} captures succeeded.")
    sys.exit(1)

print(f"Success! {successes}/{len(videos)} captures succeeded.")