mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-07-21 14:09:00 -04:00
ci: set up a yt-dlp test script
This runs every time we get a new yt-dlp version: the script tries to capture a defined set of videos and passes only if enough of them succeed (described here as at least 3/5; the script itself requires 75% of the list, rounded down). If it passes, we go ahead and automatically merge the new yt-dlp version into the qa branch so that we can test further.
This commit is contained in:
parent
b4d2726e54
commit
794f7dd98d
2 changed files with 85 additions and 0 deletions
16
.github/workflows/dependabot.yml
vendored
16
.github/workflows/dependabot.yml
vendored
|
@ -9,6 +9,22 @@ jobs:
|
|||
runs-on: ubuntu-latest
|
||||
if: github.event.pull_request.user.login == 'dependabot[bot]' && github.repository == 'internetarchive/brozzler'
|
||||
steps:
|
||||
# Check out the repository so the project and scripts/ytdlp_test.py are available.
- uses: actions/checkout@v4
# uv is used by the next step to build the virtualenv and run the test script.
- name: Install uv
  uses: astral-sh/setup-uv@v5
# Capture smoke test for the dependency set in this pull request:
#   1. `set -euo pipefail` aborts the step on the first failing command.
#   2. `uv sync` installs the project with the yt-dlp, rethinkdb and warcprox
#      extras on Python 3.12.
#   3. warcprox is started in the background and its PID is remembered.
#   4. scripts/ytdlp_test.py exits non-zero when too few captures succeed,
#      which fails this step.
#   5. warcprox is stopped again.
# NOTE(review): because of `set -e`, the final `kill` is not reached when the
# test script fails, and there is no readiness wait between starting warcprox
# and running the test - confirm both are acceptable on the CI runner.
- name: Test new yt-dlp
  run: |
    set -euo pipefail

    uv sync --extra yt-dlp --extra rethinkdb --extra warcprox --python 3.12

    # Warcprox has to be running to get video capture results
    .venv/bin/warcprox &
    warcprox_pid=$!

    uv run scripts/ytdlp_test.py

    kill $warcprox_pid
|
||||
- name: Dependabot metadata
|
||||
id: metadata
|
||||
uses: dependabot/fetch-metadata@d7267f607e9d3fb96fc2fbe83e0af444713e90b7
|
||||
|
|
69
scripts/ytdlp_test.py
Normal file
69
scripts/ytdlp_test.py
Normal file
|
@ -0,0 +1,69 @@
|
|||
import math
|
||||
import sys
|
||||
|
||||
import brozzler
|
||||
|
||||
CHROME_EXE = brozzler.suggest_default_chrome_exe()
|
||||
|
||||
|
||||
def brozzle_page(worker, page) -> bool:
    """Brozzle ``page`` in a fresh browser and report whether a video was captured.

    Args:
        worker: Object whose ``brozzle_page(browser, site, page)`` method
            performs the capture.
        page: Page to capture. After the capture it is inspected for a
            ``"videos"`` entry.

    Returns:
        True only when ``page.videos`` is non-empty and its first entry has a
        2xx ``response_code`` and a positive ``content-length``; False in
        every other case.
    """
    site = brozzler.Site(None, {})

    with brozzler.Browser(chrome_exe=CHROME_EXE) as browser:
        worker.brozzle_page(browser, site, page)

    # "videos" is only assigned once a video has been captured; when yt-dlp
    # raised an exception it is never assigned at all.
    if "videos" not in page:
        return False

    if len(page.videos) == 0:
        return False

    # Only the first recorded video is checked.
    status = page.videos[0]["response_code"]
    return bool(200 <= status < 300 and page.videos[0]["content-length"] > 0)
|
||||
|
||||
|
||||
# Captures go through the proxy at localhost:8000 (presumably the warcprox
# instance started by the CI workflow - confirm the port matches).
worker = brozzler.BrozzlerWorker(None, proxy="localhost:8000")

# URLs exercised by the smoke test, one per kind of site or player.
videos = [
    # Short YouTube video
    "https://www.youtube.com/watch?v=AdtZtvlFi9o",
    # Long YouTube video (former livestream we've had trouble capturing)
    "https://www.youtube.com/watch?v=v4f6InE9X_c",
    # YouTube Short
    "https://www.youtube.com/shorts/ee_lH4qlfzc",
    # Vimeo
    "https://vimeo.com/175568834",
    # Instagram
    "https://www.instagram.com/reel/DFZMmHONL8K/",
    # Audio in a webpage
    "https://www.woxx.lu/am-bistro-mat-der-woxx-308-grenzenlose-fitness/",
    # Video in a webpage
    "https://play.rtl.lu/shows/lb/eurovision/episodes/r/3414779",
    # TikTok
    "https://www.tiktok.com/@cbcnews/video/7498842317630033157",
    # Twitter
    "https://x.com/NationalZoo/status/690915532539838464",
    # Facebook
    "https://www.facebook.com/100064323443815/videos/1421958299004555",
]

# Require three quarters of the list (rounded down), but never fewer than one.
min_successes = max(1, math.floor(len(videos) * 0.75))

# Each URL gets its own Page; a True result counts as 1 when summed.
successes = sum(
    brozzle_page(worker, brozzler.Page(None, {"url": url})) for url in videos
)

# Report the outcome; a non-zero exit status marks the run as failed.
if successes < min_successes:
    print(f"Failure: {successes}/{len(videos)} captures succeeded.")
    sys.exit(1)

print(f"Success! {successes}/{len(videos)} captures succeeded.")
|
Loading…
Add table
Add a link
Reference in a new issue