mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-07-21 22:18:55 -04:00
ci: set up a yt-dlp test script
This runs every time we get a new yt-dlp version: we test whether this script is able to capture at least 75% (currently 7 of 10) of a set of videos we've defined. If it succeeds, we go ahead and automatically merge the new yt-dlp version into the qa branch so that we can test further.
This commit is contained in:
parent
b4d2726e54
commit
794f7dd98d
2 changed files with 85 additions and 0 deletions
16
.github/workflows/dependabot.yml
vendored
16
.github/workflows/dependabot.yml
vendored
|
@ -9,6 +9,22 @@ jobs:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
if: github.event.pull_request.user.login == 'dependabot[bot]' && github.repository == 'internetarchive/brozzler'
|
if: github.event.pull_request.user.login == 'dependabot[bot]' && github.repository == 'internetarchive/brozzler'
|
||||||
steps:
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
- name: Install uv
|
||||||
|
uses: astral-sh/setup-uv@v5
|
||||||
|
- name: Test new yt-dlp
  run: |
    set -euo pipefail

    uv sync --extra yt-dlp --extra rethinkdb --extra warcprox --python 3.12

    # Warcprox has to be running to get video capture results
    .venv/bin/warcprox &
    warcprox_pid=$!

    # Stop warcprox on every exit path. With `set -e`, a failing test
    # script aborts the step immediately, so a trailing `kill` would only
    # ever run on success.
    trap 'kill "$warcprox_pid" 2>/dev/null || true' EXIT

    uv run scripts/ytdlp_test.py
|
||||||
- name: Dependabot metadata
|
- name: Dependabot metadata
|
||||||
id: metadata
|
id: metadata
|
||||||
uses: dependabot/fetch-metadata@d7267f607e9d3fb96fc2fbe83e0af444713e90b7
|
uses: dependabot/fetch-metadata@d7267f607e9d3fb96fc2fbe83e0af444713e90b7
|
||||||
|
|
69
scripts/ytdlp_test.py
Normal file
69
scripts/ytdlp_test.py
Normal file
|
@ -0,0 +1,69 @@
|
||||||
|
import math
|
||||||
|
import sys
|
||||||
|
|
||||||
|
import brozzler
|
||||||
|
|
||||||
|
# Chrome executable handed to every brozzler.Browser this script launches;
# the value is whatever brozzler suggests as its default.
CHROME_EXE = brozzler.suggest_default_chrome_exe()
|
||||||
|
|
||||||
|
|
||||||
|
def brozzle_page(worker, page) -> bool:
    """Brozzle ``page`` in a fresh browser and report whether a video was captured.

    Args:
        worker: Object whose ``brozzle_page(browser, site, page)`` method
            performs the visit (the script passes a
            ``brozzler.BrozzlerWorker``).
        page: The ``brozzler.Page`` to visit. Its ``videos`` entry is
            inspected once the visit has finished.

    Returns:
        True when ``page.videos`` exists, is non-empty, and its first entry
        has a 2xx ``response_code`` and a positive ``content-length``;
        False otherwise.
    """
    site = brozzler.Site(None, {})

    with brozzler.Browser(chrome_exe=CHROME_EXE) as browser:
        worker.brozzle_page(browser, site, page)

    # "videos" gets assigned after a video is captured; if an exception was
    # raised by yt-dlp, it never gets assigned. An empty or None value is
    # likewise treated as "nothing captured".
    if "videos" not in page or not page.videos:
        return False

    # Only the first recorded video is used to judge the capture.
    first_video = page.videos[0]
    return (
        200 <= first_video["response_code"] < 300
        and first_video["content-length"] > 0
    )
|
||||||
|
|
||||||
|
|
||||||
|
# All captures go through the proxy at localhost:8000 (presumably the
# warcprox instance started by the CI workflow -- confirm the port matches).
worker = brozzler.BrozzlerWorker(None, proxy="localhost:8000")

# One representative URL per platform / media type we want to keep working.
videos = [
    "https://www.youtube.com/watch?v=AdtZtvlFi9o",  # YouTube: short video
    # YouTube: long video (former livestream we've had trouble capturing)
    "https://www.youtube.com/watch?v=v4f6InE9X_c",
    "https://www.youtube.com/shorts/ee_lH4qlfzc",  # YouTube Short
    "https://vimeo.com/175568834",  # Vimeo
    "https://www.instagram.com/reel/DFZMmHONL8K/",  # Instagram
    # Audio embedded in a webpage
    "https://www.woxx.lu/am-bistro-mat-der-woxx-308-grenzenlose-fitness/",
    # Video embedded in a webpage
    "https://play.rtl.lu/shows/lb/eurovision/episodes/r/3414779",
    "https://www.tiktok.com/@cbcnews/video/7498842317630033157",  # TikTok
    "https://x.com/NationalZoo/status/690915532539838464",  # Twitter
    "https://www.facebook.com/100064323443815/videos/1421958299004555",  # Facebook
]

# Pass mark: 75% of the URLs, rounded down, but never fewer than one.
min_successes = math.floor(len(videos) * 0.75) or 1

# Visit each URL in order with its own Page and count the good captures.
successes = sum(
    1
    for url in videos
    if brozzle_page(worker, brozzler.Page(None, {"url": url}))
)

if successes < min_successes:
    print(f"Failure: {successes}/{len(videos)} captures succeeded.")
    sys.exit(1)

print(f"Success! {successes}/{len(videos)} captures succeeded.")
|
Loading…
Add table
Add a link
Reference in a new issue