diff --git a/.github/workflows/dependabot.yml b/.github/workflows/dependabot.yml
index f7ead70..eb4aaeb 100644
--- a/.github/workflows/dependabot.yml
+++ b/.github/workflows/dependabot.yml
@@ -9,6 +9,23 @@ jobs:
     runs-on: ubuntu-latest
     if: github.event.pull_request.user.login == 'dependabot[bot]' && github.repository == 'internetarchive/brozzler'
     steps:
+      - uses: actions/checkout@v4
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5
+      - name: Test new yt-dlp
+        run: |
+          set -euo pipefail
+
+          uv sync --extra yt-dlp --extra rethinkdb --extra warcprox --python 3.12
+
+          # Warcprox has to be running to get video capture results
+          .venv/bin/warcprox &
+          warcprox_pid=$!
+          # Stop warcprox on every exit path: with `set -e`, a trailing
+          # `kill` is never reached when the test script fails.
+          trap 'kill "$warcprox_pid" 2>/dev/null || true' EXIT
+
+          uv run scripts/ytdlp_test.py
       - name: Dependabot metadata
         id: metadata
         uses: dependabot/fetch-metadata@d7267f607e9d3fb96fc2fbe83e0af444713e90b7
diff --git a/scripts/ytdlp_test.py b/scripts/ytdlp_test.py
new file mode 100644
index 0000000..71e4500
--- /dev/null
+++ b/scripts/ytdlp_test.py
@@ -0,0 +1,90 @@
+"""Smoke-test yt-dlp video capture through brozzler.
+
+Brozzles each URL in ``videos`` through a proxy expected on localhost:8000 and
+exits non-zero when fewer than 75% of them yield a captured video.
+"""
+
+import math
+import sys
+import traceback
+
+import brozzler
+
+CHROME_EXE = brozzler.suggest_default_chrome_exe()
+
+videos = [
+    # Short YouTube video
+    "https://www.youtube.com/watch?v=AdtZtvlFi9o",
+    # Long YouTube video (former livestream we've had trouble capturing)
+    "https://www.youtube.com/watch?v=v4f6InE9X_c",
+    # YouTube Short
+    "https://www.youtube.com/shorts/ee_lH4qlfzc",
+    # Vimeo
+    "https://vimeo.com/175568834",
+    # Instagram
+    "https://www.instagram.com/reel/DFZMmHONL8K/",
+    # Audio in a webpage
+    "https://www.woxx.lu/am-bistro-mat-der-woxx-308-grenzenlose-fitness/",
+    # Video in a webpage
+    "https://play.rtl.lu/shows/lb/eurovision/episodes/r/3414779",
+    # TikTok
+    "https://www.tiktok.com/@cbcnews/video/7498842317630033157",
+    # Twitter
+    "https://x.com/NationalZoo/status/690915532539838464",
+    # Facebook
+    "https://www.facebook.com/100064323443815/videos/1421958299004555",
+]
+
+
+def brozzle_page(worker, page) -> bool:
+    """Brozzle *page* and report whether its first video was captured.
+
+    Returns True only when the first entry of ``page.videos`` has a 2xx
+    response code and a positive content length.
+    """
+    site = brozzler.Site(None, {})
+
+    with brozzler.Browser(chrome_exe=CHROME_EXE) as browser:
+        worker.brozzle_page(browser, site, page)
+
+    # This gets assigned after a video is captured; if an
+    # exception was raised by yt-dlp, it never gets assigned.
+    if "videos" not in page or not page.videos:
+        return False
+
+    video = page.videos[0]
+    # NOTE(review): assumes "content-length" can be absent or None on a video
+    # record; treat that as zero bytes instead of raising. Confirm against
+    # how brozzler fills in page.videos.
+    content_length = video.get("content-length") or 0
+    return 200 <= video["response_code"] < 300 and content_length > 0
+
+
+def main() -> int:
+    """Brozzle every URL in ``videos`` and return the process exit code."""
+    worker = brozzler.BrozzlerWorker(None, proxy="localhost:8000")
+    min_successes = math.floor(len(videos) * 0.75) or 1
+
+    successes = 0
+    for url in videos:
+        page = brozzler.Page(None, {"url": url})
+        try:
+            captured = brozzle_page(worker, page)
+        except Exception:
+            # One broken page must not abort the run; the success threshold
+            # exists to tolerate individual capture failures.
+            traceback.print_exc()
+            captured = False
+        if captured:
+            successes += 1
+
+    if successes >= min_successes:
+        print(f"Success! {successes}/{len(videos)} captures succeeded.")
+        return 0
+
+    print(f"Failure: {successes}/{len(videos)} captures succeeded.")
+    return 1
+
+
+if __name__ == "__main__":
+    sys.exit(main())