From 85ae741b5d5fa1e62173994e6ec84826349c214a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Misty=20De=20M=C3=A9o?= Date: Fri, 18 Jul 2025 14:33:25 -0700 Subject: [PATCH 01/10] deps: bump ruff --- uv.lock | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/uv.lock b/uv.lock index 5185a18..bdf18c4 100644 --- a/uv.lock +++ b/uv.lock @@ -892,27 +892,27 @@ wheels = [ [[package]] name = "ruff" -version = "0.9.10" +version = "0.12.4" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/20/8e/fafaa6f15c332e73425d9c44ada85360501045d5ab0b81400076aff27cf6/ruff-0.9.10.tar.gz", hash = "sha256:9bacb735d7bada9cfb0f2c227d3658fc443d90a727b47f206fb33f52f3c0eac7", size = 3759776, upload-time = "2025-03-07T15:27:44.363Z" } +sdist = { url = "https://files.pythonhosted.org/packages/9b/ce/8d7dbedede481245b489b769d27e2934730791a9a82765cb94566c6e6abd/ruff-0.12.4.tar.gz", hash = "sha256:13efa16df6c6eeb7d0f091abae50f58e9522f3843edb40d56ad52a5a4a4b6873", size = 5131435, upload-time = "2025-07-17T17:27:19.138Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/73/b2/af7c2cc9e438cbc19fafeec4f20bfcd72165460fe75b2b6e9a0958c8c62b/ruff-0.9.10-py3-none-linux_armv6l.whl", hash = "sha256:eb4d25532cfd9fe461acc83498361ec2e2252795b4f40b17e80692814329e42d", size = 10049494, upload-time = "2025-03-07T15:26:51.268Z" }, - { url = "https://files.pythonhosted.org/packages/6d/12/03f6dfa1b95ddd47e6969f0225d60d9d7437c91938a310835feb27927ca0/ruff-0.9.10-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:188a6638dab1aa9bb6228a7302387b2c9954e455fb25d6b4470cb0641d16759d", size = 10853584, upload-time = "2025-03-07T15:26:56.104Z" }, - { url = "https://files.pythonhosted.org/packages/02/49/1c79e0906b6ff551fb0894168763f705bf980864739572b2815ecd3c9df0/ruff-0.9.10-py3-none-macosx_11_0_arm64.whl", hash = "sha256:5284dcac6b9dbc2fcb71fdfc26a217b2ca4ede6ccd57476f52a587451ebe450d", size = 10155692, upload-time = "2025-03-07T15:27:01.385Z" }, - { url = "https://files.pythonhosted.org/packages/5b/01/85e8082e41585e0e1ceb11e41c054e9e36fed45f4b210991052d8a75089f/ruff-0.9.10-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:47678f39fa2a3da62724851107f438c8229a3470f533894b5568a39b40029c0c", size = 10369760, upload-time = "2025-03-07T15:27:04.023Z" }, - { url = "https://files.pythonhosted.org/packages/a1/90/0bc60bd4e5db051f12445046d0c85cc2c617095c0904f1aa81067dc64aea/ruff-0.9.10-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:99713a6e2766b7a17147b309e8c915b32b07a25c9efd12ada79f217c9c778b3e", size = 9912196, upload-time = "2025-03-07T15:27:06.93Z" }, - { url = "https://files.pythonhosted.org/packages/66/ea/0b7e8c42b1ec608033c4d5a02939c82097ddcb0b3e393e4238584b7054ab/ruff-0.9.10-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:524ee184d92f7c7304aa568e2db20f50c32d1d0caa235d8ddf10497566ea1a12", size = 11434985, upload-time = "2025-03-07T15:27:10.082Z" }, - { url = "https://files.pythonhosted.org/packages/d5/86/3171d1eff893db4f91755175a6e1163c5887be1f1e2f4f6c0c59527c2bfd/ruff-0.9.10-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:df92aeac30af821f9acf819fc01b4afc3dfb829d2782884f8739fb52a8119a16", size = 12155842, upload-time = "2025-03-07T15:27:12.727Z" }, - { url = "https://files.pythonhosted.org/packages/89/9e/700ca289f172a38eb0bca752056d0a42637fa17b81649b9331786cb791d7/ruff-0.9.10-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:de42e4edc296f520bb84954eb992a07a0ec5a02fecb834498415908469854a52", size = 11613804, upload-time = "2025-03-07T15:27:15.944Z" }, - { url = "https://files.pythonhosted.org/packages/f2/92/648020b3b5db180f41a931a68b1c8575cca3e63cec86fd26807422a0dbad/ruff-0.9.10-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d257f95b65806104b6b1ffca0ea53f4ef98454036df65b1eda3693534813ecd1", size = 13823776, upload-time = "2025-03-07T15:27:18.996Z" }, - { url = "https://files.pythonhosted.org/packages/5e/a6/cc472161cd04d30a09d5c90698696b70c169eeba2c41030344194242db45/ruff-0.9.10-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b60dec7201c0b10d6d11be00e8f2dbb6f40ef1828ee75ed739923799513db24c", size = 11302673, upload-time = "2025-03-07T15:27:21.655Z" }, - { url = "https://files.pythonhosted.org/packages/6c/db/d31c361c4025b1b9102b4d032c70a69adb9ee6fde093f6c3bf29f831c85c/ruff-0.9.10-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:d838b60007da7a39c046fcdd317293d10b845001f38bcb55ba766c3875b01e43", size = 10235358, upload-time = "2025-03-07T15:27:24.72Z" }, - { url = "https://files.pythonhosted.org/packages/d1/86/d6374e24a14d4d93ebe120f45edd82ad7dcf3ef999ffc92b197d81cdc2a5/ruff-0.9.10-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:ccaf903108b899beb8e09a63ffae5869057ab649c1e9231c05ae354ebc62066c", size = 9886177, upload-time = "2025-03-07T15:27:27.282Z" }, - { url = "https://files.pythonhosted.org/packages/00/62/a61691f6eaaac1e945a1f3f59f1eea9a218513139d5b6c2b8f88b43b5b8f/ruff-0.9.10-py3-none-musllinux_1_2_i686.whl", hash = "sha256:f9567d135265d46e59d62dc60c0bfad10e9a6822e231f5b24032dba5a55be6b5", size = 10864747, upload-time = "2025-03-07T15:27:30.637Z" }, - { url = "https://files.pythonhosted.org/packages/ee/94/2c7065e1d92a8a8a46d46d9c3cf07b0aa7e0a1e0153d74baa5e6620b4102/ruff-0.9.10-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:5f202f0d93738c28a89f8ed9eaba01b7be339e5d8d642c994347eaa81c6d75b8", size = 11360441, upload-time = "2025-03-07T15:27:33.356Z" }, - { url = "https://files.pythonhosted.org/packages/a7/8f/1f545ea6f9fcd7bf4368551fb91d2064d8f0577b3079bb3f0ae5779fb773/ruff-0.9.10-py3-none-win32.whl", hash = "sha256:bfb834e87c916521ce46b1788fbb8484966e5113c02df216680102e9eb960029", size = 10247401, upload-time = "2025-03-07T15:27:35.994Z" }, - { url = "https://files.pythonhosted.org/packages/4f/18/fb703603ab108e5c165f52f5b86ee2aa9be43bb781703ec87c66a5f5d604/ruff-0.9.10-py3-none-win_amd64.whl", hash = "sha256:f2160eeef3031bf4b17df74e307d4c5fb689a6f3a26a2de3f7ef4044e3c484f1", size = 11366360, upload-time = "2025-03-07T15:27:38.66Z" }, - { url = "https://files.pythonhosted.org/packages/35/85/338e603dc68e7d9994d5d84f24adbf69bae760ba5efd3e20f5ff2cec18da/ruff-0.9.10-py3-none-win_arm64.whl", hash = "sha256:5fd804c0327a5e5ea26615550e706942f348b197d5475ff34c19733aee4b2e69", size = 10436892, upload-time = "2025-03-07T15:27:41.687Z" }, + { url = "https://files.pythonhosted.org/packages/ae/9f/517bc5f61bad205b7f36684ffa5415c013862dee02f55f38a217bdbe7aa4/ruff-0.12.4-py3-none-linux_armv6l.whl", hash = "sha256:cb0d261dac457ab939aeb247e804125a5d521b21adf27e721895b0d3f83a0d0a", size = 10188824, upload-time = "2025-07-17T17:26:31.412Z" }, + { url = "https://files.pythonhosted.org/packages/28/83/691baae5a11fbbde91df01c565c650fd17b0eabed259e8b7563de17c6529/ruff-0.12.4-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:55c0f4ca9769408d9b9bac530c30d3e66490bd2beb2d3dae3e4128a1f05c7442", size = 10884521, upload-time = "2025-07-17T17:26:35.084Z" }, + { url = "https://files.pythonhosted.org/packages/d6/8d/756d780ff4076e6dd035d058fa220345f8c458391f7edfb1c10731eedc75/ruff-0.12.4-py3-none-macosx_11_0_arm64.whl", hash = "sha256:a8224cc3722c9ad9044da7f89c4c1ec452aef2cfe3904365025dd2f51daeae0e", size = 10277653, upload-time = "2025-07-17T17:26:37.897Z" }, + { url = "https://files.pythonhosted.org/packages/8d/97/8eeee0f48ece153206dce730fc9e0e0ca54fd7f261bb3d99c0a4343a1892/ruff-0.12.4-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e9949d01d64fa3672449a51ddb5d7548b33e130240ad418884ee6efa7a229586", size = 10485993, upload-time = "2025-07-17T17:26:40.68Z" }, + { url = "https://files.pythonhosted.org/packages/49/b8/22a43d23a1f68df9b88f952616c8508ea6ce4ed4f15353b8168c48b2d7e7/ruff-0.12.4-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:be0593c69df9ad1465e8a2d10e3defd111fdb62dcd5be23ae2c06da77e8fcffb", size = 10022824, upload-time = "2025-07-17T17:26:43.564Z" }, + { url = "https://files.pythonhosted.org/packages/cd/70/37c234c220366993e8cffcbd6cadbf332bfc848cbd6f45b02bade17e0149/ruff-0.12.4-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a7dea966bcb55d4ecc4cc3270bccb6f87a337326c9dcd3c07d5b97000dbff41c", size = 11524414, upload-time = "2025-07-17T17:26:46.219Z" }, + { url = "https://files.pythonhosted.org/packages/14/77/c30f9964f481b5e0e29dd6a1fae1f769ac3fd468eb76fdd5661936edd262/ruff-0.12.4-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:afcfa3ab5ab5dd0e1c39bf286d829e042a15e966b3726eea79528e2e24d8371a", size = 12419216, upload-time = "2025-07-17T17:26:48.883Z" }, + { url = "https://files.pythonhosted.org/packages/6e/79/af7fe0a4202dce4ef62c5e33fecbed07f0178f5b4dd9c0d2fcff5ab4a47c/ruff-0.12.4-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c057ce464b1413c926cdb203a0f858cd52f3e73dcb3270a3318d1630f6395bb3", size = 11976756, upload-time = "2025-07-17T17:26:51.754Z" }, + { url = "https://files.pythonhosted.org/packages/09/d1/33fb1fc00e20a939c305dbe2f80df7c28ba9193f7a85470b982815a2dc6a/ruff-0.12.4-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e64b90d1122dc2713330350626b10d60818930819623abbb56535c6466cce045", size = 11020019, upload-time = "2025-07-17T17:26:54.265Z" }, + { url = "https://files.pythonhosted.org/packages/64/f4/e3cd7f7bda646526f09693e2e02bd83d85fff8a8222c52cf9681c0d30843/ruff-0.12.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2abc48f3d9667fdc74022380b5c745873499ff827393a636f7a59da1515e7c57", size = 11277890, upload-time = "2025-07-17T17:26:56.914Z" }, + { url = "https://files.pythonhosted.org/packages/5e/d0/69a85fb8b94501ff1a4f95b7591505e8983f38823da6941eb5b6badb1e3a/ruff-0.12.4-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:2b2449dc0c138d877d629bea151bee8c0ae3b8e9c43f5fcaafcd0c0d0726b184", size = 10348539, upload-time = "2025-07-17T17:26:59.381Z" }, + { url = "https://files.pythonhosted.org/packages/16/a0/91372d1cb1678f7d42d4893b88c252b01ff1dffcad09ae0c51aa2542275f/ruff-0.12.4-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:56e45bb11f625db55f9b70477062e6a1a04d53628eda7784dce6e0f55fd549eb", size = 10009579, upload-time = "2025-07-17T17:27:02.462Z" }, + { url = "https://files.pythonhosted.org/packages/23/1b/c4a833e3114d2cc0f677e58f1df6c3b20f62328dbfa710b87a1636a5e8eb/ruff-0.12.4-py3-none-musllinux_1_2_i686.whl", hash = "sha256:478fccdb82ca148a98a9ff43658944f7ab5ec41c3c49d77cd99d44da019371a1", size = 10942982, upload-time = "2025-07-17T17:27:05.343Z" }, + { url = "https://files.pythonhosted.org/packages/ff/ce/ce85e445cf0a5dd8842f2f0c6f0018eedb164a92bdf3eda51984ffd4d989/ruff-0.12.4-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:0fc426bec2e4e5f4c4f182b9d2ce6a75c85ba9bcdbe5c6f2a74fcb8df437df4b", size = 11343331, upload-time = "2025-07-17T17:27:08.652Z" }, + { url = "https://files.pythonhosted.org/packages/35/cf/441b7fc58368455233cfb5b77206c849b6dfb48b23de532adcc2e50ccc06/ruff-0.12.4-py3-none-win32.whl", hash = "sha256:4de27977827893cdfb1211d42d84bc180fceb7b72471104671c59be37041cf93", size = 10267904, upload-time = "2025-07-17T17:27:11.814Z" }, + { url = "https://files.pythonhosted.org/packages/ce/7e/20af4a0df5e1299e7368d5ea4350412226afb03d95507faae94c80f00afd/ruff-0.12.4-py3-none-win_amd64.whl", hash = "sha256:fe0b9e9eb23736b453143d72d2ceca5db323963330d5b7859d60d101147d461a", size = 11209038, upload-time = "2025-07-17T17:27:14.417Z" }, + { url = "https://files.pythonhosted.org/packages/11/02/8857d0dfb8f44ef299a5dfd898f673edefb71e3b533b3b9d2db4c832dd13/ruff-0.12.4-py3-none-win_arm64.whl", hash = "sha256:0618ec4442a83ab545e5b71202a5c0ed7791e8471435b94e655b570a5031a98e", size = 10469336, upload-time = "2025-07-17T17:27:16.913Z" }, ] [[package]] From 0f0ae4fbc3b5aab5ecba82a2b909dc2ae7ace538 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Misty=20De=20M=C3=A9o?= Date: Fri, 18 Jul 2025 14:14:32 -0700 Subject: [PATCH 02/10] remove unnecessary imports, use find_spec This was flagged by ruff check - if we just want to find out if a package is available, and don't need to actually import it, we can use importlib.util.find_spec() to resolve it. This can lead to a moderate speedup too, since the import might be slow. --- brozzler/__init__.py | 10 ++-------- brozzler/worker.py | 9 ++------- tests/test_cli.py | 11 +++++------ 3 files changed, 9 insertions(+), 21 deletions(-) diff --git a/brozzler/__init__.py b/brozzler/__init__.py index 2150190..cbdc1e9 100644 --- a/brozzler/__init__.py +++ b/brozzler/__init__.py @@ -18,6 +18,7 @@ limitations under the License. """ import datetime +import importlib.util import logging import threading from importlib.metadata import version as _version @@ -414,12 +415,7 @@ __all__ = [ "suggest_default_chrome_exe", ] -# TODO try using importlib.util.find_spec to test for dependency presence -# rather than try/except on import. -# See https://docs.astral.sh/ruff/rules/unused-import/#example -try: - import doublethink # noqa: F401 - +if importlib.util.find_spec("doublethink"): # All of these imports use doublethink for real and are unsafe # to do if doublethink is unavailable. from brozzler.frontier import RethinkDbFrontier # noqa: F401 @@ -447,8 +443,6 @@ try: "InvalidJobConf", ] ) -except ImportError: - pass # we could make this configurable if there's a good reason MAX_PAGE_FAILURES = 3 diff --git a/brozzler/worker.py b/brozzler/worker.py index 7e2b254..43e2a02 100644 --- a/brozzler/worker.py +++ b/brozzler/worker.py @@ -19,6 +19,7 @@ limitations under the License. """ import datetime +import importlib.util import io import json import socket @@ -100,14 +101,8 @@ class BrozzlerWorker: if worker_id is not None: self.logger = self.logger.bind(worker_id=worker_id) - # TODO try using importlib.util.find_spec to test for dependency - # presence rather than try/except on import. - # See https://docs.astral.sh/ruff/rules/unused-import/#example - # We definitely shouldn't ytdlp if the optional extra is missing - try: - import yt_dlp # noqa: F401 - except ImportError: + if not importlib.util.find_spec("yt_dlp"): self.logger.info( "optional yt-dlp extra not installed; setting skip_youtube_dl to True" ) diff --git a/tests/test_cli.py b/tests/test_cli.py index 2e23ac1..11a7447 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -18,6 +18,7 @@ limitations under the License. """ import importlib.metadata +import importlib.util import os import subprocess @@ -47,14 +48,12 @@ def console_scripts(): def cli_commands(): commands = set(console_scripts().keys()) commands.remove("brozzler-wayback") - try: - import gunicorn # noqa: F401 - except ImportError: + if not importlib.util.find_spec("gunicorn"): commands.remove("brozzler-dashboard") - try: - import pywb # noqa: F401 - except ImportError: + + if not importlib.util.find_spec("pywb"): commands.remove("brozzler-easy") + return commands From 306e55d61ab7cb651de50d9d5cee6a0d6513ca01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Misty=20De=20M=C3=A9o?= Date: Fri, 18 Jul 2025 15:10:28 -0700 Subject: [PATCH 03/10] ci: fix daily run I migrated our regular tests to use `uv`, but neglected to update this config too. --- .github/workflows/daily.yaml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/daily.yaml b/.github/workflows/daily.yaml index a5b2258..0aa516c 100644 --- a/.github/workflows/daily.yaml +++ b/.github/workflows/daily.yaml @@ -12,12 +12,13 @@ jobs: steps: - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 + - name: Install uv + uses: astral-sh/setup-uv@v6 + + - uses: ./.github/workflows/setup with: python-version: '3.12' - - uses: ./.github/workflows/setup - - name: Run tests run: | - py.test --tb=native --verbose tests + uv run py.test --tb=native --verbose tests From cb2ee89aeef1bd0fa4c93b86ef3b2600af3797e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Misty=20De=20M=C3=A9o?= Date: Fri, 18 Jul 2025 15:30:44 -0700 Subject: [PATCH 04/10] tests: fix out of date frontier fixture --- tests/test_frontier.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_frontier.py b/tests/test_frontier.py index e5d5b79..e04d207 100644 --- a/tests/test_frontier.py +++ b/tests/test_frontier.py @@ -68,6 +68,7 @@ def test_basics(rethinker): "seeds": [{"url": "http://example.com"}, {"url": "https://example.org/"}] }, "status": "ACTIVE", + "pdfs_only": False, "starts_and_stops": [{"start": job.starts_and_stops[0]["start"], "stop": None}], } @@ -83,11 +84,11 @@ def test_basics(rethinker): "last_disclaimed": brozzler.EPOCH_UTC, "scope": {"accepts": [{"ssurt": "com,example,//http:/"}]}, "seed": "http://example.com", - "skip_ytdlp": None, "starts_and_stops": [ {"start": sites[0].starts_and_stops[0]["start"], "stop": None} ], "status": "ACTIVE", + "video_capture": "ENABLE_VIDEO_CAPTURE", } assert sites[1] == { "claimed": False, @@ -97,7 +98,6 @@ def test_basics(rethinker): "last_disclaimed": brozzler.EPOCH_UTC, "scope": {"accepts": [{"ssurt": "org,example,//https:/"}]}, "seed": "https://example.org/", - "skip_ytdlp": None, "starts_and_stops": [ { "start": sites[1].starts_and_stops[0]["start"], @@ -105,6 +105,7 @@ def test_basics(rethinker): }, ], "status": "ACTIVE", + "video_capture": "ENABLE_VIDEO_CAPTURE", } pages = list(frontier.site_pages(sites[0].id)) From dfcfed8acee01559372c149920f9f4954d06930a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Misty=20De=20M=C3=A9o?= Date: Fri, 18 Jul 2025 15:57:48 -0700 Subject: [PATCH 05/10] ci: skip manpage generation This should speed up dependency installs. --- .github/workflows/setup/action.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/setup/action.yml b/.github/workflows/setup/action.yml index a0203f0..940c65b 100644 --- a/.github/workflows/setup/action.yml +++ b/.github/workflows/setup/action.yml @@ -15,6 +15,11 @@ runs: - name: Set up rethinkdb run: | + # We don't need manpages in CI, and they take a significant amount + # of time to generate + echo "set man-db/auto-update false" | sudo debconf-communicate + sudo dpkg-reconfigure man-db + wget -qO- https://download.rethinkdb.com/repository/raw/pubkey.gpg | sudo gpg --dearmor -o /usr/share/keyrings/rethinkdb-archive-keyrings.gpg echo "deb [signed-by=/usr/share/keyrings/rethinkdb-archive-keyrings.gpg] https://download.rethinkdb.com/repository/ubuntu-$(lsb_release -cs) $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/rethinkdb.list sudo apt-get update From db5cc6758a56de8b7c16f2d1766d8ac18b98cb44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Misty=20De=20M=C3=A9o?= Date: Fri, 18 Jul 2025 15:42:59 -0700 Subject: [PATCH 06/10] ci: run frontier tests This was skipped before due to flakiness, but it seems to be both reliable and fast enough to be tolerable. It takes about 30 seconds to complete on my local machine. --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index b48e3c2..941f828 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -29,4 +29,4 @@ jobs: - name: Run tests run: | - uv run py.test --tb=native --verbose tests/test_cli.py tests/test_units.py + uv run py.test --tb=native --verbose tests/test_cli.py tests/test_frontier.py tests/test_units.py From 60f363ca899c6c73a7017ed9641d77ef49ca66b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Misty=20De=20M=C3=A9o?= Date: Fri, 18 Jul 2025 15:51:52 -0700 Subject: [PATCH 07/10] tests: mark frontier perf test xfail This is failing for me in CI, but passing locally. --- tests/test_frontier.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_frontier.py b/tests/test_frontier.py index e04d207..da6f5bf 100644 --- a/tests/test_frontier.py +++ b/tests/test_frontier.py @@ -1052,6 +1052,8 @@ def test_max_claimed_sites_cross_job(rethinker): rr.table("sites").delete().run() +# Works locally, but reliably fails in CI. +@pytest.mark.xfail def test_max_claimed_sites_load_perf(rethinker): rr = rethinker frontier = brozzler.RethinkDbFrontier(rr) From f54e9e382abdf72cf2efa32749eaa311e59365cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Misty=20De=20M=C3=A9o?= Date: Fri, 18 Jul 2025 16:37:33 -0700 Subject: [PATCH 08/10] tests: fix invalid escape This made the common mistake of putting `\.` instead of `\\.` in a non-raw string. --- tests/test_units.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_units.py b/tests/test_units.py index a6ff9db..c495ac9 100644 --- a/tests/test_units.py +++ b/tests/test_units.py @@ -177,7 +177,7 @@ def test_robots_connection_failure(): def test_scoping(): test_scope = yaml.safe_load( - """ + r""" max_hops: 100 accepts: - url_match: REGEX_MATCH From 99575b03b442ba0cb37013cbb3a3e39fd688c0d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Misty=20De=20M=C3=A9o?= Date: Mon, 21 Jul 2025 11:30:18 -0700 Subject: [PATCH 09/10] ci: always run full test suite We previously ran the full suite, including test_brozzling, on a daily timer because it took an enormous amount of time to run. I'd been under the impression this was because it *had* to take that long to do the work it was performing, but it looks like it hadn't been necessary and the suite has been sped up massively since. We can now run it in about six and a half minutes, which is perfectly fine to run on every PR. --- .github/workflows/daily.yaml | 24 ------------------------ .github/workflows/tests.yml | 2 +- 2 files changed, 1 insertion(+), 25 deletions(-) delete mode 100644 .github/workflows/daily.yaml diff --git a/.github/workflows/daily.yaml b/.github/workflows/daily.yaml deleted file mode 100644 index 0aa516c..0000000 --- a/.github/workflows/daily.yaml +++ /dev/null @@ -1,24 +0,0 @@ -name: Full test suite - -on: - schedule: - - cron: "0 6 * * *" # 10PM Pacific daily - -jobs: - test: - name: Run tests - runs-on: ubuntu-latest - timeout-minutes: 480 - steps: - - uses: actions/checkout@v4 - - - name: Install uv - uses: astral-sh/setup-uv@v6 - - - uses: ./.github/workflows/setup - with: - python-version: '3.12' - - - name: Run tests - run: | - uv run py.test --tb=native --verbose tests diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 941f828..4413c6a 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -29,4 +29,4 @@ jobs: - name: Run tests run: | - uv run py.test --tb=native --verbose tests/test_cli.py tests/test_frontier.py tests/test_units.py + uv run py.test --tb=native --verbose tests From 4c77515063d1b613d0f7bd3a09e55f0db667bf42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Misty=20De=20M=C3=A9o?= Date: Mon, 21 Jul 2025 12:27:57 -0700 Subject: [PATCH 10/10] deps: warctools 5.0.0 Needed for the warcprox import to work. --- uv.lock | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/uv.lock b/uv.lock index bdf18c4..2f6bb58 100644 --- a/uv.lock +++ b/uv.lock @@ -1044,9 +1044,12 @@ wheels = [ [[package]] name = "warctools" -version = "4.10.0" +version = "5.0.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e6/5b/17eacaa14dde83dbecb62be44c21c5e9b8f2c709c1da5846e361c3033f3b/warctools-4.10.0.tar.gz", hash = "sha256:ce0c6e274db8ac8810f7c97b3943e8e8deadbc3f5c982db77cddaae2d2ae6170", size = 24619, upload-time = "2016-09-02T16:06:52.31Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d4/86/fe310be791a32cbcb66a08120fcc7ec761ad9769663477f535066e5b24a2/warctools-5.0.0.tar.gz", hash = "sha256:129ce85272b2d1df12fdb02ed2268ece89ffbeb50db5270dff5941c06862a0f7", size = 27729, upload-time = "2025-05-30T17:25:19.134Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ab/6a/f69ca682ce765a6f83ec44ca1049b3265fbda3a3a2d09382f87cbcbac00d/warctools-5.0.0-py3-none-any.whl", hash = "sha256:d29fd6e5a620e69fdd3b34f9c767f8fe4c4989bbeda3289267f7a23714976e11", size = 34439, upload-time = "2025-05-30T17:25:18.167Z" }, +] [[package]] name = "websocket-client"