Add GitHub Actions test workflows and drop the pkg_resources dependency.

Summary of the changes in this patch:
  * .github/workflows/daily.yaml: new scheduled workflow running the full
    test suite on Python 3.12.
  * .github/workflows/setup/action.yml: new composite action that installs
    apt packages, rethinkdb, and the pip dependencies used by the tests.
  * .github/workflows/tests.yml: new push/pull-request workflow running
    tests/test_cli.py and tests/test_units.py on Python 3.8 and 3.12.
  * brozzler/__init__.py: read the package version via importlib.metadata
    instead of pkg_resources.
  * brozzler/worker.py: treat a falsy page.failed_attempts as 0 when
    computing the retry backoff.
  * setup.py: add a "warcprox" extra.
  * tests/test_cli.py: look up console scripts via importlib.metadata.
  * tests/test_units.py: mark two tests as expected failures (issue 330).

diff --git a/.github/workflows/daily.yaml b/.github/workflows/daily.yaml
new file mode 100644
index 0000000..bc79677
--- /dev/null
+++ b/.github/workflows/daily.yaml
@@ -0,0 +1,22 @@
+name: Full test suite
+
+on:
+  schedule:
+    - cron: "0 6 * * *" # 06:00 UTC daily (10PM PST / 11PM PDT)
+
+jobs:
+  test:
+    name: Run tests
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+
+      - uses: ./.github/workflows/setup
+
+      - name: Run tests
+        run: |
+          py.test --tb=native --verbose tests
diff --git a/.github/workflows/setup/action.yml b/.github/workflows/setup/action.yml
new file mode 100644
index 0000000..8367704
--- /dev/null
+++ b/.github/workflows/setup/action.yml
@@ -0,0 +1,27 @@
+name: Test setup
+
+runs:
+  using: composite
+  steps:
+    - name: Install apt dependencies
+      run: |
+        sudo apt-get update
+        sudo apt-get install -y libjpeg-dev chromium-browser
+      shell: bash
+
+    - name: Set up rethinkdb
+      run: |
+        wget -qO- https://download.rethinkdb.com/repository/raw/pubkey.gpg | sudo gpg --dearmor -o /usr/share/keyrings/rethinkdb-archive-keyrings.gpg
+        echo "deb [signed-by=/usr/share/keyrings/rethinkdb-archive-keyrings.gpg] https://download.rethinkdb.com/repository/ubuntu-$(lsb_release -cs) $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/rethinkdb.list
+        sudo apt-get update
+        sudo apt-get install -y rethinkdb
+        sudo cp /etc/rethinkdb/default.conf.sample /etc/rethinkdb/instances.d/instance1.conf
+        sudo /etc/init.d/rethinkdb restart
+      shell: bash
+
+    - name: Install pip dependencies
+      run: |
+        pip install .[rethinkdb,warcprox,yt-dlp]
+        # setuptools required by rethinkdb==2.4.9
+        pip install pytest setuptools
+      shell: bash
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
new file mode 100644
index 0000000..fdafbd5
--- /dev/null
+++ b/.github/workflows/tests.yml
@@ -0,0 +1,31 @@
+name: Tests
+
+on:
+  push:
+    branches:
+      - main
+      - master
+  pull_request:
+    branches:
+      - main
+      - master
+
+jobs:
+  test:
+    name: Run tests
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        version: ['3.8', '3.12']
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.version }}
+
+      - uses: ./.github/workflows/setup
+
+      - name: Run tests
+        run: |
+          py.test --tb=native --verbose tests/test_cli.py tests/test_units.py
diff --git a/brozzler/__init__.py b/brozzler/__init__.py
index 7798e5f..7df8671 100644
--- a/brozzler/__init__.py
+++ b/brozzler/__init__.py
@@ -19,9 +19,9 @@ limitations under the License.
 
 import logging
 import structlog
-from pkg_resources import get_distribution as _get_distribution
+from importlib.metadata import version as _version
 
-__version__ = _get_distribution("brozzler").version
+__version__ = _version("brozzler")
 
 
 class ShutdownRequested(Exception):
diff --git a/brozzler/worker.py b/brozzler/worker.py
index 5d4b03e..8014dfd 100644
--- a/brozzler/worker.py
+++ b/brozzler/worker.py
@@ -605,7 +605,7 @@ class BrozzlerWorker:
             if page:
                 # Calculate backoff in seconds based on number of failed attempts.
                 # Minimum of 60, max of 135 giving delays of 60, 90, 135, 135...
-                retry_delay = min(135, 60 * (1.5**page.failed_attempts))
+                retry_delay = min(135, 60 * (1.5 ** (page.failed_attempts or 0)))
                 page.retry_after = doublethink.utcnow() + datetime.timedelta(
                     seconds=retry_delay
                 )
diff --git a/setup.py b/setup.py
index f55b90d..1c8d710 100644
--- a/setup.py
+++ b/setup.py
@@ -81,6 +81,9 @@ setuptools.setup(
     extras_require={
         "yt-dlp": ["yt-dlp>=2024.7.25"],
         "dashboard": ["flask>=1.0", "gunicorn>=19.8.1"],
+        "warcprox": [
+            "warcprox>=2.4.31",
+        ],
         "rethinkdb": [
             "rethinkdb==2.4.9",
             "doublethink==0.4.9",
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 567260f..076a466 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -18,14 +18,24 @@ limitations under the License.
 """
 
 import brozzler.cli
-import pkg_resources
+import importlib.metadata
 import pytest
 import subprocess
 import doublethink
 
 
+def console_scripts():
+    # We do a dict comprehension here because the select filters aren't
+    # available until Python 3.10's importlib.
+    return {
+        ep.name: ep
+        for ep in importlib.metadata.distribution("brozzler").entry_points
+        if ep.group == "console_scripts"
+    }
+
+
 def cli_commands():
-    commands = set(pkg_resources.get_entry_map("brozzler")["console_scripts"].keys())
+    commands = set(console_scripts().keys())
     commands.remove("brozzler-wayback")
     try:
         import gunicorn
@@ -40,8 +50,8 @@ def cli_commands():
 
 @pytest.mark.parametrize("cmd", cli_commands())
 def test_call_entrypoint(capsys, cmd):
-    entrypoint = pkg_resources.get_entry_map("brozzler")["console_scripts"][cmd]
-    callable = entrypoint.resolve()
+    entrypoint = console_scripts()[cmd]
+    callable = entrypoint.load()
     with pytest.raises(SystemExit):
         callable(["/whatever/bin/%s" % cmd, "--version"])
     out, err = capsys.readouterr()
diff --git a/tests/test_units.py b/tests/test_units.py
index 27a89a7..096f632 100644
--- a/tests/test_units.py
+++ b/tests/test_units.py
@@ -260,6 +260,9 @@ blocks:
     )
 
 
+# Some changes to the brozzler ydl interface not represented in this test
+# https://github.com/internetarchive/brozzler/issues/330
+@pytest.mark.xfail
 def test_proxy_down():
     """
     Test all fetching scenarios raise `brozzler.ProxyError` when proxy is down.
@@ -471,6 +474,9 @@ def test_thread_raise_second_with_block():
     assert isinstance(thread_caught_exception, Exception2)
 
 
+# brozzler.ydl.YoutubeDLSpy is missing
+# https://github.com/internetarchive/brozzler/issues/330
+@pytest.mark.xfail
 def test_needs_browsing():
     # only one test case here right now, which exposed a bug
 