fix github ruff issues

This commit is contained in:
Barbara Miller 2025-06-30 15:10:14 -07:00
parent 7d58a9ae3b
commit db17335ffb
4 changed files with 23 additions and 18 deletions

View file

@ -25,5 +25,7 @@ runs:
- name: Install pip dependencies - name: Install pip dependencies
run: | run: |
uv sync --python ${{ inputs.python-version }} --extra rethinkdb --extra warcprox --extra yt-dlp pip install .[rethinkdb,warcprox,yt-dlp,psycopg]
# setuptools required by rethinkdb==2.4.9
pip install pytest setuptools
shell: bash shell: bash

View file

@ -39,7 +39,6 @@ from urllib3.exceptions import ProxyError, TimeoutError
import brozzler import brozzler
import brozzler.browser import brozzler.browser
from brozzler.model import VideoCaptureOptions from brozzler.model import VideoCaptureOptions
from brozzler.ydl import VideoDataClient
from . import metrics from . import metrics

View file

@ -24,12 +24,14 @@ import tempfile
import threading import threading
import time import time
import urllib.request import urllib.request
from typing import List from typing import Any, List, Optional
import doublethink import doublethink
import psycopg
import structlog import structlog
import urlcanon import urlcanon
import yt_dlp import yt_dlp
from psycopg_pool import ConnectionPool, PoolTimeout
from yt_dlp.utils import ExtractorError, match_filter_func from yt_dlp.utils import ExtractorError, match_filter_func
import brozzler import brozzler
@ -38,7 +40,6 @@ from . import metrics
thread_local = threading.local() thread_local = threading.local()
PROXY_ATTEMPTS = 4 PROXY_ATTEMPTS = 4
YTDLP_WAIT = 10 YTDLP_WAIT = 10
YTDLP_MAX_REDIRECTS = 5 YTDLP_MAX_REDIRECTS = 5
@ -52,19 +53,17 @@ logger = structlog.get_logger(logger_name=__name__)
class VideoDataClient: class VideoDataClient:
def __init__(self): def __init__(self):
if VIDEO_DATA_SOURCE and VIDEO_DATA_SOURCE.startswith("postgresql"): if VIDEO_DATA_SOURCE and VIDEO_DATA_SOURCE.startswith("postgresql"):
import psycopg
from psycopg_pool import ConnectionPool
pool = ConnectionPool(VIDEO_DATA_SOURCE, min_size=1, max_size=9) pool = ConnectionPool(VIDEO_DATA_SOURCE, min_size=1, max_size=9)
pool.wait() pool.wait()
logger.info("pg pool ready") logger.info("pg pool ready")
atexit.register(pool.close) # atexit.register(pool.close)
self.pool = pool self.pool = pool
def _execute_pg_query( def _execute_pg_query(
self, query: str, row_factory=None, fetchone=False, fetchall=False self, query: str, row_factory=None, fetchone=False, fetchall=False
) -> Optional[Any]: ) -> Optional[Any]:
try:
with self.pool.connection() as conn: with self.pool.connection() as conn:
with conn.cursor(row_factory=row_factory) as cur: with conn.cursor(row_factory=row_factory) as cur:
cur.execute(query) cur.execute(query)
@ -72,6 +71,11 @@ class VideoDataClient:
return cur.fetchone() return cur.fetchone()
if fetchall: if fetchall:
return cur.fetchall() return cur.fetchall()
except PoolTimeout as e:
logger.warn("hit PoolTimeout: %s", e)
self.pool.check()
except Exception as e:
logger.warn("postgres query failed: %s", e)
return None return None
def get_pg_video_captures(self, site=None, source=None) -> List[str]: def get_pg_video_captures(self, site=None, source=None) -> List[str]:
@ -79,7 +83,7 @@ class VideoDataClient:
seed = site.metadata.ait_seed_id if site.metadata.ait_seed_id else None seed = site.metadata.ait_seed_id if site.metadata.ait_seed_id else None
# TODO: generalize, maybe make variable? # TODO: generalize, maybe make variable?
containing_page_timestamp_pattern = "2025%" # for future pre-dup additions # containing_page_timestamp_pattern = "2025%" # for future pre-dup additions
if source == "youtube": if source == "youtube":
containing_page_url_pattern = "http://youtube.com/watch%" # yes, video data canonicalization uses "http" containing_page_url_pattern = "http://youtube.com/watch%" # yes, video data canonicalization uses "http"

View file

@ -1,6 +1,6 @@
[project] [project]
name = "brozzler" name = "brozzler"
version = "1.7.0" version = "1.7.1"
authors = [ authors = [
{ name="Noah Levitt", email="nlevitt@archive.org" }, { name="Noah Levitt", email="nlevitt@archive.org" },
] ]
@ -40,7 +40,7 @@ license = "Apache-2.0"
[project.optional-dependencies] [project.optional-dependencies]
yt-dlp = ["yt-dlp[default,curl-cffi]>=2024.7.25"] yt-dlp = ["yt-dlp[default,curl-cffi]>=2024.7.25"]
psycopg = ["psycopg[binary]>=3.2.6"] psycopg = ["psycopg[binary,pool]>=3.2.6"]
dashboard = ["flask>=1.0", "gunicorn>=19.8.1"] dashboard = ["flask>=1.0", "gunicorn>=19.8.1"]
warcprox = ["warcprox>=2.4.31"] warcprox = ["warcprox>=2.4.31"]
rethinkdb = [ rethinkdb = [