mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-08-02 11:36:15 -04:00
fix github ruff issues
This commit is contained in:
parent
7d58a9ae3b
commit
db17335ffb
4 changed files with 23 additions and 18 deletions
4
.github/workflows/setup/action.yml
vendored
4
.github/workflows/setup/action.yml
vendored
|
@@ -25,5 +25,7 @@ runs:
|
|||
|
||||
- name: Install pip dependencies
|
||||
run: |
|
||||
uv sync --python ${{ inputs.python-version }} --extra rethinkdb --extra warcprox --extra yt-dlp
|
||||
pip install .[rethinkdb,warcprox,yt-dlp,psycopg]
|
||||
# setuptools required by rethinkdb==2.4.9
|
||||
pip install pytest setuptools
|
||||
shell: bash
|
||||
|
|
|
@@ -39,7 +39,6 @@ from urllib3.exceptions import ProxyError, TimeoutError
|
|||
import brozzler
|
||||
import brozzler.browser
|
||||
from brozzler.model import VideoCaptureOptions
|
||||
from brozzler.ydl import VideoDataClient
|
||||
|
||||
from . import metrics
|
||||
|
||||
|
|
|
@@ -24,12 +24,14 @@ import tempfile
|
|||
import threading
|
||||
import time
|
||||
import urllib.request
|
||||
from typing import List
|
||||
from typing import Any, List, Optional
|
||||
|
||||
import doublethink
|
||||
import psycopg
|
||||
import structlog
|
||||
import urlcanon
|
||||
import yt_dlp
|
||||
from psycopg_pool import ConnectionPool, PoolTimeout
|
||||
from yt_dlp.utils import ExtractorError, match_filter_func
|
||||
|
||||
import brozzler
|
||||
|
@@ -38,7 +40,6 @@ from . import metrics
|
|||
|
||||
thread_local = threading.local()
|
||||
|
||||
|
||||
PROXY_ATTEMPTS = 4
|
||||
YTDLP_WAIT = 10
|
||||
YTDLP_MAX_REDIRECTS = 5
|
||||
|
@@ -52,26 +53,29 @@ logger = structlog.get_logger(logger_name=__name__)
|
|||
class VideoDataClient:
|
||||
def __init__(self):
|
||||
if VIDEO_DATA_SOURCE and VIDEO_DATA_SOURCE.startswith("postgresql"):
|
||||
import psycopg
|
||||
from psycopg_pool import ConnectionPool
|
||||
|
||||
pool = ConnectionPool(VIDEO_DATA_SOURCE, min_size=1, max_size=9)
|
||||
pool.wait()
|
||||
logger.info("pg pool ready")
|
||||
atexit.register(pool.close)
|
||||
# atexit.register(pool.close)
|
||||
|
||||
self.pool = pool
|
||||
|
||||
def _execute_pg_query(
|
||||
self, query: str, row_factory=None, fetchone=False, fetchall=False
|
||||
) -> Optional[Any]:
|
||||
with self.pool.connection() as conn:
|
||||
with conn.cursor(row_factory=row_factory) as cur:
|
||||
cur.execute(query)
|
||||
if fetchone:
|
||||
return cur.fetchone()
|
||||
if fetchall:
|
||||
return cur.fetchall()
|
||||
try:
|
||||
with self.pool.connection() as conn:
|
||||
with conn.cursor(row_factory=row_factory) as cur:
|
||||
cur.execute(query)
|
||||
if fetchone:
|
||||
return cur.fetchone()
|
||||
if fetchall:
|
||||
return cur.fetchall()
|
||||
except PoolTimeout as e:
|
||||
logger.warn("hit PoolTimeout: %s", e)
|
||||
self.pool.check()
|
||||
except Exception as e:
|
||||
logger.warn("postgres query failed: %s", e)
|
||||
return None
|
||||
|
||||
def get_pg_video_captures(self, site=None, source=None) -> List[str]:
|
||||
|
@@ -79,7 +83,7 @@ class VideoDataClient:
|
|||
seed = site.metadata.ait_seed_id if site.metadata.ait_seed_id else None
|
||||
|
||||
# TODO: generalize, maybe make variable?
|
||||
containing_page_timestamp_pattern = "2025%" # for future pre-dup additions
|
||||
# containing_page_timestamp_pattern = "2025%" # for future pre-dup additions
|
||||
|
||||
if source == "youtube":
|
||||
containing_page_url_pattern = "http://youtube.com/watch%" # yes, video data canonicalization uses "http"
|
||||
|
|
|
@@ -1,6 +1,6 @@
 [project]
 name = "brozzler"
-version = "1.7.0"
+version = "1.7.1"
 authors = [
 { name="Noah Levitt", email="nlevitt@archive.org" },
 ]
|
||||
|
@@ -40,7 +40,7 @@ license = "Apache-2.0"
|
|||
|
||||
[project.optional-dependencies]
|
||||
yt-dlp = ["yt-dlp[default,curl-cffi]>=2024.7.25"]
|
||||
psycopg = ["psycopg[binary]>=3.2.6"]
|
||||
psycopg = ["psycopg[binary,pool]>=3.2.6"]
|
||||
dashboard = ["flask>=1.0", "gunicorn>=19.8.1"]
|
||||
warcprox = ["warcprox>=2.4.31"]
|
||||
rethinkdb = [
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue