mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-12-16 00:53:49 -05:00
fix github ruff issues
This commit is contained in:
parent
7d58a9ae3b
commit
db17335ffb
4 changed files with 23 additions and 18 deletions
4
.github/workflows/setup/action.yml
vendored
4
.github/workflows/setup/action.yml
vendored
|
|
@ -25,5 +25,7 @@ runs:
|
||||||
|
|
||||||
- name: Install pip dependencies
|
- name: Install pip dependencies
|
||||||
run: |
|
run: |
|
||||||
uv sync --python ${{ inputs.python-version }} --extra rethinkdb --extra warcprox --extra yt-dlp
|
pip install .[rethinkdb,warcprox,yt-dlp,psycopg]
|
||||||
|
# setuptools required by rethinkdb==2.4.9
|
||||||
|
pip install pytest setuptools
|
||||||
shell: bash
|
shell: bash
|
||||||
|
|
|
||||||
|
|
@ -39,7 +39,6 @@ from urllib3.exceptions import ProxyError, TimeoutError
|
||||||
import brozzler
|
import brozzler
|
||||||
import brozzler.browser
|
import brozzler.browser
|
||||||
from brozzler.model import VideoCaptureOptions
|
from brozzler.model import VideoCaptureOptions
|
||||||
from brozzler.ydl import VideoDataClient
|
|
||||||
|
|
||||||
from . import metrics
|
from . import metrics
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -24,12 +24,14 @@ import tempfile
|
||||||
import threading
|
import threading
|
||||||
import time
|
import time
|
||||||
import urllib.request
|
import urllib.request
|
||||||
from typing import List
|
from typing import Any, List, Optional
|
||||||
|
|
||||||
import doublethink
|
import doublethink
|
||||||
|
import psycopg
|
||||||
import structlog
|
import structlog
|
||||||
import urlcanon
|
import urlcanon
|
||||||
import yt_dlp
|
import yt_dlp
|
||||||
|
from psycopg_pool import ConnectionPool, PoolTimeout
|
||||||
from yt_dlp.utils import ExtractorError, match_filter_func
|
from yt_dlp.utils import ExtractorError, match_filter_func
|
||||||
|
|
||||||
import brozzler
|
import brozzler
|
||||||
|
|
@ -38,7 +40,6 @@ from . import metrics
|
||||||
|
|
||||||
thread_local = threading.local()
|
thread_local = threading.local()
|
||||||
|
|
||||||
|
|
||||||
PROXY_ATTEMPTS = 4
|
PROXY_ATTEMPTS = 4
|
||||||
YTDLP_WAIT = 10
|
YTDLP_WAIT = 10
|
||||||
YTDLP_MAX_REDIRECTS = 5
|
YTDLP_MAX_REDIRECTS = 5
|
||||||
|
|
@ -52,26 +53,29 @@ logger = structlog.get_logger(logger_name=__name__)
|
||||||
class VideoDataClient:
|
class VideoDataClient:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
if VIDEO_DATA_SOURCE and VIDEO_DATA_SOURCE.startswith("postgresql"):
|
if VIDEO_DATA_SOURCE and VIDEO_DATA_SOURCE.startswith("postgresql"):
|
||||||
import psycopg
|
|
||||||
from psycopg_pool import ConnectionPool
|
|
||||||
|
|
||||||
pool = ConnectionPool(VIDEO_DATA_SOURCE, min_size=1, max_size=9)
|
pool = ConnectionPool(VIDEO_DATA_SOURCE, min_size=1, max_size=9)
|
||||||
pool.wait()
|
pool.wait()
|
||||||
logger.info("pg pool ready")
|
logger.info("pg pool ready")
|
||||||
atexit.register(pool.close)
|
# atexit.register(pool.close)
|
||||||
|
|
||||||
self.pool = pool
|
self.pool = pool
|
||||||
|
|
||||||
def _execute_pg_query(
|
def _execute_pg_query(
|
||||||
self, query: str, row_factory=None, fetchone=False, fetchall=False
|
self, query: str, row_factory=None, fetchone=False, fetchall=False
|
||||||
) -> Optional[Any]:
|
) -> Optional[Any]:
|
||||||
with self.pool.connection() as conn:
|
try:
|
||||||
with conn.cursor(row_factory=row_factory) as cur:
|
with self.pool.connection() as conn:
|
||||||
cur.execute(query)
|
with conn.cursor(row_factory=row_factory) as cur:
|
||||||
if fetchone:
|
cur.execute(query)
|
||||||
return cur.fetchone()
|
if fetchone:
|
||||||
if fetchall:
|
return cur.fetchone()
|
||||||
return cur.fetchall()
|
if fetchall:
|
||||||
|
return cur.fetchall()
|
||||||
|
except PoolTimeout as e:
|
||||||
|
logger.warn("hit PoolTimeout: %s", e)
|
||||||
|
self.pool.check()
|
||||||
|
except Exception as e:
|
||||||
|
logger.warn("postgres query failed: %s", e)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def get_pg_video_captures(self, site=None, source=None) -> List[str]:
|
def get_pg_video_captures(self, site=None, source=None) -> List[str]:
|
||||||
|
|
@ -79,7 +83,7 @@ class VideoDataClient:
|
||||||
seed = site.metadata.ait_seed_id if site.metadata.ait_seed_id else None
|
seed = site.metadata.ait_seed_id if site.metadata.ait_seed_id else None
|
||||||
|
|
||||||
# TODO: generalize, maybe make variable?
|
# TODO: generalize, maybe make variable?
|
||||||
containing_page_timestamp_pattern = "2025%" # for future pre-dup additions
|
# containing_page_timestamp_pattern = "2025%" # for future pre-dup additions
|
||||||
|
|
||||||
if source == "youtube":
|
if source == "youtube":
|
||||||
containing_page_url_pattern = "http://youtube.com/watch%" # yes, video data canonicalization uses "http"
|
containing_page_url_pattern = "http://youtube.com/watch%" # yes, video data canonicalization uses "http"
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
[project]
|
[project]
|
||||||
name = "brozzler"
|
name = "brozzler"
|
||||||
version = "1.7.0"
|
version = "1.7.1"
|
||||||
authors = [
|
authors = [
|
||||||
{ name="Noah Levitt", email="nlevitt@archive.org" },
|
{ name="Noah Levitt", email="nlevitt@archive.org" },
|
||||||
]
|
]
|
||||||
|
|
@ -40,7 +40,7 @@ license = "Apache-2.0"
|
||||||
|
|
||||||
[project.optional-dependencies]
|
[project.optional-dependencies]
|
||||||
yt-dlp = ["yt-dlp[default,curl-cffi]>=2024.7.25"]
|
yt-dlp = ["yt-dlp[default,curl-cffi]>=2024.7.25"]
|
||||||
psycopg = ["psycopg[binary]>=3.2.6"]
|
psycopg = ["psycopg[binary,pool]>=3.2.6"]
|
||||||
dashboard = ["flask>=1.0", "gunicorn>=19.8.1"]
|
dashboard = ["flask>=1.0", "gunicorn>=19.8.1"]
|
||||||
warcprox = ["warcprox>=2.4.31"]
|
warcprox = ["warcprox>=2.4.31"]
|
||||||
rethinkdb = [
|
rethinkdb = [
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue