mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-04-20 23:56:34 -04:00
setup registry_url, metrics_port, env vars and CLI args
This commit is contained in:
parent
80ce6c0ea6
commit
7b6c306d14
@ -165,6 +165,20 @@ class BetterArgumentDefaultsHelpFormatter(argparse.ArgumentDefaultsHelpFormatter
|
||||
return super()._get_help_string(action)
|
||||
|
||||
|
||||
import enum
|
||||
|
||||
|
||||
class Env(str, enum.Enum):
    """Deployment environment advertised through the Prometheus ``env`` label.

    Each member names the environment in which the service described by a
    :py:class:`.Registration` is running. Because the enum mixes in ``str``,
    every member compares equal to its plain string value.
    """

    # Declaration order is preserved; it determines iteration order.
    qa = "qa"
    prod = "prod"
    dev = "dev"
|
||||
|
||||
|
||||
def brozzle_page(argv=None):
|
||||
"""
|
||||
Command line utility entry point for brozzling a single page. Opens url in
|
||||
@ -234,6 +248,24 @@ def brozzle_page(argv=None):
|
||||
action="store_true",
|
||||
help="Try to avoid web bot detection",
|
||||
)
|
||||
# Prometheus metrics options for the single-page brozzle command. The parsed
# values are forwarded as registry_url / metrics_port / env keyword arguments.
arg_parser.add_argument(
    "--registry_url",
    dest="registry_url",
    default=None,
    help="Prometheus registry url",
)
arg_parser.add_argument(
    "--metrics_port",
    # dest must be a string literal; a bare name here is an undefined
    # variable and raises NameError while the parser is being built.
    dest="metrics_port",
    # Convert command-line input so it has the same type as the int default.
    type=int,
    default=8889,
    help="Prometheus metrics port",
)
arg_parser.add_argument(
    "--env",
    # Same fix as above: dest is the attribute name, given as a string.
    dest="env",
    default=Env.dev,
    help="Prometheus env value",
)
|
||||
arg_parser.add_argument(
|
||||
"--screenshot-full-page", dest="screenshot_full_page", action="store_true"
|
||||
)
|
||||
@ -279,6 +311,9 @@ def brozzle_page(argv=None):
|
||||
window_height=args.window_height,
|
||||
window_width=args.window_width,
|
||||
stealth=args.stealth,
|
||||
registry_url=args.registry_url,
|
||||
metrics_port=args.metrics_port,
|
||||
env=args.env,
|
||||
)
|
||||
|
||||
def on_screenshot(screenshot_jpeg):
|
||||
@ -517,6 +552,24 @@ def brozzler_worker(argv=None):
|
||||
action="store_true",
|
||||
help="Try to avoid web bot detection",
|
||||
)
|
||||
# Prometheus metrics options for the worker command. The parsed values are
# forwarded as registry_url / metrics_port / env keyword arguments.
arg_parser.add_argument(
    "--registry_url",
    dest="registry_url",
    default=None,
    help="Prometheus registry url",
)
arg_parser.add_argument(
    "--metrics_port",
    # dest must be a string literal; a bare name here is an undefined
    # variable and raises NameError while the parser is being built.
    dest="metrics_port",
    # Convert command-line input so it has the same type as the int default.
    type=int,
    default=8888,
    help="Prometheus metrics port",
)
arg_parser.add_argument(
    "--env",
    # Same fix as above: dest is the attribute name, given as a string.
    dest="env",
    default=Env.qa,
    help="Prometheus env value",
)
|
||||
add_common_options(arg_parser, argv)
|
||||
|
||||
args = arg_parser.parse_args(args=argv[1:])
|
||||
@ -573,6 +626,9 @@ def brozzler_worker(argv=None):
|
||||
skip_visit_hashtags=args.skip_visit_hashtags,
|
||||
skip_youtube_dl=args.skip_youtube_dl,
|
||||
stealth=args.stealth,
|
||||
registry_url=args.registry_url,
|
||||
metrics_port=args.metrics_port,
|
||||
env=args.env,
|
||||
)
|
||||
|
||||
signal.signal(signal.SIGQUIT, dump_state)
|
||||
|
@ -18,7 +18,7 @@ except ImportError:
|
||||
class Env(str, enum.Enum):
|
||||
"""Values of the Prometheus ``env`` label applied to a
|
||||
:py:class:`.Registration` indicating the deployment environment in which
|
||||
the the service being advertised is operating.
|
||||
the service being advertised is operating.
|
||||
"""
|
||||
|
||||
qa = "qa"
|
||||
|
@ -28,7 +28,6 @@ import json
|
||||
import PIL.Image
|
||||
import io
|
||||
import socket
|
||||
import platform
|
||||
import random
|
||||
import requests
|
||||
import doublethink
|
||||
@ -42,18 +41,6 @@ from . import ydl
|
||||
r = rdb.RethinkDB()
|
||||
|
||||
|
||||
# Setup metrics
|
||||
registry_url = None
|
||||
metrics_port = 8090
|
||||
env = metrics.Env.dev
|
||||
hostname = platform.node()
|
||||
if hostname.endswith("archive.org"):
|
||||
registry_url = "http://wbgrp-svc283.us.archive.org:8888"
|
||||
metrics_port = settings.metrics_port
|
||||
env = metrics.Env.qa
|
||||
metrics.register_prom_metrics(registry_url, metrics_port, env)
|
||||
|
||||
|
||||
class BrozzlerWorker:
|
||||
logger = logging.getLogger(__module__ + "." + __qualname__)
|
||||
|
||||
@ -85,6 +72,9 @@ class BrozzlerWorker:
|
||||
stealth=False,
|
||||
window_height=900,
|
||||
window_width=1400,
|
||||
registry_url=None,
|
||||
metrics_port=None,
|
||||
env=None,
|
||||
):
|
||||
self._frontier = frontier
|
||||
self._service_registry = service_registry
|
||||
@ -107,6 +97,9 @@ class BrozzlerWorker:
|
||||
self._window_height = window_height
|
||||
self._window_width = window_width
|
||||
self._stealth = stealth
|
||||
self._registry_url = registry_url
|
||||
self._metrics_port = metrics_port
|
||||
self._env = env
|
||||
|
||||
self._browser_pool = brozzler.browser.BrowserPool(
|
||||
max_browsers, chrome_exe=chrome_exe, ignore_cert_errors=True
|
||||
@ -118,6 +111,12 @@ class BrozzlerWorker:
|
||||
self._start_stop_lock = threading.Lock()
|
||||
self._shutdown = threading.Event()
|
||||
|
||||
# Setup metrics
|
||||
registry_url = self._registry_url
|
||||
metrics_port = self._metrics_port
|
||||
env = self._env
|
||||
metrics.register_prom_metrics(registry_url, metrics_port, env)
|
||||
|
||||
def _choose_warcprox(self):
|
||||
warcproxes = self._service_registry.available_services("warcprox")
|
||||
if not warcproxes:
|
||||
|
@ -297,7 +297,7 @@ def _remember_videos(page, pushed_videos=None):
|
||||
|
||||
def _try_youtube_dl(worker, ydl, site, page):
|
||||
ytdlp_url = page.redirect_url if page.redirect_url else page.url
|
||||
ytdlp_host = ytdlp_url.split("//")[-1].split("/")[0].split('?')[0]
|
||||
ytdlp_host = ytdlp_url.split("//")[-1].split("/")[0].split("?")[0]
|
||||
try:
|
||||
logging.info("trying yt-dlp on %s", ytdlp_url)
|
||||
metrics.brozzler_ydl_download_attempts.labels(ytdlp_host).inc(1)
|
||||
|
Loading…
x
Reference in New Issue
Block a user