mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-09-23 06:04:47 -04:00
setup registry_url, metrics_port, env vars and CLI args
This commit is contained in:
parent
80ce6c0ea6
commit
7b6c306d14
4 changed files with 70 additions and 15 deletions
|
@ -165,6 +165,20 @@ class BetterArgumentDefaultsHelpFormatter(argparse.ArgumentDefaultsHelpFormatter
|
||||||
return super()._get_help_string(action)
|
return super()._get_help_string(action)
|
||||||
|
|
||||||
|
|
||||||
|
import enum
|
||||||
|
|
||||||
|
|
||||||
|
class Env(str, enum.Enum):
|
||||||
|
"""Values of the Prometheus ``env`` label applied to a
|
||||||
|
:py:class:`.Registration` indicating the deployment environment in which
|
||||||
|
the service being advertised is operating.
|
||||||
|
"""
|
||||||
|
|
||||||
|
qa = "qa"
|
||||||
|
prod = "prod"
|
||||||
|
dev = "dev"
|
||||||
|
|
||||||
|
|
||||||
def brozzle_page(argv=None):
|
def brozzle_page(argv=None):
|
||||||
"""
|
"""
|
||||||
Command line utility entry point for brozzling a single page. Opens url in
|
Command line utility entry point for brozzling a single page. Opens url in
|
||||||
|
@ -234,6 +248,24 @@ def brozzle_page(argv=None):
|
||||||
action="store_true",
|
action="store_true",
|
||||||
help="Try to avoid web bot detection",
|
help="Try to avoid web bot detection",
|
||||||
)
|
)
|
||||||
|
arg_parser.add_argument(
|
||||||
|
"--registry_url",
|
||||||
|
dest="registry_url",
|
||||||
|
default=None,
|
||||||
|
help="Prometheus registry url",
|
||||||
|
)
|
||||||
|
arg_parser.add_argument(
|
||||||
|
"--metrics_port",
|
||||||
|
dest="metrics_port",
|
||||||
|
default=8889,
|
||||||
|
help="Prometheus metrics port",
|
||||||
|
)
|
||||||
|
arg_parser.add_argument(
|
||||||
|
"--env",
|
||||||
|
dest="env",
|
||||||
|
default=Env.dev,
|
||||||
|
help="Prometheus env value",
|
||||||
|
)
|
||||||
arg_parser.add_argument(
|
arg_parser.add_argument(
|
||||||
"--screenshot-full-page", dest="screenshot_full_page", action="store_true"
|
"--screenshot-full-page", dest="screenshot_full_page", action="store_true"
|
||||||
)
|
)
|
||||||
|
@ -279,6 +311,9 @@ def brozzle_page(argv=None):
|
||||||
window_height=args.window_height,
|
window_height=args.window_height,
|
||||||
window_width=args.window_width,
|
window_width=args.window_width,
|
||||||
stealth=args.stealth,
|
stealth=args.stealth,
|
||||||
|
registry_url=args.registry_url,
|
||||||
|
metrics_port=args.metrics_port,
|
||||||
|
env=args.env,
|
||||||
)
|
)
|
||||||
|
|
||||||
def on_screenshot(screenshot_jpeg):
|
def on_screenshot(screenshot_jpeg):
|
||||||
|
@ -517,6 +552,24 @@ def brozzler_worker(argv=None):
|
||||||
action="store_true",
|
action="store_true",
|
||||||
help="Try to avoid web bot detection",
|
help="Try to avoid web bot detection",
|
||||||
)
|
)
|
||||||
|
arg_parser.add_argument(
|
||||||
|
"--registry_url",
|
||||||
|
dest="registry_url",
|
||||||
|
default=None,
|
||||||
|
help="Prometheus registry url",
|
||||||
|
)
|
||||||
|
arg_parser.add_argument(
|
||||||
|
"--metrics_port",
|
||||||
|
dest="metrics_port",
|
||||||
|
default=8888,
|
||||||
|
help="Prometheus metrics port",
|
||||||
|
)
|
||||||
|
arg_parser.add_argument(
|
||||||
|
"--env",
|
||||||
|
dest="env",
|
||||||
|
default=Env.qa,
|
||||||
|
help="Prometheus env value",
|
||||||
|
)
|
||||||
add_common_options(arg_parser, argv)
|
add_common_options(arg_parser, argv)
|
||||||
|
|
||||||
args = arg_parser.parse_args(args=argv[1:])
|
args = arg_parser.parse_args(args=argv[1:])
|
||||||
|
@ -573,6 +626,9 @@ def brozzler_worker(argv=None):
|
||||||
skip_visit_hashtags=args.skip_visit_hashtags,
|
skip_visit_hashtags=args.skip_visit_hashtags,
|
||||||
skip_youtube_dl=args.skip_youtube_dl,
|
skip_youtube_dl=args.skip_youtube_dl,
|
||||||
stealth=args.stealth,
|
stealth=args.stealth,
|
||||||
|
registry_url=args.registry_url,
|
||||||
|
metrics_port=args.metrics_port,
|
||||||
|
env=args.env,
|
||||||
)
|
)
|
||||||
|
|
||||||
signal.signal(signal.SIGQUIT, dump_state)
|
signal.signal(signal.SIGQUIT, dump_state)
|
||||||
|
|
|
@ -18,7 +18,7 @@ except ImportError:
|
||||||
class Env(str, enum.Enum):
|
class Env(str, enum.Enum):
|
||||||
"""Values of the Prometheus ``env`` label applied to a
|
"""Values of the Prometheus ``env`` label applied to a
|
||||||
:py:class:`.Registration` indicating the deployment environment in which
|
:py:class:`.Registration` indicating the deployment environment in which
|
||||||
the the service being advertised is operating.
|
the service being advertised is operating.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
qa = "qa"
|
qa = "qa"
|
||||||
|
|
|
@ -28,7 +28,6 @@ import json
|
||||||
import PIL.Image
|
import PIL.Image
|
||||||
import io
|
import io
|
||||||
import socket
|
import socket
|
||||||
import platform
|
|
||||||
import random
|
import random
|
||||||
import requests
|
import requests
|
||||||
import doublethink
|
import doublethink
|
||||||
|
@ -42,18 +41,6 @@ from . import ydl
|
||||||
r = rdb.RethinkDB()
|
r = rdb.RethinkDB()
|
||||||
|
|
||||||
|
|
||||||
# Setup metrics
|
|
||||||
registry_url = None
|
|
||||||
metrics_port = 8090
|
|
||||||
env = metrics.Env.dev
|
|
||||||
hostname = platform.node()
|
|
||||||
if hostname.endswith("archive.org"):
|
|
||||||
registry_url = "http://wbgrp-svc283.us.archive.org:8888"
|
|
||||||
metrics_port = settings.metrics_port
|
|
||||||
env = metrics.Env.qa
|
|
||||||
metrics.register_prom_metrics(registry_url, metrics_port, env)
|
|
||||||
|
|
||||||
|
|
||||||
class BrozzlerWorker:
|
class BrozzlerWorker:
|
||||||
logger = logging.getLogger(__module__ + "." + __qualname__)
|
logger = logging.getLogger(__module__ + "." + __qualname__)
|
||||||
|
|
||||||
|
@ -85,6 +72,9 @@ class BrozzlerWorker:
|
||||||
stealth=False,
|
stealth=False,
|
||||||
window_height=900,
|
window_height=900,
|
||||||
window_width=1400,
|
window_width=1400,
|
||||||
|
registry_url=None,
|
||||||
|
metrics_port=None,
|
||||||
|
env=None,
|
||||||
):
|
):
|
||||||
self._frontier = frontier
|
self._frontier = frontier
|
||||||
self._service_registry = service_registry
|
self._service_registry = service_registry
|
||||||
|
@ -107,6 +97,9 @@ class BrozzlerWorker:
|
||||||
self._window_height = window_height
|
self._window_height = window_height
|
||||||
self._window_width = window_width
|
self._window_width = window_width
|
||||||
self._stealth = stealth
|
self._stealth = stealth
|
||||||
|
self._registry_url = registry_url
|
||||||
|
self._metrics_port = metrics_port
|
||||||
|
self._env = env
|
||||||
|
|
||||||
self._browser_pool = brozzler.browser.BrowserPool(
|
self._browser_pool = brozzler.browser.BrowserPool(
|
||||||
max_browsers, chrome_exe=chrome_exe, ignore_cert_errors=True
|
max_browsers, chrome_exe=chrome_exe, ignore_cert_errors=True
|
||||||
|
@ -118,6 +111,12 @@ class BrozzlerWorker:
|
||||||
self._start_stop_lock = threading.Lock()
|
self._start_stop_lock = threading.Lock()
|
||||||
self._shutdown = threading.Event()
|
self._shutdown = threading.Event()
|
||||||
|
|
||||||
|
# Setup metrics
|
||||||
|
registry_url = self._registry_url
|
||||||
|
metrics_port = self._metrics_port
|
||||||
|
env = self._env
|
||||||
|
metrics.register_prom_metrics(registry_url, metrics_port, env)
|
||||||
|
|
||||||
def _choose_warcprox(self):
|
def _choose_warcprox(self):
|
||||||
warcproxes = self._service_registry.available_services("warcprox")
|
warcproxes = self._service_registry.available_services("warcprox")
|
||||||
if not warcproxes:
|
if not warcproxes:
|
||||||
|
|
|
@ -297,7 +297,7 @@ def _remember_videos(page, pushed_videos=None):
|
||||||
|
|
||||||
def _try_youtube_dl(worker, ydl, site, page):
|
def _try_youtube_dl(worker, ydl, site, page):
|
||||||
ytdlp_url = page.redirect_url if page.redirect_url else page.url
|
ytdlp_url = page.redirect_url if page.redirect_url else page.url
|
||||||
ytdlp_host = ytdlp_url.split("//")[-1].split("/")[0].split('?')[0]
|
ytdlp_host = ytdlp_url.split("//")[-1].split("/")[0].split("?")[0]
|
||||||
try:
|
try:
|
||||||
logging.info("trying yt-dlp on %s", ytdlp_url)
|
logging.info("trying yt-dlp on %s", ytdlp_url)
|
||||||
metrics.brozzler_ydl_download_attempts.labels(ytdlp_host).inc(1)
|
metrics.brozzler_ydl_download_attempts.labels(ytdlp_host).inc(1)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue