diff --git a/brozzler/metrics.py b/brozzler/metrics.py
new file mode 100644
index 0000000..5203c51
--- /dev/null
+++ b/brozzler/metrics.py
@@ -0,0 +1,64 @@
+"""Prometheus metrics exported by brozzler."""
+
+from typing import Optional
+
+from prometheus_client import Counter, Gauge, Histogram, start_http_server
+
+# fmt: off
+brozzler_pages_crawled = Counter("brozzler_pages_crawled", "number of pages visited by brozzler")
+brozzler_page_processing_duration_seconds = Histogram("brozzler_page_processing_duration_seconds", "time spent processing a page in brozzler")
+brozzler_outlinks_found = Counter("brozzler_outlinks_found", "number of outlinks found by brozzler")
+brozzler_last_page_crawled_time = Gauge("brozzler_last_page_crawled_time", "time of last page visit")
+brozzler_in_progress_pages = Gauge("brozzler_in_progress_pages", "number of pages currently processing with brozzler")
+brozzler_resources_requested = Counter("brozzler_resources_requested", "number of resources requested", labelnames=["resource_type"])
+brozzler_resources_fetched = Counter("brozzler_resources_fetched", "number of resources fetched", labelnames=["resource_type", "status_code"])
+brozzler_resources_size_total = Counter("brozzler_resources_size_total", "total size of resources fetched", labelnames=["resource_type"])
+brozzler_resources_fetch_time = Counter("brozzler_resources_fetch_time", "time spent fetching resources", labelnames=["resource_type"])
+brozzler_ydl_urls_checked = Counter("brozzler_ydl_urls_checked", "count of urls checked by brozzler yt-dlp")
+brozzler_ydl_download_attempts = Counter("brozzler_ydl_download_attempts", "count of download attempted by brozzler yt-dlp")
+brozzler_ydl_download_successes = Counter("brozzler_ydl_download_successes", "count of downloads completed by brozzler yt-dlp")
+# fmt: on
+
+
+def register_prom_metrics(
+    registry_url: Optional[str] = None, metrics_port: int = 8888, env=None
+):
+    """Start the Prometheus scrape endpoint and optionally register it.
+
+    Args:
+        registry_url: value passed to ``ClientConfig(server_url_base=...)``.
+            When None, the metrics endpoint is started but no registration
+            is attempted and http_sd_registry is never imported.
+        metrics_port: port passed to ``start_http_server`` and used as the
+            scrape port of the registered target.
+        env: ``http_sd_registry.client.Env`` member to register under;
+            ``Env.qa`` is used when omitted.
+    """
+    # Start metrics endpoint for scraping
+    start_http_server(metrics_port)
+
+    if registry_url is None:
+        return
+
+    # Imported here rather than at module level so that importing this
+    # module (and serving metrics) does not require http_sd_registry; it is
+    # only needed on the registration path.
+    from http_sd_registry.client import (
+        Client,
+        Env,
+        Registration,
+        Scheme,
+        format_self_target,
+    )
+    from http_sd_registry.config import ClientConfig
+
+    # The default cannot be spelled ``Env.qa`` in the signature because Env
+    # is not available until the import above has run.
+    if env is None:
+        env = Env.qa
+
+    config = ClientConfig(server_url_base=registry_url)
+    client = Client(config)
+    target = format_self_target(scrape_port=metrics_port)
+    registration = Registration(
+        target=target,
+        env=env,
+        scheme=Scheme.http,
+    )
+    client.keep_registered_threaded(registration)
diff --git a/setup.py b/setup.py
index c275b2b..edca942 100644
--- a/setup.py
+++ b/setup.py
@@ -77,6 +77,8 @@ setuptools.setup(
         "jinja2>=2.10",
         "cryptography>=2.3",
         "python-magic>=0.4.15",
+        "prometheus-client>=0.20.0",
+        "prometheus-async>=22.2.0",
     ],
     extras_require={
         "yt-dlp": ["yt-dlp==2024.7.25"],