mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-04-19 23:35:54 -04:00
add update_page_metrics
This commit is contained in:
parent
7de5b1cbd7
commit
1d63793788
@ -35,6 +35,7 @@ import tempfile
|
||||
import urlcanon
|
||||
from requests.structures import CaseInsensitiveDict
|
||||
import rethinkdb as rdb
|
||||
from . import metrics
|
||||
from . import ydl
|
||||
|
||||
r = rdb.RethinkDB()
|
||||
@ -312,6 +313,12 @@ class BrozzlerWorker:
|
||||
return True
|
||||
|
||||
def _browse_page(self, browser, site, page, on_screenshot=None, on_request=None):
|
||||
def update_page_metrics(page, outlinks):
    """Record Prometheus metrics for one browsed page.

    Stamps ``brozzler_last_page_crawled_time`` with the current time,
    increments ``brozzler_pages_crawled`` by one, and increments
    ``brozzler_outlinks_found`` by the number of entries in ``outlinks``.

    ``page`` is accepted for the caller's convenience but is not read here.
    """
    # One page was just browsed: refresh the timestamp and the page counter.
    metrics.brozzler_last_page_crawled_time.set_to_current_time()
    metrics.brozzler_pages_crawled.inc(1)

    # Count every outlink discovered on this page.
    outlinks_counter = metrics.brozzler_outlinks_found
    outlinks_counter.inc(len(outlinks))
|
||||
|
||||
def _on_screenshot(screenshot_jpeg):
|
||||
if on_screenshot:
|
||||
on_screenshot(screenshot_jpeg)
|
||||
@ -416,6 +423,7 @@ class BrozzlerWorker:
|
||||
)
|
||||
if final_page_url != page.url:
|
||||
page.note_redirect(final_page_url)
|
||||
update_page_metrics(page, outlinks)
|
||||
return outlinks
|
||||
|
||||
def _fetch_url(self, site, url=None, page=None):
|
||||
|
Loading…
x
Reference in New Issue
Block a user