mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-07-23 06:50:37 -04:00
add update_page_metrics
This commit is contained in:
parent
7de5b1cbd7
commit
1d63793788
1 changed file with 8 additions and 0 deletions
|
@@ -35,6 +35,7 @@ import tempfile
|
||||||
import urlcanon
|
import urlcanon
|
||||||
from requests.structures import CaseInsensitiveDict
|
from requests.structures import CaseInsensitiveDict
|
||||||
import rethinkdb as rdb
|
import rethinkdb as rdb
|
||||||
|
from . import metrics
|
||||||
from . import ydl
|
from . import ydl
|
||||||
|
|
||||||
r = rdb.RethinkDB()
|
r = rdb.RethinkDB()
|
||||||
|
@@ -312,6 +313,12 @@ class BrozzlerWorker:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def _browse_page(self, browser, site, page, on_screenshot=None, on_request=None):
|
def _browse_page(self, browser, site, page, on_screenshot=None, on_request=None):
|
||||||
|
def update_page_metrics(page, outlinks):
|
||||||
|
"""Update page-level Prometheus metrics."""
|
||||||
|
metrics.brozzler_last_page_crawled_time.set_to_current_time()
|
||||||
|
metrics.brozzler_pages_crawled.inc(1)
|
||||||
|
metrics.brozzler_outlinks_found.inc(len(outlinks))
|
||||||
|
|
||||||
def _on_screenshot(screenshot_jpeg):
|
def _on_screenshot(screenshot_jpeg):
|
||||||
if on_screenshot:
|
if on_screenshot:
|
||||||
on_screenshot(screenshot_jpeg)
|
on_screenshot(screenshot_jpeg)
|
||||||
|
@@ -416,6 +423,7 @@ class BrozzlerWorker:
|
||||||
)
|
)
|
||||||
if final_page_url != page.url:
|
if final_page_url != page.url:
|
||||||
page.note_redirect(final_page_url)
|
page.note_redirect(final_page_url)
|
||||||
|
update_page_metrics(page, outlinks)
|
||||||
return outlinks
|
return outlinks
|
||||||
|
|
||||||
def _fetch_url(self, site, url=None, page=None):
|
def _fetch_url(self, site, url=None, page=None):
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue