browser: convert to structlog

This commit is contained in:
Misty De Méo 2025-02-18 12:44:22 -08:00
parent a7e915b35f
commit 97f225d54c

View File

@@ -31,6 +31,7 @@ import base64
from ipaddress import AddressValueError
from brozzler.chrome import Chrome
import socket
import structlog
import urlcanon
@@ -52,7 +53,7 @@ class BrowserPool:
debugging protocol.
"""
logger = logging.getLogger(__module__ + "." + __qualname__)
logger = structlog.get_logger(__module__ + "." + __qualname__)
def __init__(self, size=3, **kwargs):
"""
@@ -143,7 +144,7 @@ class BrowserPool:
class WebsockReceiverThread(threading.Thread):
logger = logging.getLogger(__module__ + "." + __qualname__)
logger = structlog.get_logger(__module__ + "." + __qualname__)
def __init__(self, websock, name=None, daemon=True):
super().__init__(name=name, daemon=daemon)
@@ -193,7 +194,7 @@ class WebsockReceiverThread(threading.Thread):
):
self.logger.error("websocket closed, did chrome die?")
else:
self.logger.error("exception from websocket receiver thread", exc_info=1)
self.logger.exception("exception from websocket receiver thread")
brozzler.thread_raise(self.calling_thread, BrowsingException)
def run(self):
@@ -213,10 +214,9 @@ class WebsockReceiverThread(threading.Thread):
try:
self._handle_message(websock, message)
except:
self.logger.error(
"uncaught exception in _handle_message message=%s",
message,
exc_info=True,
self.logger.exception(
"uncaught exception in _handle_message",
message=message,
)
def _network_response_received(self, message):
@@ -231,7 +231,7 @@ class WebsockReceiverThread(threading.Thread):
]
)
self.reached_limit = brozzler.ReachedLimit(warcprox_meta=warcprox_meta)
self.logger.info("reached limit %s", self.reached_limit)
self.logger.info("reached limit", limit=self.reached_limit)
brozzler.thread_raise(self.calling_thread, brozzler.ReachedLimit)
else:
self.logger.info(
@@ -245,7 +245,7 @@ class WebsockReceiverThread(threading.Thread):
self.page_status = status
def _javascript_dialog_opening(self, message):
self.logger.info("javascript dialog opened: %s", message)
self.logger.info("javascript dialog opened", message=message)
if message["params"]["type"] == "alert":
accept = True
else:
@@ -292,7 +292,7 @@ class WebsockReceiverThread(threading.Thread):
message["params"]["message"]["text"],
)
elif message["method"] == "Runtime.exceptionThrown":
self.logger.debug("uncaught exception: %s", message)
self.logger.debug("uncaught exception", exception=message)
elif message["method"] == "Page.javascriptDialogOpening":
self._javascript_dialog_opening(message)
elif (
@@ -322,7 +322,7 @@ class Browser:
Manages an instance of Chrome for browsing pages.
"""
logger = logging.getLogger(__module__ + "." + __qualname__)
logger = structlog.get_logger(__module__ + "." + __qualname__)
def __init__(self, **kwargs):
"""
@@ -365,11 +365,10 @@ class Browser:
msg_id = next(self._command_id)
kwargs["id"] = msg_id
msg = json.dumps(kwargs, separators=",:")
logging.log(
logging.TRACE if suppress_logging else logging.DEBUG,
"sending message to %s: %s",
self.websock,
msg,
self.logger.debug(
"sending message",
websock=self.websock,
message=msg,
)
self.websock.send(msg)
return msg_id
@@ -397,7 +396,7 @@ class Browser:
# Enable Console & Runtime output only when debugging.
# After all, we just print these events with debug(), we don't use
# them in Brozzler logic.
if self.logger.isEnabledFor(logging.DEBUG):
if self.logger.is_enabled_for(logging.DEBUG):
self.send_to_chrome(method="Console.enable")
self.send_to_chrome(method="Runtime.enable")
self.send_to_chrome(method="ServiceWorker.enable")
@@ -432,8 +431,8 @@ class Browser:
try:
self.websock.close()
except BaseException as e:
self.logger.error(
"exception closing websocket %s - %s", self.websock, e
self.logger.exception(
"exception closing websocket", websocket=self.websock
)
self.chrome.stop()
@@ -460,7 +459,7 @@ class Browser:
self.websock_url = None
except:
self.logger.error("problem stopping", exc_info=True)
self.logger.exception("problem stopping")
def is_running(self):
return self.websock_url is not None
@@ -566,7 +565,7 @@ class Browser:
# if login redirected us, return to page_url
if page_url != self.url().split("#")[0]:
self.logger.debug(
"login navigated away from %s; returning!", page_url
"login navigated away; returning!", page_url=page_url
)
self.navigate_to_page(page_url, timeout=page_timeout)
# If the target page HTTP status is 4xx/5xx, there is no point
@@ -608,7 +607,7 @@ class Browser:
# more information, raise that one
raise self.websock_thread.reached_limit
except websocket.WebSocketConnectionClosedException as e:
self.logger.error("websocket closed, did chrome die?")
self.logger.exception("websocket closed, did chrome die?")
raise BrowsingException(e)
finally:
self.is_browsing = False
@@ -630,7 +629,7 @@ class Browser:
on_screenshot(jpeg_bytes)
return
except BrowsingTimeout as e:
logging.error("attempt %s/3: %s", i + 1, e)
self.logger.exception("attempt %s/3", i + 1)
def visit_hashtags(self, page_url, hashtags, outlinks):
_hashtags = set(hashtags or [])
@@ -644,7 +643,7 @@ class Browser:
# out which hashtags were visited already and skip those
for hashtag in _hashtags:
# navigate_to_hashtag (nothing to wait for so no timeout?)
self.logger.debug("navigating to hashtag %s", hashtag)
self.logger.debug("navigating to hashtag", hashtag=hashtag)
url = urlcanon.whatwg(page_url)
url.hash_sign = b"#"
url.fragment = hashtag[1:].encode("utf-8")
@@ -684,7 +683,7 @@ class Browser:
)
def navigate_to_page(self, page_url, timeout=300):
self.logger.info("navigating to page %s", page_url)
self.logger.info("navigating to page", page_url=page_url)
self.websock_thread.got_page_load_event = None
self.websock_thread.page_status = None
self.send_to_chrome(method="Page.navigate", params={"url": page_url})
@@ -712,14 +711,14 @@ class Browser:
try:
out.append(str(urlcanon.whatwg(link)))
except AddressValueError:
self.logger.warning("skip invalid outlink: %s", link)
self.logger.warning("skip invalid outlink", outlink=link)
return frozenset(out)
else:
# no links found
return frozenset()
else:
self.logger.error(
"problem extracting outlinks, result message: %s", message
"problem extracting outlinks", message=message
)
return frozenset()
@@ -791,7 +790,7 @@ class Browser:
while True:
elapsed = time.time() - start
if elapsed > timeout:
logging.info("behavior reached hard timeout after %.1fs", elapsed)
self.logger.info("behavior reached hard timeout", elapsed=elapsed)
return
brozzler.sleep(check_interval)