outlinks: simplify outlink parsing
Some checks failed
Python Formatting Check / formatting (push) Has been cancelled
Tests / Run tests (push) Has been cancelled

The outlinks are collected as HTMLAnchorElement objects. The previous
version handled stringifying them by collecting the entire set of
objects into a single newline-delimited string, then splitting it back
up again in Python. It seems easier to just send back a JSON array of
strings and have Python iterate over them that way.
This commit is contained in:
Misty De Méo 2025-12-17 16:22:23 -08:00 committed by Misty De Méo
parent 93bb1a9a35
commit 33fffdfefd
2 changed files with 8 additions and 4 deletions

View file

@@ -784,7 +784,11 @@ class Browser:
# Now we actually do outlink extraction
msg_id = self.send_to_chrome(
method="Runtime.evaluate",
params={"expression": "__brzl_outlinksString()"},
params={
"expression": "__brzl_extractOutlinks()",
# returnByValue ensures we can receive an array response
"returnByValue": True,
},
)
self._wait_for(
lambda: self.websock_thread.received_result(msg_id), timeout=timeout
@@ -798,7 +802,7 @@ class Browser:
):
if message["result"]["result"]["value"]:
out = []
for link in message["result"]["result"]["value"].split("\n"):
for link in message["result"]["result"]["value"]:
try:
out.append(str(urlcanon.whatwg(link)))
except AddressValueError:

View file

@@ -36,6 +36,6 @@ var __brzl_compileOutlinks = function(frame) {
return outlinks;
}
var __brzl_outlinksString = function() {
return __brzl_compileOutlinks(window).join('\n');
var __brzl_extractOutlinks = function() {
return __brzl_compileOutlinks(window).map(el => el.toString());
}