mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-04-20 23:56:34 -04:00
Merge 17ab2259defec6c6ce23341fd3e2602f0099e0dc into 69d682beb9fe941dc466c81c4142198018dabe25
This commit is contained in:
commit
89fc10a0ef
@ -1,7 +1,7 @@
|
||||
"""
|
||||
brozzler/browser.py - manages the browsers for brozzler
|
||||
|
||||
Copyright (C) 2014-2024 Internet Archive
|
||||
Copyright (C) 2014-2025 Internet Archive
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
@ -33,6 +33,8 @@ from brozzler.chrome import Chrome
|
||||
import socket
|
||||
import urlcanon
|
||||
|
||||
MAX_UNMATCHED_INVALID_CHECKS = 5
|
||||
|
||||
|
||||
class BrowsingException(Exception):
|
||||
pass
|
||||
@ -584,7 +586,9 @@ class Browser:
|
||||
behavior_script = brozzler.behavior_script(
|
||||
page_url, behavior_parameters, behaviors_dir=behaviors_dir
|
||||
)
|
||||
self.run_behavior(behavior_script, timeout=behavior_timeout)
|
||||
self.run_behavior(
|
||||
behavior_script, page_url, timeout=behavior_timeout
|
||||
)
|
||||
final_page_url = self.url()
|
||||
if on_screenshot:
|
||||
if simpler404:
|
||||
@ -779,7 +783,7 @@ class Browser:
|
||||
message = self.websock_thread.pop_result(msg_id)
|
||||
return message["result"]["result"]["value"]
|
||||
|
||||
def run_behavior(self, behavior_script, timeout=900):
|
||||
def run_behavior(self, behavior_script, page_url, timeout=900):
|
||||
self.send_to_chrome(
|
||||
method="Runtime.evaluate",
|
||||
suppress_logging=True,
|
||||
@ -788,14 +792,35 @@ class Browser:
|
||||
|
||||
check_interval = min(timeout, 7)
|
||||
start = time.time()
|
||||
valid_behavior_checks = 0
|
||||
invalid_behavior_checks = 0
|
||||
while True:
|
||||
elapsed = time.time() - start
|
||||
if elapsed > timeout:
|
||||
logging.info("behavior reached hard timeout after %.1fs", elapsed)
|
||||
logging.info(
|
||||
"behavior reached hard timeout after %.1fs and %s valid checks, and %s invalid checks, for url %s",
|
||||
elapsed,
|
||||
valid_behavior_checks,
|
||||
invalid_behavior_checks,
|
||||
page_url,
|
||||
)
|
||||
return
|
||||
|
||||
brozzler.sleep(check_interval)
|
||||
|
||||
if (
|
||||
invalid_behavior_checks > valid_behavior_checks
|
||||
and invalid_behavior_checks > MAX_UNMATCHED_INVALID_CHECKS
|
||||
):
|
||||
logging.warn(
|
||||
"behavior logged too many invalid checks, %s, after %.1fs and %s valid checks, for url %s",
|
||||
invalid_behavior_checks,
|
||||
elapsed,
|
||||
valid_behavior_checks,
|
||||
page_url,
|
||||
)
|
||||
return
|
||||
|
||||
self.websock_thread.expect_result(self._command_id.peek())
|
||||
msg_id = self.send_to_chrome(
|
||||
method="Runtime.evaluate",
|
||||
@ -807,6 +832,17 @@ class Browser:
|
||||
lambda: self.websock_thread.received_result(msg_id), timeout=5
|
||||
)
|
||||
msg = self.websock_thread.pop_result(msg_id)
|
||||
if (
|
||||
msg
|
||||
and "result" in msg["result"]
|
||||
and type(msg["result"]["result"]["value"]) is bool
|
||||
and not msg["result"]["result"]["value"]
|
||||
):
|
||||
# valid behavior response while still running
|
||||
# {'id': 8, 'result': {'result': {'type': 'boolean', 'value': False}}}
|
||||
valid_behavior_checks += 1
|
||||
continue
|
||||
|
||||
if (
|
||||
msg
|
||||
and "result" in msg
|
||||
@ -818,10 +854,21 @@ class Browser:
|
||||
and type(msg["result"]["result"]["value"]) == bool
|
||||
and msg["result"]["result"]["value"]
|
||||
):
|
||||
self.logger.info("behavior decided it has finished")
|
||||
# valid behavior response when finished
|
||||
# {'id': 9, 'result': {'result': {'type': 'boolean', 'value': True}}}
|
||||
elapsed = time.time() - start
|
||||
self.logger.info(
|
||||
"behavior decided it has finished after %.1fs and %s valid checks, and %s invalid checks, for url %s",
|
||||
elapsed,
|
||||
valid_behavior_checks,
|
||||
invalid_behavior_checks,
|
||||
page_url,
|
||||
)
|
||||
return
|
||||
invalid_behavior_checks += 1
|
||||
|
||||
except BrowsingTimeout:
|
||||
pass
|
||||
invalid_behavior_checks += 1
|
||||
|
||||
def try_login(self, username, password, timeout=300):
|
||||
try_login_js = (
|
||||
|
Loading…
x
Reference in New Issue
Block a user