From 080270204a58cc7677ed8e8d52cdfc5c53e9ce5d Mon Sep 17 00:00:00 2001
From: Barbara Miller
Date: Wed, 12 Feb 2025 16:11:51 -0800
Subject: [PATCH 1/2] stop behavior after too many invalid checks

---
 brozzler/browser.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/brozzler/browser.py b/brozzler/browser.py
index b15cf30..3198c08 100644
--- a/brozzler/browser.py
+++ b/brozzler/browser.py
@@ -33,6 +33,10 @@ from brozzler.chrome import Chrome
 import socket
 import urlcanon
 
+# A behavior is abandoned once its invalid checks both outnumber its valid
+# checks and exceed this count.
+MAX_UNMATCHED_INVALID_CHECKS = 5
+
 
 class BrowsingException(Exception):
     pass
@@ -806,6 +810,21 @@ class Browser:
 
             brozzler.sleep(check_interval)
 
+            # Give up on the behavior when invalid checks outnumber valid ones
+            # and exceed the cap; log the counts in the order the message names them.
+            if (
+                invalid_behavior_checks > valid_behavior_checks
+                and invalid_behavior_checks > MAX_UNMATCHED_INVALID_CHECKS
+            ):
+                logging.warning(
+                    "behavior logged too many invalid checks, %s, after %.1fs and %s valid checks, for url %s",
+                    invalid_behavior_checks,
+                    elapsed,
+                    valid_behavior_checks,
+                    page_url,
+                )
+                return
+
             self.websock_thread.expect_result(self._command_id.peek())
             msg_id = self.send_to_chrome(
                 method="Runtime.evaluate",

From 46ecaad6be974ee78f6bda091a0ab9fda23c2ac3 Mon Sep 17 00:00:00 2001
From: Barbara Miller
Date: Wed, 12 Feb 2025 16:12:55 -0800
Subject: [PATCH 2/2] bump version

---
 setup.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index d0a35ad..808ba87 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@
 """
 setup.py - brozzler setup script
 
-Copyright (C) 2014-2024 Internet Archive
+Copyright (C) 2014-2025 Internet Archive
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -34,7 +34,7 @@ def find_package_data(package):
 
 setuptools.setup(
     name="brozzler",
-    version="1.6.7a0",
+    version="1.6.9",
     description="Distributed web crawling with browsers",
     url="https://github.com/internetarchive/brozzler",
     author="Noah Levitt",