diff --git a/brozzler/__init__.py b/brozzler/__init__.py index 06f3d0c..bce1dee 100644 --- a/brozzler/__init__.py +++ b/brozzler/__init__.py @@ -17,6 +17,7 @@ See the License for the specific language governing permissions and limitations under the License. """ +import logging from pkg_resources import get_distribution as _get_distribution __version__ = _get_distribution('brozzler').version @@ -57,18 +58,22 @@ class ReachedLimit(Exception): def __str__(self): return self.__repr__() -# monkey-patch log level TRACE -TRACE = 5 -import logging -def _logging_trace(msg, *args, **kwargs): - logging.root.trace(msg, *args, **kwargs) +# monkey-patch log levels TRACE and NOTICE +logging.TRACE = (logging.NOTSET + logging.DEBUG) // 2 def _logger_trace(self, msg, *args, **kwargs): - if self.isEnabledFor(TRACE): - self._log(TRACE, msg, args, **kwargs) -logging.trace = _logging_trace + if self.isEnabledFor(logging.TRACE): + self._log(logging.TRACE, msg, args, **kwargs) logging.Logger.trace = _logger_trace -logging._levelToName[TRACE] = 'TRACE' -logging._nameToLevel['TRACE'] = TRACE +logging.trace = logging.root.trace +logging.addLevelName(logging.TRACE, 'TRACE') + +logging.NOTICE = (logging.INFO + logging.WARN) // 2 +def _logger_notice(self, msg, *args, **kwargs): + if self.isEnabledFor(logging.NOTICE): + self._log(logging.NOTICE, msg, args, **kwargs) +logging.Logger.notice = _logger_notice +logging.notice = logging.root.notice +logging.addLevelName(logging.NOTICE, 'NOTICE') # see https://github.com/internetarchive/brozzler/issues/91 def _logging_handler_handle(self, record): diff --git a/brozzler/browser.py b/brozzler/browser.py index ea0063c..a7d39d1 100644 --- a/brozzler/browser.py +++ b/brozzler/browser.py @@ -317,7 +317,7 @@ class Browser: kwargs['id'] = msg_id msg = json.dumps(kwargs, separators=',:') logging.log( - brozzler.TRACE if suppress_logging else logging.DEBUG, + logging.TRACE if suppress_logging else logging.DEBUG, 'sending message to %s: %s', self.websock, msg) self.websock.send(msg) return msg_id diff --git a/brozzler/cli.py b/brozzler/cli.py index 4d4aca0..d843fe9 100644 --- a/brozzler/cli.py +++ b/brozzler/cli.py @@ -43,15 +43,14 @@ def add_common_options(arg_parser, argv=None): argv = argv or sys.argv arg_parser.add_argument( '-q', '--quiet', dest='log_level', action='store_const', - default=logging.INFO, const=logging.WARN, help=( - 'quiet logging, only warnings and errors')) + default=logging.INFO, const=logging.NOTICE, help='quiet logging') arg_parser.add_argument( '-v', '--verbose', dest='log_level', action='store_const', default=logging.INFO, const=logging.DEBUG, help=( 'verbose logging')) arg_parser.add_argument( '--trace', dest='log_level', action='store_const', - default=logging.INFO, const=brozzler.TRACE, help=( + default=logging.INFO, const=logging.TRACE, help=( 'very verbose logging')) # arg_parser.add_argument( # '-s', '--silent', dest='log_level', action='store_const', diff --git a/brozzler/frontier.py b/brozzler/frontier.py index 2e076d3..5272a88 100644 --- a/brozzler/frontier.py +++ b/brozzler/frontier.py @@ -94,6 +94,7 @@ class RethinkDbFrontier: k, expected, result)) def claim_sites(self, n=1): + self.logger.trace('claiming up to %s sites to brozzle', n) result = ( self.rr.table('sites').get_all(r.args( r.db(self.rr.dbname).table('sites', read_mode='majority') @@ -145,6 +146,7 @@ class RethinkDbFrontier: result["changes"][i]["old_val"]["last_claimed"]) site = brozzler.Site(self.rr, result["changes"][i]["new_val"]) sites.append(site) + self.logger.debug('claimed %s sites', len(sites)) if sites: return sites else: diff --git a/brozzler/worker.py b/brozzler/worker.py index 41c98db..89ebf21 100644 --- a/brozzler/worker.py +++ b/brozzler/worker.py @@ -634,6 +634,7 @@ class BrozzlerWorker: Raises: NoBrowsersAvailable if none available ''' + # acquire_multi() raises NoBrowsersAvailable if none available browsers = self._browser_pool.acquire_multi( (self._browser_pool.num_available() + 1) // 2) try: @@ -656,22 +657,26 @@ class BrozzlerWorker: self._browser_pool.release(browsers[i]) def run(self): - self.logger.info("brozzler worker starting") + self.logger.notice("brozzler worker starting") + last_nothing_to_claim = 0 try: while not self._shutdown.is_set(): self._service_heartbeat_if_due() - try: - self._start_browsing_some_sites() - except brozzler.browser.NoBrowsersAvailable: - logging.trace( - "all %s browsers are in use", self._max_browsers) - except brozzler.NothingToClaim: - logging.trace( - "all active sites are already claimed by a " - "brozzler worker") + if time.time() - last_nothing_to_claim > 20: + try: + self._start_browsing_some_sites() + except brozzler.browser.NoBrowsersAvailable: + logging.trace( + "all %s browsers are in use", + self._max_browsers) + except brozzler.NothingToClaim: + last_nothing_to_claim = time.time() + logging.trace( + "nothing to claim, all available active sites " + "are already claimed by a brozzler worker") time.sleep(0.5) - self.logger.info("shutdown requested") + self.logger.notice("shutdown requested") except r.ReqlError as e: self.logger.error( "caught rethinkdb exception, will try to proceed",