mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-06-17 11:29:19 -04:00
improve TRACE level logging
This commit is contained in:
parent
ed2d58d87d
commit
5f4c5190da
6 changed files with 25 additions and 15 deletions
|
@ -77,8 +77,20 @@ def fixup(url, hash_strip=False):
|
||||||
hurl.host = hurl.host.encode('idna').decode('ascii').lower()
|
hurl.host = hurl.host.encode('idna').decode('ascii').lower()
|
||||||
return hurl.getURLString()
|
return hurl.getURLString()
|
||||||
|
|
||||||
# monkey-patch log level TRACE: a logging level more fine-grained than
# logging.DEBUG == 10
TRACE = 5

import logging
import sys


def _logging_trace(msg, *args, **kwargs):
    """Log a message at level TRACE on the root logger.

    Module-level counterpart of `Logger.trace`, installed below as
    `logging.trace`. If the root logger has no handlers yet,
    `logging.basicConfig()` is called first to set one up.

    Args:
        msg: log message format string.
        *args: arguments merged into `msg` with the `%` operator.
        **kwargs: passed through to `logging.root.trace`.
    """
    if not logging.root.handlers:
        # Must be qualified: a bare `basicConfig()` is not defined in this
        # module and would raise NameError on the first unconfigured call.
        logging.basicConfig()
    logging.root.trace(msg, *args, **kwargs)


def _logger_trace(self, msg, *args, **kwargs):
    """Log a message at level TRACE on this logger.

    Installed below as `logging.Logger.trace`. Does nothing unless the
    logger is enabled for TRACE.

    Args:
        msg: log message format string.
        *args: arguments merged into `msg` with the `%` operator.
        **kwargs: passed through to `Logger._log`.
    """
    if self.isEnabledFor(TRACE):
        # args is passed as a single tuple, which is what Logger._log takes
        self._log(TRACE, msg, args, **kwargs)


logging.trace = _logging_trace
logging.Logger.trace = _logger_trace
# public API; registers both the level -> name and name -> level mappings
logging.addLevelName(TRACE, 'TRACE')
|
||||||
|
|
||||||
_behaviors = None
|
_behaviors = None
|
||||||
def behaviors():
|
def behaviors():
|
||||||
|
|
|
@ -231,11 +231,11 @@ class Chrome:
|
||||||
b'CERT_PKIXVerifyCert for [^ ]* failed|'
|
b'CERT_PKIXVerifyCert for [^ ]* failed|'
|
||||||
b'^ALSA lib|ERROR:gl_surface_glx.cc|'
|
b'^ALSA lib|ERROR:gl_surface_glx.cc|'
|
||||||
b'ERROR:gpu_child_thread.cc', buf):
|
b'ERROR:gpu_child_thread.cc', buf):
|
||||||
logging.log(
|
self.logger.trace(
|
||||||
brozzler.TRACE, 'chrome pid %s STDOUT %s',
|
'chrome pid %s STDOUT %s',
|
||||||
self.chrome_process.pid, buf)
|
self.chrome_process.pid, buf)
|
||||||
else:
|
else:
|
||||||
logging.debug(
|
self.logger.debug(
|
||||||
'chrome pid %s STDOUT %s',
|
'chrome pid %s STDOUT %s',
|
||||||
self.chrome_process.pid, buf)
|
self.chrome_process.pid, buf)
|
||||||
|
|
||||||
|
@ -246,15 +246,15 @@ class Chrome:
|
||||||
b'CERT_PKIXVerifyCert for [^ ]* failed|'
|
b'CERT_PKIXVerifyCert for [^ ]* failed|'
|
||||||
b'^ALSA lib|ERROR:gl_surface_glx.cc|'
|
b'^ALSA lib|ERROR:gl_surface_glx.cc|'
|
||||||
b'ERROR:gpu_child_thread.cc', buf):
|
b'ERROR:gpu_child_thread.cc', buf):
|
||||||
logging.log(
|
self.logger.trace(
|
||||||
brozzler.TRACE, 'chrome pid %s STDOUT %s',
|
'chrome pid %s STDOUT %s',
|
||||||
self.chrome_process.pid, buf)
|
self.chrome_process.pid, buf)
|
||||||
else:
|
else:
|
||||||
logging.debug(
|
self.logger.debug(
|
||||||
'chrome pid %s STDERR %s',
|
'chrome pid %s STDERR %s',
|
||||||
self.chrome_process.pid, buf)
|
self.chrome_process.pid, buf)
|
||||||
except:
|
except:
|
||||||
logging.error('unexpected exception', exc_info=True)
|
self.logger.error('unexpected exception', exc_info=True)
|
||||||
|
|
||||||
def stop(self):
|
def stop(self):
|
||||||
if not self.chrome_process or self._shutdown.is_set():
|
if not self.chrome_process or self._shutdown.is_set():
|
||||||
|
|
|
@ -91,8 +91,7 @@ def _add_proxy_options(arg_parser):
|
||||||
|
|
||||||
def configure_logging(args):
|
def configure_logging(args):
|
||||||
logging.basicConfig(
|
logging.basicConfig(
|
||||||
stream=sys.stderr, level=args.log_level,
|
stream=sys.stderr, level=args.log_level, format=(
|
||||||
format=(
|
|
||||||
'%(asctime)s %(process)d %(levelname)s %(threadName)s '
|
'%(asctime)s %(process)d %(levelname)s %(threadName)s '
|
||||||
'%(name)s.%(funcName)s(%(filename)s:%(lineno)d) %(message)s'))
|
'%(name)s.%(funcName)s(%(filename)s:%(lineno)d) %(message)s'))
|
||||||
logging.getLogger('requests.packages.urllib3').setLevel(logging.WARN)
|
logging.getLogger('requests.packages.urllib3').setLevel(logging.WARN)
|
||||||
|
|
|
@ -119,7 +119,7 @@ class RethinkDbFrontier:
|
||||||
self._vet_result(result, replaced=[0,1], unchanged=[0,1])
|
self._vet_result(result, replaced=[0,1], unchanged=[0,1])
|
||||||
|
|
||||||
def update_page(self, page):
|
def update_page(self, page):
|
||||||
self.logger.debug("updating 'pages' table entry %s", page)
|
self.logger.trace("updating 'pages' table entry %s", page)
|
||||||
result = self.r.table("pages").get(page.id).replace(page.to_dict()).run()
|
result = self.r.table("pages").get(page.id).replace(page.to_dict()).run()
|
||||||
self._vet_result(result, replaced=[0,1], unchanged=[0,1])
|
self._vet_result(result, replaced=[0,1], unchanged=[0,1])
|
||||||
|
|
||||||
|
|
|
@ -383,9 +383,8 @@ class BrozzlerWorker:
|
||||||
|
|
||||||
try:
|
try:
|
||||||
self.status_info = self._service_registry.heartbeat(status_info)
|
self.status_info = self._service_registry.heartbeat(status_info)
|
||||||
self.logger.log(
|
self.logger.trace(
|
||||||
brozzler.TRACE, "status in service registry: %s",
|
"status in service registry: %s", self.status_info)
|
||||||
self.status_info)
|
|
||||||
except rethinkdb.ReqlError as e:
|
except rethinkdb.ReqlError as e:
|
||||||
self.logger.error(
|
self.logger.error(
|
||||||
"failed to send heartbeat and update service registry "
|
"failed to send heartbeat and update service registry "
|
||||||
|
|
2
setup.py
2
setup.py
|
@ -32,7 +32,7 @@ def find_package_data(package):
|
||||||
|
|
||||||
setuptools.setup(
|
setuptools.setup(
|
||||||
name='brozzler',
|
name='brozzler',
|
||||||
version='1.1b9.dev179',
|
version='1.1b9.dev180',
|
||||||
description='Distributed web crawling with browsers',
|
description='Distributed web crawling with browsers',
|
||||||
url='https://github.com/internetarchive/brozzler',
|
url='https://github.com/internetarchive/brozzler',
|
||||||
author='Noah Levitt',
|
author='Noah Levitt',
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue