Support generating structured logs in addition to standard logs. (#8607)

This modifies the configuration of structured logging to be usable from
the standard Python logging configuration.

This also separates the formatting of logs from the transport allowing
JSON logs to files or standard logs to sockets.
This commit is contained in:
Patrick Cloke 2020-10-29 07:27:37 -04:00 committed by GitHub
parent 9a7e0d2ea6
commit 00b24aa545
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
19 changed files with 715 additions and 1020 deletions

View file

@ -0,0 +1,34 @@
# -*- coding: utf-8 -*-
# Copyright 2019 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
class LoggerCleanupMixin:
def get_logger(self, handler):
"""
Attach a handler to a logger and add clean-ups to remove revert this.
"""
# Create a logger and add the handler to it.
logger = logging.getLogger(__name__)
logger.addHandler(handler)
# Ensure the logger actually logs something.
logger.setLevel(logging.INFO)
# Ensure the logger gets cleaned-up appropriately.
self.addCleanup(logger.removeHandler, handler)
self.addCleanup(logger.setLevel, logging.NOTSET)
return logger

View file

@ -0,0 +1,153 @@
# -*- coding: utf-8 -*-
# Copyright 2019 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from twisted.test.proto_helpers import AccumulatingProtocol
from synapse.logging import RemoteHandler
from tests.logging import LoggerCleanupMixin
from tests.server import FakeTransport, get_clock
from tests.unittest import TestCase
def connect_logging_client(reactor, client_id):
# This is essentially tests.server.connect_client, but disabling autoflush on
# the client transport. This is necessary to avoid an infinite loop due to
# sending of data via the logging transport causing additional logs to be
# written.
factory = reactor.tcpClients.pop(client_id)[2]
client = factory.buildProtocol(None)
server = AccumulatingProtocol()
server.makeConnection(FakeTransport(client, reactor))
client.makeConnection(FakeTransport(server, reactor, autoflush=False))
return client, server
class RemoteHandlerTestCase(LoggerCleanupMixin, TestCase):
def setUp(self):
self.reactor, _ = get_clock()
def test_log_output(self):
"""
The remote handler delivers logs over TCP.
"""
handler = RemoteHandler("127.0.0.1", 9000, _reactor=self.reactor)
logger = self.get_logger(handler)
logger.info("Hello there, %s!", "wally")
# Trigger the connection
client, server = connect_logging_client(self.reactor, 0)
# Trigger data being sent
client.transport.flush()
# One log message, with a single trailing newline
logs = server.data.decode("utf8").splitlines()
self.assertEqual(len(logs), 1)
self.assertEqual(server.data.count(b"\n"), 1)
# Ensure the data passed through properly.
self.assertEqual(logs[0], "Hello there, wally!")
def test_log_backpressure_debug(self):
"""
When backpressure is hit, DEBUG logs will be shed.
"""
handler = RemoteHandler(
"127.0.0.1", 9000, maximum_buffer=10, _reactor=self.reactor
)
logger = self.get_logger(handler)
# Send some debug messages
for i in range(0, 3):
logger.debug("debug %s" % (i,))
# Send a bunch of useful messages
for i in range(0, 7):
logger.info("info %s" % (i,))
# The last debug message pushes it past the maximum buffer
logger.debug("too much debug")
# Allow the reconnection
client, server = connect_logging_client(self.reactor, 0)
client.transport.flush()
# Only the 7 infos made it through, the debugs were elided
logs = server.data.splitlines()
self.assertEqual(len(logs), 7)
self.assertNotIn(b"debug", server.data)
def test_log_backpressure_info(self):
"""
When backpressure is hit, DEBUG and INFO logs will be shed.
"""
handler = RemoteHandler(
"127.0.0.1", 9000, maximum_buffer=10, _reactor=self.reactor
)
logger = self.get_logger(handler)
# Send some debug messages
for i in range(0, 3):
logger.debug("debug %s" % (i,))
# Send a bunch of useful messages
for i in range(0, 10):
logger.warning("warn %s" % (i,))
# Send a bunch of info messages
for i in range(0, 3):
logger.info("info %s" % (i,))
# The last debug message pushes it past the maximum buffer
logger.debug("too much debug")
# Allow the reconnection
client, server = connect_logging_client(self.reactor, 0)
client.transport.flush()
# The 10 warnings made it through, the debugs and infos were elided
logs = server.data.splitlines()
self.assertEqual(len(logs), 10)
self.assertNotIn(b"debug", server.data)
self.assertNotIn(b"info", server.data)
def test_log_backpressure_cut_middle(self):
"""
When backpressure is hit, and no more DEBUG and INFOs cannot be culled,
it will cut the middle messages out.
"""
handler = RemoteHandler(
"127.0.0.1", 9000, maximum_buffer=10, _reactor=self.reactor
)
logger = self.get_logger(handler)
# Send a bunch of useful messages
for i in range(0, 20):
logger.warning("warn %s" % (i,))
# Allow the reconnection
client, server = connect_logging_client(self.reactor, 0)
client.transport.flush()
# The first five and last five warnings made it through, the debugs and
# infos were elided
logs = server.data.decode("utf8").splitlines()
self.assertEqual(
["warn %s" % (i,) for i in range(5)]
+ ["warn %s" % (i,) for i in range(15, 20)],
logs,
)

View file

@ -1,214 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright 2019 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import os
import os.path
import shutil
import sys
import textwrap
from twisted.logger import Logger, eventAsText, eventsFromJSONLogFile
from synapse.config.logger import setup_logging
from synapse.logging._structured import setup_structured_logging
from synapse.logging.context import LoggingContext
from tests.unittest import DEBUG, HomeserverTestCase
class FakeBeginner:
def beginLoggingTo(self, observers, **kwargs):
self.observers = observers
class StructuredLoggingTestBase:
"""
Test base that registers a cleanup handler to reset the stdlib log handler
to 'unset'.
"""
def prepare(self, reactor, clock, hs):
def _cleanup():
logging.getLogger("synapse").setLevel(logging.NOTSET)
self.addCleanup(_cleanup)
class StructuredLoggingTestCase(StructuredLoggingTestBase, HomeserverTestCase):
"""
Tests for Synapse's structured logging support.
"""
def test_output_to_json_round_trip(self):
"""
Synapse logs can be outputted to JSON and then read back again.
"""
temp_dir = self.mktemp()
os.mkdir(temp_dir)
self.addCleanup(shutil.rmtree, temp_dir)
json_log_file = os.path.abspath(os.path.join(temp_dir, "out.json"))
log_config = {
"drains": {"jsonfile": {"type": "file_json", "location": json_log_file}}
}
# Begin the logger with our config
beginner = FakeBeginner()
setup_structured_logging(
self.hs, self.hs.config, log_config, logBeginner=beginner
)
# Make a logger and send an event
logger = Logger(
namespace="tests.logging.test_structured", observer=beginner.observers[0]
)
logger.info("Hello there, {name}!", name="wally")
# Read the log file and check it has the event we sent
with open(json_log_file, "r") as f:
logged_events = list(eventsFromJSONLogFile(f))
self.assertEqual(len(logged_events), 1)
# The event pulled from the file should render fine
self.assertEqual(
eventAsText(logged_events[0], includeTimestamp=False),
"[tests.logging.test_structured#info] Hello there, wally!",
)
def test_output_to_text(self):
"""
Synapse logs can be outputted to text.
"""
temp_dir = self.mktemp()
os.mkdir(temp_dir)
self.addCleanup(shutil.rmtree, temp_dir)
log_file = os.path.abspath(os.path.join(temp_dir, "out.log"))
log_config = {"drains": {"file": {"type": "file", "location": log_file}}}
# Begin the logger with our config
beginner = FakeBeginner()
setup_structured_logging(
self.hs, self.hs.config, log_config, logBeginner=beginner
)
# Make a logger and send an event
logger = Logger(
namespace="tests.logging.test_structured", observer=beginner.observers[0]
)
logger.info("Hello there, {name}!", name="wally")
# Read the log file and check it has the event we sent
with open(log_file, "r") as f:
logged_events = f.read().strip().split("\n")
self.assertEqual(len(logged_events), 1)
# The event pulled from the file should render fine
self.assertTrue(
logged_events[0].endswith(
" - tests.logging.test_structured - INFO - None - Hello there, wally!"
)
)
def test_collects_logcontext(self):
"""
Test that log outputs have the attached logging context.
"""
log_config = {"drains": {}}
# Begin the logger with our config
beginner = FakeBeginner()
publisher = setup_structured_logging(
self.hs, self.hs.config, log_config, logBeginner=beginner
)
logs = []
publisher.addObserver(logs.append)
# Make a logger and send an event
logger = Logger(
namespace="tests.logging.test_structured", observer=beginner.observers[0]
)
with LoggingContext("testcontext", request="somereq"):
logger.info("Hello there, {name}!", name="steve")
self.assertEqual(len(logs), 1)
self.assertEqual(logs[0]["request"], "somereq")
class StructuredLoggingConfigurationFileTestCase(
StructuredLoggingTestBase, HomeserverTestCase
):
def make_homeserver(self, reactor, clock):
tempdir = self.mktemp()
os.mkdir(tempdir)
log_config_file = os.path.abspath(os.path.join(tempdir, "log.config.yaml"))
self.homeserver_log = os.path.abspath(os.path.join(tempdir, "homeserver.log"))
config = self.default_config()
config["log_config"] = log_config_file
with open(log_config_file, "w") as f:
f.write(
textwrap.dedent(
"""\
structured: true
drains:
file:
type: file_json
location: %s
"""
% (self.homeserver_log,)
)
)
self.addCleanup(self._sys_cleanup)
return self.setup_test_homeserver(config=config)
def _sys_cleanup(self):
sys.stdout = sys.__stdout__
sys.stderr = sys.__stderr__
# Do not remove! We need the logging system to be set other than WARNING.
@DEBUG
def test_log_output(self):
"""
When a structured logging config is given, Synapse will use it.
"""
beginner = FakeBeginner()
publisher = setup_logging(self.hs, self.hs.config, logBeginner=beginner)
# Make a logger and send an event
logger = Logger(namespace="tests.logging.test_structured", observer=publisher)
with LoggingContext("testcontext", request="somereq"):
logger.info("Hello there, {name}!", name="steve")
with open(self.homeserver_log, "r") as f:
logged_events = [
eventAsText(x, includeTimestamp=False) for x in eventsFromJSONLogFile(f)
]
logs = "\n".join(logged_events)
self.assertTrue("***** STARTING SERVER *****" in logs)
self.assertTrue("Hello there, steve!" in logs)

View file

@ -14,57 +14,33 @@
# limitations under the License.
import json
from collections import Counter
import logging
from io import StringIO
from twisted.logger import Logger
from synapse.logging._terse_json import JsonFormatter, TerseJsonFormatter
from synapse.logging._structured import setup_structured_logging
from tests.server import connect_client
from tests.unittest import HomeserverTestCase
from .test_structured import FakeBeginner, StructuredLoggingTestBase
from tests.logging import LoggerCleanupMixin
from tests.unittest import TestCase
class TerseJSONTCPTestCase(StructuredLoggingTestBase, HomeserverTestCase):
def test_log_output(self):
class TerseJsonTestCase(LoggerCleanupMixin, TestCase):
def test_terse_json_output(self):
"""
The Terse JSON outputter delivers simplified structured logs over TCP.
The Terse JSON formatter converts log messages to JSON.
"""
log_config = {
"drains": {
"tersejson": {
"type": "network_json_terse",
"host": "127.0.0.1",
"port": 8000,
}
}
}
output = StringIO()
# Begin the logger with our config
beginner = FakeBeginner()
setup_structured_logging(
self.hs, self.hs.config, log_config, logBeginner=beginner
)
handler = logging.StreamHandler(output)
handler.setFormatter(TerseJsonFormatter())
logger = self.get_logger(handler)
logger = Logger(
namespace="tests.logging.test_terse_json", observer=beginner.observers[0]
)
logger.info("Hello there, {name}!", name="wally")
logger.info("Hello there, %s!", "wally")
# Trigger the connection
self.pump()
_, server = connect_client(self.reactor, 0)
# Trigger data being sent
self.pump()
# One log message, with a single trailing newline
logs = server.data.decode("utf8").splitlines()
# One log message, with a single trailing newline.
data = output.getvalue()
logs = data.splitlines()
self.assertEqual(len(logs), 1)
self.assertEqual(server.data.count(b"\n"), 1)
self.assertEqual(data.count("\n"), 1)
log = json.loads(logs[0])
# The terse logger should give us these keys.
@ -72,163 +48,74 @@ class TerseJSONTCPTestCase(StructuredLoggingTestBase, HomeserverTestCase):
"log",
"time",
"level",
"log_namespace",
"request",
"scope",
"server_name",
"name",
"namespace",
]
self.assertCountEqual(log.keys(), expected_log_keys)
self.assertEqual(log["log"], "Hello there, wally!")
def test_extra_data(self):
"""
Additional information can be included in the structured logging.
"""
output = StringIO()
handler = logging.StreamHandler(output)
handler.setFormatter(TerseJsonFormatter())
logger = self.get_logger(handler)
logger.info(
"Hello there, %s!", "wally", extra={"foo": "bar", "int": 3, "bool": True}
)
# One log message, with a single trailing newline.
data = output.getvalue()
logs = data.splitlines()
self.assertEqual(len(logs), 1)
self.assertEqual(data.count("\n"), 1)
log = json.loads(logs[0])
# The terse logger should give us these keys.
expected_log_keys = [
"log",
"time",
"level",
"namespace",
# The additional keys given via extra.
"foo",
"int",
"bool",
]
self.assertCountEqual(log.keys(), expected_log_keys)
# It contains the data we expect.
self.assertEqual(log["name"], "wally")
# Check the values of the extra fields.
self.assertEqual(log["foo"], "bar")
self.assertEqual(log["int"], 3)
self.assertIs(log["bool"], True)
def test_log_backpressure_debug(self):
def test_json_output(self):
"""
When backpressure is hit, DEBUG logs will be shed.
The Terse JSON formatter converts log messages to JSON.
"""
log_config = {
"loggers": {"synapse": {"level": "DEBUG"}},
"drains": {
"tersejson": {
"type": "network_json_terse",
"host": "127.0.0.1",
"port": 8000,
"maximum_buffer": 10,
}
},
}
output = StringIO()
# Begin the logger with our config
beginner = FakeBeginner()
setup_structured_logging(
self.hs,
self.hs.config,
log_config,
logBeginner=beginner,
redirect_stdlib_logging=False,
)
handler = logging.StreamHandler(output)
handler.setFormatter(JsonFormatter())
logger = self.get_logger(handler)
logger = Logger(
namespace="synapse.logging.test_terse_json", observer=beginner.observers[0]
)
logger.info("Hello there, %s!", "wally")
# Send some debug messages
for i in range(0, 3):
logger.debug("debug %s" % (i,))
# One log message, with a single trailing newline.
data = output.getvalue()
logs = data.splitlines()
self.assertEqual(len(logs), 1)
self.assertEqual(data.count("\n"), 1)
log = json.loads(logs[0])
# Send a bunch of useful messages
for i in range(0, 7):
logger.info("test message %s" % (i,))
# The last debug message pushes it past the maximum buffer
logger.debug("too much debug")
# Allow the reconnection
_, server = connect_client(self.reactor, 0)
self.pump()
# Only the 7 infos made it through, the debugs were elided
logs = server.data.splitlines()
self.assertEqual(len(logs), 7)
def test_log_backpressure_info(self):
"""
When backpressure is hit, DEBUG and INFO logs will be shed.
"""
log_config = {
"loggers": {"synapse": {"level": "DEBUG"}},
"drains": {
"tersejson": {
"type": "network_json_terse",
"host": "127.0.0.1",
"port": 8000,
"maximum_buffer": 10,
}
},
}
# Begin the logger with our config
beginner = FakeBeginner()
setup_structured_logging(
self.hs,
self.hs.config,
log_config,
logBeginner=beginner,
redirect_stdlib_logging=False,
)
logger = Logger(
namespace="synapse.logging.test_terse_json", observer=beginner.observers[0]
)
# Send some debug messages
for i in range(0, 3):
logger.debug("debug %s" % (i,))
# Send a bunch of useful messages
for i in range(0, 10):
logger.warn("test warn %s" % (i,))
# Send a bunch of info messages
for i in range(0, 3):
logger.info("test message %s" % (i,))
# The last debug message pushes it past the maximum buffer
logger.debug("too much debug")
# Allow the reconnection
client, server = connect_client(self.reactor, 0)
self.pump()
# The 10 warnings made it through, the debugs and infos were elided
logs = list(map(json.loads, server.data.decode("utf8").splitlines()))
self.assertEqual(len(logs), 10)
self.assertEqual(Counter([x["level"] for x in logs]), {"WARN": 10})
def test_log_backpressure_cut_middle(self):
"""
When backpressure is hit, and no more DEBUG and INFOs cannot be culled,
it will cut the middle messages out.
"""
log_config = {
"loggers": {"synapse": {"level": "DEBUG"}},
"drains": {
"tersejson": {
"type": "network_json_terse",
"host": "127.0.0.1",
"port": 8000,
"maximum_buffer": 10,
}
},
}
# Begin the logger with our config
beginner = FakeBeginner()
setup_structured_logging(
self.hs,
self.hs.config,
log_config,
logBeginner=beginner,
redirect_stdlib_logging=False,
)
logger = Logger(
namespace="synapse.logging.test_terse_json", observer=beginner.observers[0]
)
# Send a bunch of useful messages
for i in range(0, 20):
logger.warn("test warn", num=i)
# Allow the reconnection
client, server = connect_client(self.reactor, 0)
self.pump()
# The first five and last five warnings made it through, the debugs and
# infos were elided
logs = list(map(json.loads, server.data.decode("utf8").splitlines()))
self.assertEqual(len(logs), 10)
self.assertEqual(Counter([x["level"] for x in logs]), {"WARN": 10})
self.assertEqual([0, 1, 2, 3, 4, 15, 16, 17, 18, 19], [x["num"] for x in logs])
# The terse logger should give us these keys.
expected_log_keys = [
"log",
"level",
"namespace",
]
self.assertCountEqual(log.keys(), expected_log_keys)
self.assertEqual(log["log"], "Hello there, wally!")