Add instance name to RDATA/POSITION commands (#7364)

This is primarily for allowing us to send those commands from workers, but for now simply allows us to ignore echoed RDATA/POSITION commands that we sent (we get echoes of sent commands when using redis). Currently we log a WARNING on the master process every time we receive an echoed RDATA.
This commit is contained in:
Erik Johnston 2020-04-29 16:23:08 +01:00 committed by GitHub
parent 3eab76ad43
commit 37f6823f5b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 95 additions and 50 deletions

View file

@ -270,7 +270,7 @@ def start(hs, listeners=None):
# Start the tracer
synapse.logging.opentracing.init_tracer( # type: ignore[attr-defined] # noqa
hs.config
hs
)
# It is now safe to start your Synapse.
@ -316,7 +316,7 @@ def setup_sentry(hs):
scope.set_tag("matrix_server_name", hs.config.server_name)
app = hs.config.worker_app if hs.config.worker_app else "synapse.app.homeserver"
name = hs.config.worker_name if hs.config.worker_name else "master"
name = hs.get_instance_name()
scope.set_tag("worker_app", app)
scope.set_tag("worker_name", name)

View file

@ -171,7 +171,7 @@ import logging
import re
import types
from functools import wraps
from typing import Dict
from typing import TYPE_CHECKING, Dict
from canonicaljson import json
@ -179,6 +179,9 @@ from twisted.internet import defer
from synapse.config import ConfigError
if TYPE_CHECKING:
from synapse.server import HomeServer
# Helper class
@ -297,14 +300,11 @@ def _noop_context_manager(*args, **kwargs):
# Setup
def init_tracer(config):
def init_tracer(hs: "HomeServer"):
"""Set the whitelists and initialise the JaegerClient tracer
Args:
config (HomeserverConfig): The config used by the homeserver
"""
global opentracing
if not config.opentracer_enabled:
if not hs.config.opentracer_enabled:
# We don't have a tracer
opentracing = None
return
@ -315,18 +315,15 @@ def init_tracer(config):
"installed."
)
# Include the worker name
name = config.worker_name if config.worker_name else "master"
# Pull out the jaeger config if it was given. Otherwise set it to something sensible.
# See https://github.com/jaegertracing/jaeger-client-python/blob/master/jaeger_client/config.py
set_homeserver_whitelist(config.opentracer_whitelist)
set_homeserver_whitelist(hs.config.opentracer_whitelist)
JaegerConfig(
config=config.jaeger_config,
service_name="{} {}".format(config.server_name, name),
scope_manager=LogContextScopeManager(config),
config=hs.config.jaeger_config,
service_name="{} {}".format(hs.config.server_name, hs.get_instance_name()),
scope_manager=LogContextScopeManager(hs.config),
).initialize_tracer()

View file

@ -95,7 +95,7 @@ class RdataCommand(Command):
Format::
RDATA <stream_name> <token> <row_json>
RDATA <stream_name> <instance_name> <token> <row_json>
The `<token>` may either be a numeric stream id OR "batch". The latter case
is used to support sending multiple updates with the same stream ID. This
@ -105,33 +105,40 @@ class RdataCommand(Command):
The client should batch all incoming RDATA with a token of "batch" (per
stream_name) until it sees an RDATA with a numeric stream ID.
The `<instance_name>` is the source of the new data (usually "master").
`<token>` of "batch" maps to the instance variable `token` being None.
An example of a batched series of RDATA::
RDATA presence batch ["@foo:example.com", "online", ...]
RDATA presence batch ["@bar:example.com", "online", ...]
RDATA presence 59 ["@baz:example.com", "online", ...]
RDATA presence master batch ["@foo:example.com", "online", ...]
RDATA presence master batch ["@bar:example.com", "online", ...]
RDATA presence master 59 ["@baz:example.com", "online", ...]
"""
NAME = "RDATA"
def __init__(self, stream_name, token, row):
def __init__(self, stream_name, instance_name, token, row):
self.stream_name = stream_name
self.instance_name = instance_name
self.token = token
self.row = row
@classmethod
def from_line(cls, line):
stream_name, token, row_json = line.split(" ", 2)
stream_name, instance_name, token, row_json = line.split(" ", 3)
return cls(
stream_name, None if token == "batch" else int(token), json.loads(row_json)
stream_name,
instance_name,
None if token == "batch" else int(token),
json.loads(row_json),
)
def to_line(self):
return " ".join(
(
self.stream_name,
self.instance_name,
str(self.token) if self.token is not None else "batch",
_json_encoder.encode(self.row),
)
@ -145,23 +152,31 @@ class PositionCommand(Command):
"""Sent by the server to tell the client the stream postition without
needing to send an RDATA.
Format::
POSITION <stream_name> <instance_name> <token>
On receipt of a POSITION command clients should check if they have missed
any updates, and if so then fetch them out of band.
The `<instance_name>` is the process that sent the command and is the source
of the stream.
"""
NAME = "POSITION"
def __init__(self, stream_name, token):
def __init__(self, stream_name, instance_name, token):
self.stream_name = stream_name
self.instance_name = instance_name
self.token = token
@classmethod
def from_line(cls, line):
stream_name, token = line.split(" ", 1)
return cls(stream_name, int(token))
stream_name, instance_name, token = line.split(" ", 2)
return cls(stream_name, instance_name, int(token))
def to_line(self):
return " ".join((self.stream_name, str(self.token)))
return " ".join((self.stream_name, self.instance_name, str(self.token)))
class ErrorCommand(_SimpleCommand):

View file

@ -79,6 +79,7 @@ class ReplicationCommandHandler:
self._notifier = hs.get_notifier()
self._clock = hs.get_clock()
self._instance_id = hs.get_instance_id()
self._instance_name = hs.get_instance_name()
# Set of streams that we've caught up with.
self._streams_connected = set() # type: Set[str]
@ -156,7 +157,7 @@ class ReplicationCommandHandler:
hs.config.redis.redis_host, hs.config.redis.redis_port, self._factory,
)
else:
client_name = hs.config.worker_name
client_name = hs.get_instance_name()
self._factory = DirectTcpReplicationClientFactory(hs, client_name, self)
host = hs.config.worker_replication_host
port = hs.config.worker_replication_port
@ -170,7 +171,9 @@ class ReplicationCommandHandler:
for stream_name, stream in self._streams.items():
current_token = stream.current_token()
self.send_command(PositionCommand(stream_name, current_token))
self.send_command(
PositionCommand(stream_name, self._instance_name, current_token)
)
async def on_USER_SYNC(self, conn: AbstractConnection, cmd: UserSyncCommand):
user_sync_counter.inc()
@ -235,6 +238,10 @@ class ReplicationCommandHandler:
await self._server_notices_sender.on_user_ip(cmd.user_id)
async def on_RDATA(self, conn: AbstractConnection, cmd: RdataCommand):
if cmd.instance_name == self._instance_name:
# Ignore RDATA that are just our own echoes
return
stream_name = cmd.stream_name
inbound_rdata_count.labels(stream_name).inc()
@ -286,6 +293,10 @@ class ReplicationCommandHandler:
await self._replication_data_handler.on_rdata(stream_name, token, rows)
async def on_POSITION(self, conn: AbstractConnection, cmd: PositionCommand):
if cmd.instance_name == self._instance_name:
# Ignore POSITION that are just our own echoes
return
stream = self._streams.get(cmd.stream_name)
if not stream:
logger.error("Got POSITION for unknown stream: %s", cmd.stream_name)
@ -485,7 +496,7 @@ class ReplicationCommandHandler:
We need to check if the client is interested in the stream or not
"""
self.send_command(RdataCommand(stream_name, token, data))
self.send_command(RdataCommand(stream_name, self._instance_name, token, data))
UpdateToken = TypeVar("UpdateToken")

View file

@ -234,7 +234,8 @@ class HomeServer(object):
self._listening_services = []
self.start_time = None
self.instance_id = random_string(5)
self._instance_id = random_string(5)
self._instance_name = config.worker_name or "master"
self.clock = Clock(reactor)
self.distributor = Distributor()
@ -254,7 +255,15 @@ class HomeServer(object):
This is used to distinguish running instances in worker-based
deployments.
"""
return self.instance_id
return self._instance_id
def get_instance_name(self) -> str:
"""A unique name for this synapse process.
Used to identify the process over replication and in config. Does not
change over restarts.
"""
return self._instance_name
def setup(self):
logger.info("Setting up.")

View file

@ -122,6 +122,8 @@ class HomeServer(object):
pass
def get_instance_id(self) -> str:
pass
def get_instance_name(self) -> str:
pass
def get_event_builder_factory(self) -> EventBuilderFactory:
pass
def get_storage(self) -> synapse.storage.Storage: