Have all instances correctly respond to REPLICATE command. (#7475)

Before all streams were only written to from master, so only master needed to respond to `REPLICATE` commands.

Before all instances wrote to the cache invalidation stream, but didn't respond to `REPLICATE`. This was a bug, which could lead to missed rows from cache invalidation stream if an instance is restarted, however all the caches would be empty in that case so it wasn't a problem.
This commit is contained in:
Erik Johnston 2020-05-13 10:27:02 +01:00 committed by GitHub
parent 8ca79613e6
commit 7ee24c5674
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 51 additions and 48 deletions

View file

@ -36,7 +36,12 @@ from synapse.replication.tcp.commands import (
UserSyncCommand,
)
from synapse.replication.tcp.protocol import AbstractConnection
from synapse.replication.tcp.streams import STREAMS_MAP, Stream
from synapse.replication.tcp.streams import (
STREAMS_MAP,
CachesStream,
FederationStream,
Stream,
)
from synapse.util.async_helpers import Linearizer
logger = logging.getLogger(__name__)
@ -73,6 +78,26 @@ class ReplicationCommandHandler:
stream.NAME: stream(hs) for stream in STREAMS_MAP.values()
} # type: Dict[str, Stream]
# List of streams that this instance is the source of
self._streams_to_replicate = [] # type: List[Stream]
for stream in self._streams.values():
if stream.NAME == CachesStream.NAME:
# All workers can write to the cache invalidation stream.
self._streams_to_replicate.append(stream)
continue
# Only add any other streams if we're on master.
if hs.config.worker_app is not None:
continue
if stream.NAME == FederationStream.NAME and hs.config.send_federation:
# We only support federation stream if federation sending
# has been disabled on the master.
continue
self._streams_to_replicate.append(stream)
self._position_linearizer = Linearizer(
"replication_position", clock=self._clock
)
@ -150,6 +175,16 @@ class ReplicationCommandHandler:
port = hs.config.worker_replication_port
hs.get_reactor().connectTCP(host, port, self._factory)
def get_streams(self) -> Dict[str, Stream]:
"""Get a map from stream name to all streams.
"""
return self._streams
def get_streams_to_replicate(self) -> List[Stream]:
"""Get a list of streams that this instances replicates.
"""
return self._streams_to_replicate
async def on_REPLICATE(self, conn: AbstractConnection, cmd: ReplicateCommand):
self.send_positions_to_connection(conn)
@ -158,15 +193,15 @@ class ReplicationCommandHandler:
the connection.
"""
# We only want to announce positions by the writer of the streams.
# Currently this is just the master process.
if not self._is_master:
return
for stream_name, stream in self._streams.items():
current_token = stream.current_token(self._instance_name)
conn.send_command(
PositionCommand(stream_name, self._instance_name, current_token)
# We respond with current position of all streams this instance
# replicates.
for stream in self.get_streams_to_replicate():
self.send_command(
PositionCommand(
stream.NAME,
self._instance_name,
stream.current_token(self._instance_name),
)
)
async def on_USER_SYNC(self, conn: AbstractConnection, cmd: UserSyncCommand):