mirror of
https://git.anonymousland.org/anonymousland/synapse-product.git
synced 2024-10-01 08:25:44 -04:00
Wake up transaction queue when remote server comes back online (#6706)
This will be used to retry outbound transactions to a remote server if we think it might have come back up.
This commit is contained in:
parent
5ce0b17e38
commit
a8a50f5b57
1
changelog.d/6706.misc
Normal file
1
changelog.d/6706.misc
Normal file
@ -0,0 +1 @@
|
|||||||
|
Attempt to retry sending a transaction when we detect a remote server has come back online, rather than waiting for a transaction to be triggered by new data.
|
@ -209,7 +209,7 @@ Where `<token>` may be either:
|
|||||||
* a numeric stream_id to stream updates since (exclusive)
|
* a numeric stream_id to stream updates since (exclusive)
|
||||||
* `NOW` to stream all subsequent updates.
|
* `NOW` to stream all subsequent updates.
|
||||||
|
|
||||||
The `<stream_name>` is the name of a replication stream to subscribe
|
The `<stream_name>` is the name of a replication stream to subscribe
|
||||||
to (see [here](../synapse/replication/tcp/streams/_base.py) for a list
|
to (see [here](../synapse/replication/tcp/streams/_base.py) for a list
|
||||||
of streams). It can also be `ALL` to subscribe to all known streams,
|
of streams). It can also be `ALL` to subscribe to all known streams,
|
||||||
in which case the `<token>` must be set to `NOW`.
|
in which case the `<token>` must be set to `NOW`.
|
||||||
@ -234,6 +234,10 @@ in which case the `<token>` must be set to `NOW`.
|
|||||||
|
|
||||||
Used exclusively in tests
|
Used exclusively in tests
|
||||||
|
|
||||||
|
### REMOTE_SERVER_UP (S, C)
|
||||||
|
|
||||||
|
Inform other processes that a remote server may have come back online.
|
||||||
|
|
||||||
See `synapse/replication/tcp/commands.py` for a detailed description and
|
See `synapse/replication/tcp/commands.py` for a detailed description and
|
||||||
the format of each command.
|
the format of each command.
|
||||||
|
|
||||||
|
@ -158,6 +158,13 @@ class FederationSenderReplicationHandler(ReplicationClientHandler):
|
|||||||
args.update(self.send_handler.stream_positions())
|
args.update(self.send_handler.stream_positions())
|
||||||
return args
|
return args
|
||||||
|
|
||||||
|
def on_remote_server_up(self, server: str):
|
||||||
|
"""Called when get a new REMOTE_SERVER_UP command."""
|
||||||
|
|
||||||
|
# Let's wake up the transaction queue for the server in case we have
|
||||||
|
# pending stuff to send to it.
|
||||||
|
self.send_handler.wake_destination(server)
|
||||||
|
|
||||||
|
|
||||||
def start(config_options):
|
def start(config_options):
|
||||||
try:
|
try:
|
||||||
@ -205,7 +212,7 @@ class FederationSenderHandler(object):
|
|||||||
to the federation sender.
|
to the federation sender.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, hs, replication_client):
|
def __init__(self, hs: FederationSenderServer, replication_client):
|
||||||
self.store = hs.get_datastore()
|
self.store = hs.get_datastore()
|
||||||
self._is_mine_id = hs.is_mine_id
|
self._is_mine_id = hs.is_mine_id
|
||||||
self.federation_sender = hs.get_federation_sender()
|
self.federation_sender = hs.get_federation_sender()
|
||||||
@ -226,6 +233,9 @@ class FederationSenderHandler(object):
|
|||||||
self.store.get_room_max_stream_ordering()
|
self.store.get_room_max_stream_ordering()
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def wake_destination(self, server: str):
|
||||||
|
self.federation_sender.wake_destination(server)
|
||||||
|
|
||||||
def stream_positions(self):
|
def stream_positions(self):
|
||||||
return {"federation": self.federation_position}
|
return {"federation": self.federation_position}
|
||||||
|
|
||||||
|
@ -21,6 +21,7 @@ from prometheus_client import Counter
|
|||||||
|
|
||||||
from twisted.internet import defer
|
from twisted.internet import defer
|
||||||
|
|
||||||
|
import synapse
|
||||||
import synapse.metrics
|
import synapse.metrics
|
||||||
from synapse.federation.sender.per_destination_queue import PerDestinationQueue
|
from synapse.federation.sender.per_destination_queue import PerDestinationQueue
|
||||||
from synapse.federation.sender.transaction_manager import TransactionManager
|
from synapse.federation.sender.transaction_manager import TransactionManager
|
||||||
@ -54,7 +55,7 @@ sent_pdus_destination_dist_total = Counter(
|
|||||||
|
|
||||||
|
|
||||||
class FederationSender(object):
|
class FederationSender(object):
|
||||||
def __init__(self, hs):
|
def __init__(self, hs: "synapse.server.HomeServer"):
|
||||||
self.hs = hs
|
self.hs = hs
|
||||||
self.server_name = hs.hostname
|
self.server_name = hs.hostname
|
||||||
|
|
||||||
@ -482,7 +483,20 @@ class FederationSender(object):
|
|||||||
|
|
||||||
def send_device_messages(self, destination):
|
def send_device_messages(self, destination):
|
||||||
if destination == self.server_name:
|
if destination == self.server_name:
|
||||||
logger.info("Not sending device update to ourselves")
|
logger.warning("Not sending device update to ourselves")
|
||||||
|
return
|
||||||
|
|
||||||
|
self._get_per_destination_queue(destination).attempt_new_transaction()
|
||||||
|
|
||||||
|
def wake_destination(self, destination: str):
|
||||||
|
"""Called when we want to retry sending transactions to a remote.
|
||||||
|
|
||||||
|
This is mainly useful if the remote server has been down and we think it
|
||||||
|
might have come back.
|
||||||
|
"""
|
||||||
|
|
||||||
|
if destination == self.server_name:
|
||||||
|
logger.warning("Not waking up ourselves")
|
||||||
return
|
return
|
||||||
|
|
||||||
self._get_per_destination_queue(destination).attempt_new_transaction()
|
self._get_per_destination_queue(destination).attempt_new_transaction()
|
||||||
|
@ -44,6 +44,7 @@ from synapse.logging.opentracing import (
|
|||||||
tags,
|
tags,
|
||||||
whitelisted_homeserver,
|
whitelisted_homeserver,
|
||||||
)
|
)
|
||||||
|
from synapse.server import HomeServer
|
||||||
from synapse.types import ThirdPartyInstanceID, get_domain_from_id
|
from synapse.types import ThirdPartyInstanceID, get_domain_from_id
|
||||||
from synapse.util.ratelimitutils import FederationRateLimiter
|
from synapse.util.ratelimitutils import FederationRateLimiter
|
||||||
from synapse.util.versionstring import get_version_string
|
from synapse.util.versionstring import get_version_string
|
||||||
@ -101,12 +102,17 @@ class NoAuthenticationError(AuthenticationError):
|
|||||||
|
|
||||||
|
|
||||||
class Authenticator(object):
|
class Authenticator(object):
|
||||||
def __init__(self, hs):
|
def __init__(self, hs: HomeServer):
|
||||||
self._clock = hs.get_clock()
|
self._clock = hs.get_clock()
|
||||||
self.keyring = hs.get_keyring()
|
self.keyring = hs.get_keyring()
|
||||||
self.server_name = hs.hostname
|
self.server_name = hs.hostname
|
||||||
self.store = hs.get_datastore()
|
self.store = hs.get_datastore()
|
||||||
self.federation_domain_whitelist = hs.config.federation_domain_whitelist
|
self.federation_domain_whitelist = hs.config.federation_domain_whitelist
|
||||||
|
self.notifer = hs.get_notifier()
|
||||||
|
|
||||||
|
self.replication_client = None
|
||||||
|
if hs.config.worker.worker_app:
|
||||||
|
self.replication_client = hs.get_tcp_replication()
|
||||||
|
|
||||||
# A method just so we can pass 'self' as the authenticator to the Servlets
|
# A method just so we can pass 'self' as the authenticator to the Servlets
|
||||||
async def authenticate_request(self, request, content):
|
async def authenticate_request(self, request, content):
|
||||||
@ -166,6 +172,17 @@ class Authenticator(object):
|
|||||||
try:
|
try:
|
||||||
logger.info("Marking origin %r as up", origin)
|
logger.info("Marking origin %r as up", origin)
|
||||||
await self.store.set_destination_retry_timings(origin, None, 0, 0)
|
await self.store.set_destination_retry_timings(origin, None, 0, 0)
|
||||||
|
|
||||||
|
# Inform the relevant places that the remote server is back up.
|
||||||
|
self.notifer.notify_remote_server_up(origin)
|
||||||
|
if self.replication_client:
|
||||||
|
# If we're on a worker we try and inform master about this. The
|
||||||
|
# replication client doesn't hook into the notifier to avoid
|
||||||
|
# infinite loops where we send a `REMOTE_SERVER_UP` command to
|
||||||
|
# master, which then echoes it back to us which in turn pokes
|
||||||
|
# the notifier.
|
||||||
|
self.replication_client.send_remote_server_up(origin)
|
||||||
|
|
||||||
except Exception:
|
except Exception:
|
||||||
logger.exception("Error resetting retry timings on %s", origin)
|
logger.exception("Error resetting retry timings on %s", origin)
|
||||||
|
|
||||||
|
@ -15,11 +15,13 @@
|
|||||||
|
|
||||||
import logging
|
import logging
|
||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
|
from typing import Callable, List
|
||||||
|
|
||||||
from prometheus_client import Counter
|
from prometheus_client import Counter
|
||||||
|
|
||||||
from twisted.internet import defer
|
from twisted.internet import defer
|
||||||
|
|
||||||
|
import synapse.server
|
||||||
from synapse.api.constants import EventTypes, Membership
|
from synapse.api.constants import EventTypes, Membership
|
||||||
from synapse.api.errors import AuthError
|
from synapse.api.errors import AuthError
|
||||||
from synapse.handlers.presence import format_user_presence_state
|
from synapse.handlers.presence import format_user_presence_state
|
||||||
@ -154,7 +156,7 @@ class Notifier(object):
|
|||||||
|
|
||||||
UNUSED_STREAM_EXPIRY_MS = 10 * 60 * 1000
|
UNUSED_STREAM_EXPIRY_MS = 10 * 60 * 1000
|
||||||
|
|
||||||
def __init__(self, hs):
|
def __init__(self, hs: "synapse.server.HomeServer"):
|
||||||
self.user_to_user_stream = {}
|
self.user_to_user_stream = {}
|
||||||
self.room_to_user_streams = {}
|
self.room_to_user_streams = {}
|
||||||
|
|
||||||
@ -164,7 +166,12 @@ class Notifier(object):
|
|||||||
self.store = hs.get_datastore()
|
self.store = hs.get_datastore()
|
||||||
self.pending_new_room_events = []
|
self.pending_new_room_events = []
|
||||||
|
|
||||||
self.replication_callbacks = []
|
# Called when there are new things to stream over replication
|
||||||
|
self.replication_callbacks = [] # type: List[Callable[[], None]]
|
||||||
|
|
||||||
|
# Called when remote servers have come back online after having been
|
||||||
|
# down.
|
||||||
|
self.remote_server_up_callbacks = [] # type: List[Callable[[str], None]]
|
||||||
|
|
||||||
self.clock = hs.get_clock()
|
self.clock = hs.get_clock()
|
||||||
self.appservice_handler = hs.get_application_service_handler()
|
self.appservice_handler = hs.get_application_service_handler()
|
||||||
@ -205,7 +212,7 @@ class Notifier(object):
|
|||||||
"synapse_notifier_users", "", [], lambda: len(self.user_to_user_stream)
|
"synapse_notifier_users", "", [], lambda: len(self.user_to_user_stream)
|
||||||
)
|
)
|
||||||
|
|
||||||
def add_replication_callback(self, cb):
|
def add_replication_callback(self, cb: Callable[[], None]):
|
||||||
"""Add a callback that will be called when some new data is available.
|
"""Add a callback that will be called when some new data is available.
|
||||||
Callback is not given any arguments. It should *not* return a Deferred - if
|
Callback is not given any arguments. It should *not* return a Deferred - if
|
||||||
it needs to do any asynchronous work, a background thread should be started and
|
it needs to do any asynchronous work, a background thread should be started and
|
||||||
@ -213,6 +220,12 @@ class Notifier(object):
|
|||||||
"""
|
"""
|
||||||
self.replication_callbacks.append(cb)
|
self.replication_callbacks.append(cb)
|
||||||
|
|
||||||
|
def add_remote_server_up_callback(self, cb: Callable[[str], None]):
|
||||||
|
"""Add a callback that will be called when synapse detects a server
|
||||||
|
has been
|
||||||
|
"""
|
||||||
|
self.remote_server_up_callbacks.append(cb)
|
||||||
|
|
||||||
def on_new_room_event(
|
def on_new_room_event(
|
||||||
self, event, room_stream_id, max_room_stream_id, extra_users=[]
|
self, event, room_stream_id, max_room_stream_id, extra_users=[]
|
||||||
):
|
):
|
||||||
@ -522,3 +535,15 @@ class Notifier(object):
|
|||||||
"""Notify the any replication listeners that there's a new event"""
|
"""Notify the any replication listeners that there's a new event"""
|
||||||
for cb in self.replication_callbacks:
|
for cb in self.replication_callbacks:
|
||||||
cb()
|
cb()
|
||||||
|
|
||||||
|
def notify_remote_server_up(self, server: str):
|
||||||
|
"""Notify any replication that a remote server has come back up
|
||||||
|
"""
|
||||||
|
# We call federation_sender directly rather than registering as a
|
||||||
|
# callback as a) we already have a reference to it and b) it introduces
|
||||||
|
# circular dependencies.
|
||||||
|
if self.federation_sender:
|
||||||
|
self.federation_sender.wake_destination(server)
|
||||||
|
|
||||||
|
for cb in self.remote_server_up_callbacks:
|
||||||
|
cb(server)
|
||||||
|
@ -143,6 +143,9 @@ class ReplicationClientHandler(AbstractReplicationClientHandler):
|
|||||||
if d:
|
if d:
|
||||||
d.callback(data)
|
d.callback(data)
|
||||||
|
|
||||||
|
def on_remote_server_up(self, server: str):
|
||||||
|
"""Called when get a new REMOTE_SERVER_UP command."""
|
||||||
|
|
||||||
def get_streams_to_replicate(self) -> Dict[str, int]:
|
def get_streams_to_replicate(self) -> Dict[str, int]:
|
||||||
"""Called when a new connection has been established and we need to
|
"""Called when a new connection has been established and we need to
|
||||||
subscribe to streams.
|
subscribe to streams.
|
||||||
|
@ -387,6 +387,20 @@ class UserIpCommand(Command):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class RemoteServerUpCommand(Command):
|
||||||
|
"""Sent when a worker has detected that a remote server is no longer
|
||||||
|
"down" and retry timings should be reset.
|
||||||
|
|
||||||
|
If sent from a client the server will relay to all other workers.
|
||||||
|
|
||||||
|
Format::
|
||||||
|
|
||||||
|
REMOTE_SERVER_UP <server>
|
||||||
|
"""
|
||||||
|
|
||||||
|
NAME = "REMOTE_SERVER_UP"
|
||||||
|
|
||||||
|
|
||||||
_COMMANDS = (
|
_COMMANDS = (
|
||||||
ServerCommand,
|
ServerCommand,
|
||||||
RdataCommand,
|
RdataCommand,
|
||||||
@ -401,6 +415,7 @@ _COMMANDS = (
|
|||||||
RemovePusherCommand,
|
RemovePusherCommand,
|
||||||
InvalidateCacheCommand,
|
InvalidateCacheCommand,
|
||||||
UserIpCommand,
|
UserIpCommand,
|
||||||
|
RemoteServerUpCommand,
|
||||||
) # type: Tuple[Type[Command], ...]
|
) # type: Tuple[Type[Command], ...]
|
||||||
|
|
||||||
# Map of command name to command type.
|
# Map of command name to command type.
|
||||||
@ -414,6 +429,7 @@ VALID_SERVER_COMMANDS = (
|
|||||||
ErrorCommand.NAME,
|
ErrorCommand.NAME,
|
||||||
PingCommand.NAME,
|
PingCommand.NAME,
|
||||||
SyncCommand.NAME,
|
SyncCommand.NAME,
|
||||||
|
RemoteServerUpCommand.NAME,
|
||||||
)
|
)
|
||||||
|
|
||||||
# The commands the client is allowed to send
|
# The commands the client is allowed to send
|
||||||
@ -427,4 +443,5 @@ VALID_CLIENT_COMMANDS = (
|
|||||||
InvalidateCacheCommand.NAME,
|
InvalidateCacheCommand.NAME,
|
||||||
UserIpCommand.NAME,
|
UserIpCommand.NAME,
|
||||||
ErrorCommand.NAME,
|
ErrorCommand.NAME,
|
||||||
|
RemoteServerUpCommand.NAME,
|
||||||
)
|
)
|
||||||
|
@ -76,6 +76,7 @@ from synapse.replication.tcp.commands import (
|
|||||||
PingCommand,
|
PingCommand,
|
||||||
PositionCommand,
|
PositionCommand,
|
||||||
RdataCommand,
|
RdataCommand,
|
||||||
|
RemoteServerUpCommand,
|
||||||
ReplicateCommand,
|
ReplicateCommand,
|
||||||
ServerCommand,
|
ServerCommand,
|
||||||
SyncCommand,
|
SyncCommand,
|
||||||
@ -460,6 +461,9 @@ class ServerReplicationStreamProtocol(BaseReplicationStreamProtocol):
|
|||||||
async def on_INVALIDATE_CACHE(self, cmd):
|
async def on_INVALIDATE_CACHE(self, cmd):
|
||||||
self.streamer.on_invalidate_cache(cmd.cache_func, cmd.keys)
|
self.streamer.on_invalidate_cache(cmd.cache_func, cmd.keys)
|
||||||
|
|
||||||
|
async def on_REMOTE_SERVER_UP(self, cmd: RemoteServerUpCommand):
|
||||||
|
self.streamer.on_remote_server_up(cmd.data)
|
||||||
|
|
||||||
async def on_USER_IP(self, cmd):
|
async def on_USER_IP(self, cmd):
|
||||||
self.streamer.on_user_ip(
|
self.streamer.on_user_ip(
|
||||||
cmd.user_id,
|
cmd.user_id,
|
||||||
@ -555,6 +559,9 @@ class ServerReplicationStreamProtocol(BaseReplicationStreamProtocol):
|
|||||||
def send_sync(self, data):
|
def send_sync(self, data):
|
||||||
self.send_command(SyncCommand(data))
|
self.send_command(SyncCommand(data))
|
||||||
|
|
||||||
|
def send_remote_server_up(self, server: str):
|
||||||
|
self.send_command(RemoteServerUpCommand(server))
|
||||||
|
|
||||||
def on_connection_closed(self):
|
def on_connection_closed(self):
|
||||||
BaseReplicationStreamProtocol.on_connection_closed(self)
|
BaseReplicationStreamProtocol.on_connection_closed(self)
|
||||||
self.streamer.lost_connection(self)
|
self.streamer.lost_connection(self)
|
||||||
@ -588,6 +595,11 @@ class AbstractReplicationClientHandler(metaclass=abc.ABCMeta):
|
|||||||
"""Called when get a new SYNC command."""
|
"""Called when get a new SYNC command."""
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
|
@abc.abstractmethod
|
||||||
|
async def on_remote_server_up(self, server: str):
|
||||||
|
"""Called when get a new REMOTE_SERVER_UP command."""
|
||||||
|
raise NotImplementedError()
|
||||||
|
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def get_streams_to_replicate(self):
|
def get_streams_to_replicate(self):
|
||||||
"""Called when a new connection has been established and we need to
|
"""Called when a new connection has been established and we need to
|
||||||
@ -707,6 +719,9 @@ class ClientReplicationStreamProtocol(BaseReplicationStreamProtocol):
|
|||||||
async def on_SYNC(self, cmd):
|
async def on_SYNC(self, cmd):
|
||||||
self.handler.on_sync(cmd.data)
|
self.handler.on_sync(cmd.data)
|
||||||
|
|
||||||
|
async def on_REMOTE_SERVER_UP(self, cmd: RemoteServerUpCommand):
|
||||||
|
self.handler.on_remote_server_up(cmd.data)
|
||||||
|
|
||||||
def replicate(self, stream_name, token):
|
def replicate(self, stream_name, token):
|
||||||
"""Send the subscription request to the server
|
"""Send the subscription request to the server
|
||||||
"""
|
"""
|
||||||
|
@ -120,6 +120,7 @@ class ReplicationStreamer(object):
|
|||||||
self.federation_sender = hs.get_federation_sender()
|
self.federation_sender = hs.get_federation_sender()
|
||||||
|
|
||||||
self.notifier.add_replication_callback(self.on_notifier_poke)
|
self.notifier.add_replication_callback(self.on_notifier_poke)
|
||||||
|
self.notifier.add_remote_server_up_callback(self.send_remote_server_up)
|
||||||
|
|
||||||
# Keeps track of whether we are currently checking for updates
|
# Keeps track of whether we are currently checking for updates
|
||||||
self.is_looping = False
|
self.is_looping = False
|
||||||
@ -288,6 +289,14 @@ class ReplicationStreamer(object):
|
|||||||
)
|
)
|
||||||
await self._server_notices_sender.on_user_ip(user_id)
|
await self._server_notices_sender.on_user_ip(user_id)
|
||||||
|
|
||||||
|
@measure_func("repl.on_remote_server_up")
|
||||||
|
def on_remote_server_up(self, server: str):
|
||||||
|
self.notifier.notify_remote_server_up(server)
|
||||||
|
|
||||||
|
def send_remote_server_up(self, server: str):
|
||||||
|
for conn in self.connections:
|
||||||
|
conn.send_remote_server_up(server)
|
||||||
|
|
||||||
def send_sync_to_all_connections(self, data):
|
def send_sync_to_all_connections(self, data):
|
||||||
"""Sends a SYNC command to all clients.
|
"""Sends a SYNC command to all clients.
|
||||||
|
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
|
import twisted.internet
|
||||||
|
|
||||||
import synapse.api.auth
|
import synapse.api.auth
|
||||||
import synapse.config.homeserver
|
import synapse.config.homeserver
|
||||||
import synapse.federation.sender
|
import synapse.federation.sender
|
||||||
@ -9,10 +11,12 @@ import synapse.handlers.deactivate_account
|
|||||||
import synapse.handlers.device
|
import synapse.handlers.device
|
||||||
import synapse.handlers.e2e_keys
|
import synapse.handlers.e2e_keys
|
||||||
import synapse.handlers.message
|
import synapse.handlers.message
|
||||||
|
import synapse.handlers.presence
|
||||||
import synapse.handlers.room
|
import synapse.handlers.room
|
||||||
import synapse.handlers.room_member
|
import synapse.handlers.room_member
|
||||||
import synapse.handlers.set_password
|
import synapse.handlers.set_password
|
||||||
import synapse.http.client
|
import synapse.http.client
|
||||||
|
import synapse.notifier
|
||||||
import synapse.rest.media.v1.media_repository
|
import synapse.rest.media.v1.media_repository
|
||||||
import synapse.server_notices.server_notices_manager
|
import synapse.server_notices.server_notices_manager
|
||||||
import synapse.server_notices.server_notices_sender
|
import synapse.server_notices.server_notices_sender
|
||||||
@ -85,3 +89,11 @@ class HomeServer(object):
|
|||||||
self,
|
self,
|
||||||
) -> synapse.server_notices.server_notices_sender.ServerNoticesSender:
|
) -> synapse.server_notices.server_notices_sender.ServerNoticesSender:
|
||||||
pass
|
pass
|
||||||
|
def get_notifier(self) -> synapse.notifier.Notifier:
|
||||||
|
pass
|
||||||
|
def get_presence_handler(self) -> synapse.handlers.presence.PresenceHandler:
|
||||||
|
pass
|
||||||
|
def get_clock(self) -> synapse.util.Clock:
|
||||||
|
pass
|
||||||
|
def get_reactor(self) -> twisted.internet.base.ReactorBase:
|
||||||
|
pass
|
||||||
|
Loading…
Reference in New Issue
Block a user