Add ability to shard the federation sender (#7798)

This commit is contained in:
Erik Johnston 2020-07-10 18:26:36 +01:00 committed by GitHub
parent f1245dc3c0
commit f299441cc6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
15 changed files with 670 additions and 157 deletions

View file

@ -45,7 +45,7 @@ from twisted.internet import defer
from synapse.logging.context import make_deferred_yieldable, run_in_background
from synapse.storage._base import SQLBaseStore
from synapse.storage.data_stores.main.events_worker import EventsWorkerStore
from synapse.storage.database import Database
from synapse.storage.database import Database, make_in_list_sql_clause
from synapse.storage.engines import PostgresEngine
from synapse.types import RoomStreamToken
from synapse.util.caches.stream_change_cache import StreamChangeCache
@ -253,6 +253,16 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore):
def __init__(self, database: Database, db_conn, hs):
super(StreamWorkerStore, self).__init__(database, db_conn, hs)
self._instance_name = hs.get_instance_name()
self._send_federation = hs.should_send_federation()
self._federation_shard_config = hs.config.federation.federation_shard_config
# If we're a process that sends federation we may need to reset the
# `federation_stream_position` table to match the current sharding
# config. We don't do this now as otherwise two processes could conflict
# during startup which would cause one to die.
self._need_to_reset_federation_stream_positions = self._send_federation
events_max = self.get_room_max_stream_ordering()
event_cache_prefill, min_event_val = self.db.get_cache_dict(
db_conn,
@ -793,22 +803,95 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore):
return upper_bound, events
def get_federation_out_pos(self, typ):
return self.db.simple_select_one_onecol(
async def get_federation_out_pos(self, typ: str) -> int:
if self._need_to_reset_federation_stream_positions:
await self.db.runInteraction(
"_reset_federation_positions_txn", self._reset_federation_positions_txn
)
self._need_to_reset_federation_stream_positions = False
return await self.db.simple_select_one_onecol(
table="federation_stream_position",
retcol="stream_id",
keyvalues={"type": typ},
keyvalues={"type": typ, "instance_name": self._instance_name},
desc="get_federation_out_pos",
)
def update_federation_out_pos(self, typ, stream_id):
return self.db.simple_update_one(
async def update_federation_out_pos(self, typ, stream_id):
if self._need_to_reset_federation_stream_positions:
await self.db.runInteraction(
"_reset_federation_positions_txn", self._reset_federation_positions_txn
)
self._need_to_reset_federation_stream_positions = False
return await self.db.simple_update_one(
table="federation_stream_position",
keyvalues={"type": typ},
keyvalues={"type": typ, "instance_name": self._instance_name},
updatevalues={"stream_id": stream_id},
desc="update_federation_out_pos",
)
def _reset_federation_positions_txn(self, txn):
"""Fiddles with the `federation_stream_position` table to make it match
the configured federation sender instances during start up.
"""
# The federation sender instances may have changed, so we need to
# massage the `federation_stream_position` table to have a row per type
# per instance sending federation. If there is a mismatch we update the
# table with the correct rows using the *minimum* stream ID seen. This
# may result in resending of events/EDUs to remote servers, but that is
# preferable to dropping them.
if not self._send_federation:
return
# Pull out the configured instances. If we don't have a shard config then
# we assume that we're the only instance sending.
configured_instances = self._federation_shard_config.instances
if not configured_instances:
configured_instances = [self._instance_name]
elif self._instance_name not in configured_instances:
return
instances_in_table = self.db.simple_select_onecol_txn(
txn,
table="federation_stream_position",
keyvalues={},
retcol="instance_name",
)
if set(instances_in_table) == set(configured_instances):
# Nothing to do
return
sql = """
SELECT type, MIN(stream_id) FROM federation_stream_position
GROUP BY type
"""
txn.execute(sql)
min_positions = dict(txn) # Map from type -> min position
# Ensure we do actually have some values here
assert set(min_positions) == {"federation", "events"}
sql = """
DELETE FROM federation_stream_position
WHERE NOT (%s)
"""
clause, args = make_in_list_sql_clause(
txn.database_engine, "instance_name", configured_instances
)
txn.execute(sql % (clause,), args)
for typ, stream_id in min_positions.items():
self.db.simple_upsert_txn(
txn,
table="federation_stream_position",
keyvalues={"type": typ, "instance_name": self._instance_name},
values={"stream_id": stream_id},
)
def has_room_changed_since(self, room_id, stream_id):
return self._events_stream_cache.has_entity_changed(room_id, stream_id)