Faster joins: persist to database (#12012)

When we get a partial_state response from send_join, store information in the
database about it:
 * store a record about the room as a whole having partial state, and stash the
   list of member servers too.
 * flag the join event itself as having partial state
 * also, for any new events whose prev-events are partial-stated, note that
   they will *also* be partial-stated.

We don't yet make any attempt to interpret this data, so API calls (and a bunch
of other things) are just going to get incorrect data.
This commit is contained in:
Richard van der Hoff 2022-03-01 12:49:54 +00:00 committed by GitHub
parent 4ccc2d09aa
commit e2e1d90a5e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 297 additions and 32 deletions

View file

@ -0,0 +1,41 @@
/* Copyright 2022 The Matrix.org Foundation C.I.C
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-- rooms which we have done a partial-state-style join to
CREATE TABLE IF NOT EXISTS partial_state_rooms (
    -- NOT NULL is spelled out because SQLite (unlike Postgres) accepts NULLs in a
    -- non-INTEGER PRIMARY KEY column unless the column is also declared NOT NULL.
    room_id TEXT NOT NULL PRIMARY KEY,
    FOREIGN KEY(room_id) REFERENCES rooms(room_id)
);
-- the remote servers which we believe are in each partially-joined room
-- (one row per room/server pair)
CREATE TABLE IF NOT EXISTS partial_state_rooms_servers (
    room_id TEXT NOT NULL,
    server_name TEXT NOT NULL,
    FOREIGN KEY (room_id) REFERENCES partial_state_rooms (room_id),
    UNIQUE (room_id, server_name)
);
-- the events which have partial state. This is kept in a table of its own rather
-- than as a column on `events`, because `events` is meant to be append-only.
CREATE TABLE IF NOT EXISTS partial_state_events (
    -- denormalised copy of the event's room_id, held here so that lookups by room
    -- can be indexed efficiently (`events` remains the canonical source)
    room_id TEXT NOT NULL,
    -- each event may appear in this table at most once
    event_id TEXT NOT NULL UNIQUE,
    FOREIGN KEY (room_id) REFERENCES partial_state_rooms (room_id),
    FOREIGN KEY (event_id) REFERENCES events (event_id)
);

CREATE INDEX IF NOT EXISTS partial_state_events_room_id_idx
    ON partial_state_events (room_id);

View file

@ -0,0 +1,72 @@
# Copyright 2022 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This migration adds triggers to the partial_state_events table to ensure that the
room_id of each row matches the room_id of the corresponding row in `events`.

Triggers cannot be expressed in .sql files, so we have to use a separate file.
"""
from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine, Sqlite3Engine
from synapse.storage.types import Cursor
def run_create(cur: Cursor, database_engine: BaseDatabaseEngine, *args, **kwargs):
    """Create triggers which reject `partial_state_events` rows with a wrong room_id.

    Args:
        cur: cursor on which the trigger-creating statements are executed.
        database_engine: the engine in use; selects the SQLite or Postgres dialect.
        *args: accepted and ignored.
        **kwargs: accepted and ignored.

    Raises:
        NotImplementedError: if `database_engine` is neither a `Sqlite3Engine` nor
            a `PostgresEngine`.
    """
    # complain if the room_id in partial_state_events doesn't match
    # that in `events`. We already have a fk constraint which ensures that the event
    # exists in `events`, so all we have to do is raise if there is a row with a
    # matching event_id but not a matching room_id.
    if isinstance(database_engine, Sqlite3Engine):
        # A SQLite trigger fires on a single event type, so we need one trigger for
        # INSERT and another for UPDATE to cover the same cases as the Postgres
        # trigger below (which is declared BEFORE INSERT OR UPDATE).
        cur.execute(
            """
            CREATE TRIGGER IF NOT EXISTS partial_state_events_bad_room_id
                BEFORE INSERT ON partial_state_events
                FOR EACH ROW
            BEGIN
                SELECT RAISE(ABORT, 'Incorrect room_id in partial_state_events')
                WHERE EXISTS (
                    SELECT 1 FROM events
                    WHERE events.event_id = NEW.event_id
                       AND events.room_id != NEW.room_id
                );
            END;
            """
        )
        cur.execute(
            """
            CREATE TRIGGER IF NOT EXISTS partial_state_events_bad_room_id_update
                BEFORE UPDATE ON partial_state_events
                FOR EACH ROW
            BEGIN
                SELECT RAISE(ABORT, 'Incorrect room_id in partial_state_events')
                WHERE EXISTS (
                    SELECT 1 FROM events
                    WHERE events.event_id = NEW.event_id
                       AND events.room_id != NEW.room_id
                );
            END;
            """
        )
    elif isinstance(database_engine, PostgresEngine):
        cur.execute(
            """
            CREATE OR REPLACE FUNCTION check_partial_state_events() RETURNS trigger AS $BODY$
            BEGIN
                IF EXISTS (
                    SELECT 1 FROM events
                    WHERE events.event_id = NEW.event_id
                       AND events.room_id != NEW.room_id
                ) THEN
                    RAISE EXCEPTION 'Incorrect room_id in partial_state_events';
                END IF;
                RETURN NEW;
            END;
            $BODY$ LANGUAGE plpgsql;
            """
        )

        # Postgres has no portable `CREATE TRIGGER IF NOT EXISTS`, so drop any
        # existing trigger first. This keeps the migration re-runnable, in line with
        # the `IF NOT EXISTS` / `OR REPLACE` used by the other statements.
        cur.execute(
            """
            DROP TRIGGER IF EXISTS check_partial_state_events ON partial_state_events
            """
        )
        cur.execute(
            """
            CREATE TRIGGER check_partial_state_events BEFORE INSERT OR UPDATE ON partial_state_events
                FOR EACH ROW
                EXECUTE PROCEDURE check_partial_state_events()
            """
        )
    else:
        raise NotImplementedError("Unknown database engine")