Merge branch 'develop' of github.com:matrix-org/synapse into erikj/purge_state_groups

This commit is contained in:
Erik Johnston 2018-10-19 15:48:59 +01:00
commit 056f099126
70 changed files with 2020 additions and 301 deletions

View file

@ -30,6 +30,7 @@ from .appservice import ApplicationServiceStore, ApplicationServiceTransactionSt
from .client_ips import ClientIpStore
from .deviceinbox import DeviceInboxStore
from .directory import DirectoryStore
from .e2e_room_keys import EndToEndRoomKeyStore
from .end_to_end_keys import EndToEndKeyStore
from .engines import PostgresEngine
from .event_federation import EventFederationStore
@ -77,6 +78,7 @@ class DataStore(RoomMemberStore, RoomStore,
ApplicationServiceTransactionStore,
ReceiptsStore,
EndToEndKeyStore,
EndToEndRoomKeyStore,
SearchStore,
TagsStore,
AccountDataStore,

View file

@ -0,0 +1,320 @@
# -*- coding: utf-8 -*-
# Copyright 2017 New Vector Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
from twisted.internet import defer
from synapse.api.errors import StoreError
from ._base import SQLBaseStore
class EndToEndRoomKeyStore(SQLBaseStore):
@defer.inlineCallbacks
def get_e2e_room_key(self, user_id, version, room_id, session_id):
"""Get the encrypted E2E room key for a given session from a given
backup version of room_keys. We only store the 'best' room key for a given
session at a given time, as determined by the handler.
Args:
user_id(str): the user whose backup we're querying
version(str): the version ID of the backup for the set of keys we're querying
room_id(str): the ID of the room whose keys we're querying.
This is a bit redundant as it's implied by the session_id, but
we include for consistency with the rest of the API.
session_id(str): the session whose room_key we're querying.
Returns:
A deferred dict giving the session_data and message metadata for
this room key.
"""
row = yield self._simple_select_one(
table="e2e_room_keys",
keyvalues={
"user_id": user_id,
"version": version,
"room_id": room_id,
"session_id": session_id,
},
retcols=(
"first_message_index",
"forwarded_count",
"is_verified",
"session_data",
),
desc="get_e2e_room_key",
)
row["session_data"] = json.loads(row["session_data"])
defer.returnValue(row)
@defer.inlineCallbacks
def set_e2e_room_key(self, user_id, version, room_id, session_id, room_key):
"""Replaces or inserts the encrypted E2E room key for a given session in
a given backup
Args:
user_id(str): the user whose backup we're setting
version(str): the version ID of the backup we're updating
room_id(str): the ID of the room whose keys we're setting
session_id(str): the session whose room_key we're setting
room_key(dict): the room_key being set
Raises:
StoreError
"""
yield self._simple_upsert(
table="e2e_room_keys",
keyvalues={
"user_id": user_id,
"room_id": room_id,
"session_id": session_id,
},
values={
"version": version,
"first_message_index": room_key['first_message_index'],
"forwarded_count": room_key['forwarded_count'],
"is_verified": room_key['is_verified'],
"session_data": json.dumps(room_key['session_data']),
},
lock=False,
)
@defer.inlineCallbacks
def get_e2e_room_keys(
self, user_id, version, room_id=None, session_id=None
):
"""Bulk get the E2E room keys for a given backup, optionally filtered to a given
room, or a given session.
Args:
user_id(str): the user whose backup we're querying
version(str): the version ID of the backup for the set of keys we're querying
room_id(str): Optional. the ID of the room whose keys we're querying, if any.
If not specified, we return the keys for all the rooms in the backup.
session_id(str): Optional. the session whose room_key we're querying, if any.
If specified, we also require the room_id to be specified.
If not specified, we return all the keys in this version of
the backup (or for the specified room)
Returns:
A deferred list of dicts giving the session_data and message metadata for
these room keys.
"""
keyvalues = {
"user_id": user_id,
"version": version,
}
if room_id:
keyvalues['room_id'] = room_id
if session_id:
keyvalues['session_id'] = session_id
rows = yield self._simple_select_list(
table="e2e_room_keys",
keyvalues=keyvalues,
retcols=(
"user_id",
"room_id",
"session_id",
"first_message_index",
"forwarded_count",
"is_verified",
"session_data",
),
desc="get_e2e_room_keys",
)
sessions = {'rooms': {}}
for row in rows:
room_entry = sessions['rooms'].setdefault(row['room_id'], {"sessions": {}})
room_entry['sessions'][row['session_id']] = {
"first_message_index": row["first_message_index"],
"forwarded_count": row["forwarded_count"],
"is_verified": row["is_verified"],
"session_data": json.loads(row["session_data"]),
}
defer.returnValue(sessions)
@defer.inlineCallbacks
def delete_e2e_room_keys(
self, user_id, version, room_id=None, session_id=None
):
"""Bulk delete the E2E room keys for a given backup, optionally filtered to a given
room or a given session.
Args:
user_id(str): the user whose backup we're deleting from
version(str): the version ID of the backup for the set of keys we're deleting
room_id(str): Optional. the ID of the room whose keys we're deleting, if any.
If not specified, we delete the keys for all the rooms in the backup.
session_id(str): Optional. the session whose room_key we're querying, if any.
If specified, we also require the room_id to be specified.
If not specified, we delete all the keys in this version of
the backup (or for the specified room)
Returns:
A deferred of the deletion transaction
"""
keyvalues = {
"user_id": user_id,
"version": version,
}
if room_id:
keyvalues['room_id'] = room_id
if session_id:
keyvalues['session_id'] = session_id
yield self._simple_delete(
table="e2e_room_keys",
keyvalues=keyvalues,
desc="delete_e2e_room_keys",
)
@staticmethod
def _get_current_version(txn, user_id):
txn.execute(
"SELECT MAX(version) FROM e2e_room_keys_versions "
"WHERE user_id=? AND deleted=0",
(user_id,)
)
row = txn.fetchone()
if not row:
raise StoreError(404, 'No current backup version')
return row[0]
def get_e2e_room_keys_version_info(self, user_id, version=None):
"""Get info metadata about a version of our room_keys backup.
Args:
user_id(str): the user whose backup we're querying
version(str): Optional. the version ID of the backup we're querying about
If missing, we return the information about the current version.
Raises:
StoreError: with code 404 if there are no e2e_room_keys_versions present
Returns:
A deferred dict giving the info metadata for this backup version
"""
def _get_e2e_room_keys_version_info_txn(txn):
if version is None:
this_version = self._get_current_version(txn, user_id)
else:
this_version = version
result = self._simple_select_one_txn(
txn,
table="e2e_room_keys_versions",
keyvalues={
"user_id": user_id,
"version": this_version,
"deleted": 0,
},
retcols=(
"version",
"algorithm",
"auth_data",
),
)
result["auth_data"] = json.loads(result["auth_data"])
return result
return self.runInteraction(
"get_e2e_room_keys_version_info",
_get_e2e_room_keys_version_info_txn
)
def create_e2e_room_keys_version(self, user_id, info):
"""Atomically creates a new version of this user's e2e_room_keys store
with the given version info.
Args:
user_id(str): the user whose backup we're creating a version
info(dict): the info about the backup version to be created
Returns:
A deferred string for the newly created version ID
"""
def _create_e2e_room_keys_version_txn(txn):
txn.execute(
"SELECT MAX(version) FROM e2e_room_keys_versions WHERE user_id=?",
(user_id,)
)
current_version = txn.fetchone()[0]
if current_version is None:
current_version = '0'
new_version = str(int(current_version) + 1)
self._simple_insert_txn(
txn,
table="e2e_room_keys_versions",
values={
"user_id": user_id,
"version": new_version,
"algorithm": info["algorithm"],
"auth_data": json.dumps(info["auth_data"]),
},
)
return new_version
return self.runInteraction(
"create_e2e_room_keys_version_txn", _create_e2e_room_keys_version_txn
)
def delete_e2e_room_keys_version(self, user_id, version=None):
"""Delete a given backup version of the user's room keys.
Doesn't delete their actual key data.
Args:
user_id(str): the user whose backup version we're deleting
version(str): Optional. the version ID of the backup version we're deleting
If missing, we delete the current backup version info.
Raises:
StoreError: with code 404 if there are no e2e_room_keys_versions present,
or if the version requested doesn't exist.
"""
def _delete_e2e_room_keys_version_txn(txn):
if version is None:
this_version = self._get_current_version(txn, user_id)
else:
this_version = version
return self._simple_update_one_txn(
txn,
table="e2e_room_keys_versions",
keyvalues={
"user_id": user_id,
"version": this_version,
},
updatevalues={
"deleted": 1,
}
)
return self.runInteraction(
"delete_e2e_room_keys_version",
_delete_e2e_room_keys_version_txn
)

View file

@ -376,33 +376,25 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore,
@defer.inlineCallbacks
def get_missing_events(self, room_id, earliest_events, latest_events,
limit, min_depth):
limit):
ids = yield self.runInteraction(
"get_missing_events",
self._get_missing_events,
room_id, earliest_events, latest_events, limit, min_depth
room_id, earliest_events, latest_events, limit,
)
events = yield self._get_events(ids)
events = sorted(
[ev for ev in events if ev.depth >= min_depth],
key=lambda e: e.depth,
)
defer.returnValue(events[:limit])
defer.returnValue(events)
def _get_missing_events(self, txn, room_id, earliest_events, latest_events,
limit, min_depth):
limit):
earliest_events = set(earliest_events)
front = set(latest_events) - earliest_events
event_results = set()
seen_events = set(earliest_events)
front = set(latest_events) - seen_events
event_results = []
query = (
"SELECT prev_event_id FROM event_edges "
"WHERE event_id = ? AND is_state = ? "
"WHERE room_id = ? AND event_id = ? AND is_state = ? "
"LIMIT ?"
)
@ -411,18 +403,20 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore,
for event_id in front:
txn.execute(
query,
(event_id, False, limit - len(event_results))
(room_id, event_id, False, limit - len(event_results))
)
for e_id, in txn:
new_front.add(e_id)
new_results = set(t[0] for t in txn) - seen_events
new_front -= earliest_events
new_front -= event_results
new_front |= new_results
seen_events |= new_results
event_results.extend(new_results)
front = new_front
event_results |= new_front
# we built the list working backwards from latest_events; we now need to
# reverse it so that the events are approximately chronological.
event_results.reverse()
return event_results

View file

@ -39,6 +39,7 @@ from synapse.storage.event_federation import EventFederationStore
from synapse.storage.events_worker import EventsWorkerStore
from synapse.storage.state import StateGroupWorkerStore
from synapse.types import RoomStreamToken, get_domain_from_id
from synapse.util import batch_iter
from synapse.util.async_helpers import ObservableDeferred
from synapse.util.caches.descriptors import cached, cachedInlineCallbacks
from synapse.util.frozenutils import frozendict_json_encoder
@ -388,12 +389,10 @@ class EventsStore(StateGroupWorkerStore, EventFederationStore, EventsWorkerStore
)
for room_id, ev_ctx_rm in iteritems(events_by_room):
# Work out new extremities by recursively adding and removing
# the new events.
latest_event_ids = yield self.get_latest_event_ids_in_room(
room_id
)
new_latest_event_ids = yield self._calculate_new_extremeties(
new_latest_event_ids = yield self._calculate_new_extremities(
room_id, ev_ctx_rm, latest_event_ids
)
@ -402,6 +401,12 @@ class EventsStore(StateGroupWorkerStore, EventFederationStore, EventsWorkerStore
# No change in extremities, so no change in state
continue
# there should always be at least one forward extremity.
# (except during the initial persistence of the send_join
# results, in which case there will be no existing
# extremities, so we'll `continue` above and skip this bit.)
assert new_latest_event_ids, "No forward extremities left!"
new_forward_extremeties[room_id] = new_latest_event_ids
len_1 = (
@ -519,44 +524,79 @@ class EventsStore(StateGroupWorkerStore, EventFederationStore, EventsWorkerStore
)
@defer.inlineCallbacks
def _calculate_new_extremeties(self, room_id, event_contexts, latest_event_ids):
"""Calculates the new forward extremeties for a room given events to
def _calculate_new_extremities(self, room_id, event_contexts, latest_event_ids):
"""Calculates the new forward extremities for a room given events to
persist.
Assumes that we are only persisting events for one room at a time.
"""
new_latest_event_ids = set(latest_event_ids)
# First, add all the new events to the list
new_latest_event_ids.update(
event.event_id for event, ctx in event_contexts
# we're only interested in new events which aren't outliers and which aren't
# being rejected.
new_events = [
event for event, ctx in event_contexts
if not event.internal_metadata.is_outlier() and not ctx.rejected
]
# start with the existing forward extremities
result = set(latest_event_ids)
# add all the new events to the list
result.update(
event.event_id for event in new_events
)
# Now remove all events that are referenced by the to-be-added events
new_latest_event_ids.difference_update(
# Now remove all events which are prev_events of any of the new events
result.difference_update(
e_id
for event, ctx in event_contexts
for event in new_events
for e_id, _ in event.prev_events
if not event.internal_metadata.is_outlier() and not ctx.rejected
)
# And finally remove any events that are referenced by previously added
# events.
rows = yield self._simple_select_many_batch(
table="event_edges",
column="prev_event_id",
iterable=list(new_latest_event_ids),
retcols=["prev_event_id"],
keyvalues={
"is_state": False,
},
desc="_calculate_new_extremeties",
)
# Finally, remove any events which are prev_events of any existing events.
existing_prevs = yield self._get_events_which_are_prevs(result)
result.difference_update(existing_prevs)
new_latest_event_ids.difference_update(
row["prev_event_id"] for row in rows
)
defer.returnValue(result)
defer.returnValue(new_latest_event_ids)
@defer.inlineCallbacks
def _get_events_which_are_prevs(self, event_ids):
"""Filter the supplied list of event_ids to get those which are prev_events of
existing (non-outlier/rejected) events.
Args:
event_ids (Iterable[str]): event ids to filter
Returns:
Deferred[List[str]]: filtered event ids
"""
results = []
def _get_events(txn, batch):
sql = """
SELECT prev_event_id
FROM event_edges
INNER JOIN events USING (event_id)
LEFT JOIN rejections USING (event_id)
WHERE
prev_event_id IN (%s)
AND NOT events.outlier
AND rejections.event_id IS NULL
""" % (
",".join("?" for _ in batch),
)
txn.execute(sql, batch)
results.extend(r[0] for r in txn)
for chunk in batch_iter(event_ids, 100):
yield self.runInteraction(
"_get_events_which_are_prevs",
_get_events,
chunk,
)
defer.returnValue(results)
@defer.inlineCallbacks
def _get_new_state_after_events(self, room_id, events_context, old_latest_event_ids,
@ -588,10 +628,6 @@ class EventsStore(StateGroupWorkerStore, EventFederationStore, EventsWorkerStore
the new current state is only returned if we've already calculated
it.
"""
if not new_latest_event_ids:
return
# map from state_group to ((type, key) -> event_id) state map
state_groups_map = {}

View file

@ -0,0 +1,39 @@
/* Copyright 2017 New Vector Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-- users' optionally backed up encrypted e2e sessions
CREATE TABLE e2e_room_keys (
user_id TEXT NOT NULL,
room_id TEXT NOT NULL,
session_id TEXT NOT NULL,
version TEXT NOT NULL,
first_message_index INT,
forwarded_count INT,
is_verified BOOLEAN,
session_data TEXT NOT NULL
);
CREATE UNIQUE INDEX e2e_room_keys_idx ON e2e_room_keys(user_id, room_id, session_id);
-- the metadata for each generation of encrypted e2e session backups
CREATE TABLE e2e_room_keys_versions (
user_id TEXT NOT NULL,
version TEXT NOT NULL,
algorithm TEXT NOT NULL,
auth_data TEXT NOT NULL,
deleted SMALLINT DEFAULT 0 NOT NULL
);
CREATE UNIQUE INDEX e2e_room_keys_versions_idx ON e2e_room_keys_versions(user_id, version);

View file

@ -630,7 +630,21 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore):
@defer.inlineCallbacks
def get_all_new_events_stream(self, from_id, current_id, limit):
"""Get all new events"""
"""Get all new events
Returns all events with from_id < stream_ordering <= current_id.
Args:
from_id (int): the stream_ordering of the last event we processed
current_id (int): the stream_ordering of the most recently processed event
limit (int): the maximum number of events to return
Returns:
Deferred[Tuple[int, list[FrozenEvent]]]: A tuple of (next_id, events), where
`next_id` is the next value to pass as `from_id` (it will either be the
stream_ordering of the last returned event, or, if fewer than `limit` events
were found, `current_id`.
"""
def get_all_new_events_stream_txn(txn):
sql = (