Refactor state group lookup to reduce DB hits (#4011)

Currently when fetching state groups from the data store we make two
hits two the database: once for members and once for non-members (unless
request is filtered to one or the other). This adds needless load to the
datbase, so this PR refactors the lookup to make only a single database
hit.
This commit is contained in:
Erik Johnston 2018-10-25 17:49:55 +01:00 committed by GitHub
parent e5da60d75d
commit cb53ce9d64
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 717 additions and 488 deletions

View file

@ -27,6 +27,7 @@ from twisted.internet import defer
from synapse.api.constants import EventTypes, Membership
from synapse.push.clientformat import format_push_rules_for_user
from synapse.storage.roommember import MemberSummary
from synapse.storage.state import StateFilter
from synapse.types import RoomStreamToken
from synapse.util.async_helpers import concurrently_execute
from synapse.util.caches.expiringcache import ExpiringCache
@ -469,25 +470,20 @@ class SyncHandler(object):
))
@defer.inlineCallbacks
def get_state_after_event(self, event, types=None, filtered_types=None):
def get_state_after_event(self, event, state_filter=StateFilter.all()):
"""
Get the room state after the given event
Args:
event(synapse.events.EventBase): event of interest
types(list[(str, str|None)]|None): List of (type, state_key) tuples
which are used to filter the state fetched. If `state_key` is None,
all events are returned of the given type.
May be None, which matches any key.
filtered_types(list[str]|None): Only apply filtering via `types` to this
list of event types. Other types of events are returned unfiltered.
If None, `types` filtering is applied to all events.
state_filter (StateFilter): The state filter used to fetch state
from the database.
Returns:
A Deferred map from ((type, state_key)->Event)
"""
state_ids = yield self.store.get_state_ids_for_event(
event.event_id, types, filtered_types=filtered_types,
event.event_id, state_filter=state_filter,
)
if event.is_state():
state_ids = state_ids.copy()
@ -495,18 +491,14 @@ class SyncHandler(object):
defer.returnValue(state_ids)
@defer.inlineCallbacks
def get_state_at(self, room_id, stream_position, types=None, filtered_types=None):
def get_state_at(self, room_id, stream_position, state_filter=StateFilter.all()):
""" Get the room state at a particular stream position
Args:
room_id(str): room for which to get state
stream_position(StreamToken): point at which to get state
types(list[(str, str|None)]|None): List of (type, state_key) tuples
which are used to filter the state fetched. If `state_key` is None,
all events are returned of the given type.
filtered_types(list[str]|None): Only apply filtering via `types` to this
list of event types. Other types of events are returned unfiltered.
If None, `types` filtering is applied to all events.
state_filter (StateFilter): The state filter used to fetch state
from the database.
Returns:
A Deferred map from ((type, state_key)->Event)
@ -522,7 +514,7 @@ class SyncHandler(object):
if last_events:
last_event = last_events[-1]
state = yield self.get_state_after_event(
last_event, types, filtered_types=filtered_types,
last_event, state_filter=state_filter,
)
else:
@ -563,10 +555,11 @@ class SyncHandler(object):
last_event = last_events[-1]
state_ids = yield self.store.get_state_ids_for_event(
last_event.event_id, [
last_event.event_id,
state_filter=StateFilter.from_types([
(EventTypes.Name, ''),
(EventTypes.CanonicalAlias, ''),
]
]),
)
# this is heavily cached, thus: fast.
@ -717,8 +710,7 @@ class SyncHandler(object):
with Measure(self.clock, "compute_state_delta"):
types = None
filtered_types = None
members_to_fetch = None
lazy_load_members = sync_config.filter_collection.lazy_load_members()
include_redundant_members = (
@ -729,16 +721,21 @@ class SyncHandler(object):
# We only request state for the members needed to display the
# timeline:
types = [
(EventTypes.Member, state_key)
for state_key in set(
event.sender # FIXME: we also care about invite targets etc.
for event in batch.events
)
]
members_to_fetch = set(
event.sender # FIXME: we also care about invite targets etc.
for event in batch.events
)
# only apply the filtering to room members
filtered_types = [EventTypes.Member]
if full_state:
# always make sure we LL ourselves so we know we're in the room
# (if we are) to fix https://github.com/vector-im/riot-web/issues/7209
# We only need apply this on full state syncs given we disabled
# LL for incr syncs in #3840.
members_to_fetch.add(sync_config.user.to_string())
state_filter = StateFilter.from_lazy_load_member_list(members_to_fetch)
else:
state_filter = StateFilter.all()
timeline_state = {
(event.type, event.state_key): event.event_id
@ -746,28 +743,19 @@ class SyncHandler(object):
}
if full_state:
if lazy_load_members:
# always make sure we LL ourselves so we know we're in the room
# (if we are) to fix https://github.com/vector-im/riot-web/issues/7209
# We only need apply this on full state syncs given we disabled
# LL for incr syncs in #3840.
types.append((EventTypes.Member, sync_config.user.to_string()))
if batch:
current_state_ids = yield self.store.get_state_ids_for_event(
batch.events[-1].event_id, types=types,
filtered_types=filtered_types,
batch.events[-1].event_id, state_filter=state_filter,
)
state_ids = yield self.store.get_state_ids_for_event(
batch.events[0].event_id, types=types,
filtered_types=filtered_types,
batch.events[0].event_id, state_filter=state_filter,
)
else:
current_state_ids = yield self.get_state_at(
room_id, stream_position=now_token, types=types,
filtered_types=filtered_types,
room_id, stream_position=now_token,
state_filter=state_filter,
)
state_ids = current_state_ids
@ -781,8 +769,7 @@ class SyncHandler(object):
)
elif batch.limited:
state_at_timeline_start = yield self.store.get_state_ids_for_event(
batch.events[0].event_id, types=types,
filtered_types=filtered_types,
batch.events[0].event_id, state_filter=state_filter,
)
# for now, we disable LL for gappy syncs - see
@ -797,17 +784,15 @@ class SyncHandler(object):
# members to just be ones which were timeline senders, which then ensures
# all of the rest get included in the state block (if we need to know
# about them).
types = None
filtered_types = None
state_filter = StateFilter.all()
state_at_previous_sync = yield self.get_state_at(
room_id, stream_position=since_token, types=types,
filtered_types=filtered_types,
room_id, stream_position=since_token,
state_filter=state_filter,
)
current_state_ids = yield self.store.get_state_ids_for_event(
batch.events[-1].event_id, types=types,
filtered_types=filtered_types,
batch.events[-1].event_id, state_filter=state_filter,
)
state_ids = _calculate_state(
@ -821,7 +806,7 @@ class SyncHandler(object):
else:
state_ids = {}
if lazy_load_members:
if types and batch.events:
if members_to_fetch and batch.events:
# We're returning an incremental sync, with no
# "gap" since the previous sync, so normally there would be
# no state to return.
@ -831,8 +816,12 @@ class SyncHandler(object):
# timeline here, and then dedupe any redundant ones below.
state_ids = yield self.store.get_state_ids_for_event(
batch.events[0].event_id, types=types,
filtered_types=None, # we only want members!
batch.events[0].event_id,
# we only want members!
state_filter=StateFilter.from_types(
(EventTypes.Member, member)
for member in members_to_fetch
),
)
if lazy_load_members and not include_redundant_members: