Start implementing incremental initial sync

This commit is contained in:
Mark Haines 2015-01-27 16:24:22 +00:00
parent 436513068d
commit a56008842b
3 changed files with 241 additions and 34 deletions

View File

@ -137,6 +137,7 @@ def serialize_event(e, time_now_ms, client_event=True, strip_ids=False):
d.pop("depth", None) d.pop("depth", None)
d.pop("unsigned", None) d.pop("unsigned", None)
d.pop("origin", None) d.pop("origin", None)
d.pop("prev_state", None)
if strip_ids: if strip_ids:
d.pop("room_id", None) d.pop("room_id", None)

View File

@ -37,14 +37,18 @@ SyncConfig = collections.namedtuple("SyncConfig", [
]) ])
RoomSyncResult = collections.namedtuple("RoomSyncResult", [ class RoomSyncResult(collections.namedtuple("RoomSyncResult", [
"room_id", "room_id",
"limited", "limited",
"published", "published",
"events", # dict of event "events",
"state", "state",
"prev_batch", "prev_batch",
]) ])):
__slots__ = []
def __nonzero__(self):
return bool(self.events or self.state)
class SyncResult(collections.namedtuple("SyncResult", [ class SyncResult(collections.namedtuple("SyncResult", [
@ -56,7 +60,9 @@ class SyncResult(collections.namedtuple("SyncResult", [
__slots__ = [] __slots__ = []
def __nonzero__(self): def __nonzero__(self):
return self.private_user_data or self.public_user_data or self.rooms return bool(
self.private_user_data or self.public_user_data or self.rooms
)
class SyncHandler(BaseHandler): class SyncHandler(BaseHandler):
@ -67,7 +73,13 @@ class SyncHandler(BaseHandler):
self.clock = hs.get_clock() self.clock = hs.get_clock()
def wait_for_sync_for_user(self, sync_config, since_token=None, timeout=0): def wait_for_sync_for_user(self, sync_config, since_token=None, timeout=0):
if timeout == 0: """Get the sync for a client if we have new data for it now. Otherwise
wait for new data to arrive on the server. If the timeout expires, then
return an empty sync result.
Returns:
A Deferred SyncResult.
"""
if timeout == 0 or since_token is None:
return self.current_sync_for_user(sync_config, since_token) return self.current_sync_for_user(sync_config, since_token)
else: else:
def current_sync_callback(since_token): def current_sync_callback(since_token):
@ -79,13 +91,25 @@ class SyncHandler(BaseHandler):
) )
def current_sync_for_user(self, sync_config, since_token=None): def current_sync_for_user(self, sync_config, since_token=None):
"""Get the sync for client needed to match what the server has now.
Returns:
A Deferred SyncResult.
"""
if since_token is None: if since_token is None:
return self.initial_sync(sync_config) return self.initial_sync(sync_config)
else: else:
return self.incremental_sync(sync_config) if sync_config.gap:
return self.incremental_sync_with_gap(sync_config, since_token)
else:
#TODO(mjark): Handle gapless sync
pass
@defer.inlineCallbacks @defer.inlineCallbacks
def initial_sync(self, sync_config): def initial_sync(self, sync_config):
"""Get a sync for a client which is starting without any state
Returns:
A Deferred SyncResult.
"""
if sync_config.sort == "timeline,desc": if sync_config.sort == "timeline,desc":
# TODO(mjark): Handle going through events in reverse order?. # TODO(mjark): Handle going through events in reverse order?.
# What does "most recent events" mean when applying the limits mean # What does "most recent events" mean when applying the limits mean
@ -114,26 +138,87 @@ class SyncHandler(BaseHandler):
rooms = [] rooms = []
for event in room_list: for event in room_list:
#TODO (mjark): Apply the event filter in sync_config. room_sync = yield self.initial_sync_for_room(
event.room_id, sync_config, now_token, published_room_ids
)
rooms.append(room_sync)
defer.returnValue(SyncResult(
public_user_data=presence,
private_user_data=[],
rooms=rooms,
next_batch=now_token,
))
@defer.inlineCallbacks
def intial_sync_for_room(self, room_id, sync_config, now_token,
published_room_ids):
"""Sync a room for a client which is starting without any state
Returns:
A Deferred RoomSyncResult.
"""
recent_events, token = yield self.store.get_recent_events_for_room( recent_events, token = yield self.store.get_recent_events_for_room(
event.room_id, room_id,
limit=sync_config.limit, limit=sync_config.limit,
end_token=now_token.room_key, end_token=now_token.room_key,
) )
prev_batch_token = now_token.copy_and_replace("room_key", token[0]) prev_batch_token = now_token.copy_and_replace("room_key", token[0])
current_state_events = yield self.state_handler.get_current_state( current_state_events = yield self.state_handler.get_current_state(
event.room_id room_id
) )
rooms.append(RoomSyncResult( defer.returnValue(RoomSyncResult(
room_id=event.room_id, room_id=room_id,
published=event.room_id in published_room_ids, published=room_id in published_room_ids,
events=recent_events, events=recent_events,
prev_batch=prev_batch_token, prev_batch=prev_batch_token,
state=current_state_events, state=current_state_events,
limited=True, limited=True,
)) ))
@defer.inlineCallbacks
def incremental_sync_with_gap(self, sync_config, since_token):
""" Get the incremental delta needed to bring the client up to
date with the server.
Returns:
A Deferred SyncResult.
"""
if sync_config.sort == "timeline,desc":
# TODO(mjark): Handle going through events in reverse order?.
# What does "most recent events" mean when applying the limits mean
# in this case?
raise NotImplementedError()
now_token = yield self.event_sources.get_current_token()
presence_stream = self.event_sources.sources["presence"]
pagination_config = PaginationConfig(
from_token=since_token, to_token=now_token
)
presence, _ = yield presence_stream.get_pagination_rows(
user=sync_config.user,
pagination_config=pagination_config.get_source_config("presence"),
key=None
)
room_list = yield self.store.get_rooms_for_user_where_membership_is(
user_id=sync_config.user.to_string(),
membership_list=[Membership.INVITE, Membership.JOIN]
)
# TODO (mjark): Does public mean "published"?
published_rooms = yield self.store.get_rooms(is_public=True)
published_room_ids = set(r["room_id"] for r in published_rooms)
rooms = []
for event in room_list:
room_sync = yield self.incremental_sync_with_gap_for_room(
event.room_id, sync_config, since_token, now_token,
published_room_ids
)
if room_sync:
rooms.append(room_sync)
defer.returnValue(SyncResult( defer.returnValue(SyncResult(
public_user_data=presence, public_user_data=presence,
private_user_data=[], private_user_data=[],
@ -143,5 +228,103 @@ class SyncHandler(BaseHandler):
@defer.inlineCallbacks @defer.inlineCallbacks
def incremental_sync(self, sync_config): def incremental_sync_with_gap_for_room(self, room_id, sync_config,
pass since_token, now_token,
published_room_ids):
""" Get the incremental delta needed to bring the client up to date for
the room. Gives the client the most recent events and the changes to
state.
Returns:
A Deferred RoomSyncResult
"""
# TODO(mjark): Check if they have joined the room between
# the previous sync and this one.
# TODO(mjark): Apply the event filter in sync_config
# TODO(mjark): Check for redactions we might have missed.
# TODO(mjark): Typing notifications.
recents, token = yield self.store.get_recent_events_for_room(
room_id,
limit=sync_config.limit + 1,
from_token=since_token.room_key,
end_token=now_token.room_key,
)
logging.debug("Recents %r", recents)
if len(recents) > sync_config.limit:
limited = True
recents = recents[1:]
else:
limited = False
prev_batch_token = now_token.copy_and_replace("room_key", token[0])
# TODO(mjark): This seems racy since this isn't being passed a
# token to indicate what point in the stream this is
current_state_events = yield self.state_handler.get_current_state(
room_id
)
state_at_previous_sync = yield self.get_state_at_previous_sync(
room_id, since_token=since_token
)
state_events_delta = yield self.compute_state_delta(
since_token=since_token,
previous_state=state_at_previous_sync,
current_state=current_state_events,
)
room_sync = RoomSyncResult(
room_id=room_id,
published=room_id in published_room_ids,
events=recents,
prev_batch=prev_batch_token,
state=state_events_delta,
limited=limited,
)
logging.debug("Room sync: %r", room_sync)
defer.returnValue(room_sync)
@defer.inlineCallbacks
def get_state_at_previous_sync(self, room_id, since_token):
""" Get the room state at the previous sync the client made.
Returns:
A Deferred list of Events.
"""
last_events, token = yield self.store.get_recent_events_for_room(
room_id, end_token=since_token.room_key, limit=1,
)
if last_events:
last_event = last_events[0]
last_context = yield self.state_handler.compute_event_context(
last_event
)
if last_event.is_state():
state = [last_event] + last_context.current_state.values()
else:
state = last_context.current_state.values()
else:
state = ()
defer.returnValue(state)
def compute_state_delta(self, since_token, previous_state, current_state):
""" Works out the differnce in state between the current state and the
state the client got when it last performed a sync.
Returns:
A list of events.
"""
# TODO(mjark) Check if the state events were received by the server
# after the previous sync, since we need to include those state
# updates even if they occured logically before the previous event.
# TODO(mjark) Check for new redactions in the state events.
previous_dict = {event.event_id:event for event in previous_state}
state_delta = []
for event in current_state:
if event.event_id not in previous_dict:
state_delta.append(event)
return state_delta

View File

@ -265,17 +265,38 @@ class StreamStore(SQLBaseStore):
return self.runInteraction("paginate_room_events", f) return self.runInteraction("paginate_room_events", f)
def get_recent_events_for_room(self, room_id, limit, end_token, def get_recent_events_for_room(self, room_id, limit, end_token,
with_feedback=False): with_feedback=False, from_token=None):
# TODO (erikj): Handle compressed feedback # TODO (erikj): Handle compressed feedback
end_token = _StreamToken.parse_stream_token(end_token)
if from_token is None:
sql = ( sql = (
"SELECT stream_ordering, topological_ordering, event_id FROM events " "SELECT stream_ordering, topological_ordering, event_id"
"WHERE room_id = ? AND stream_ordering <= ? AND outlier = 0 " " FROM events"
"ORDER BY topological_ordering DESC, stream_ordering DESC LIMIT ? " " WHERE room_id = ? AND stream_ordering <= ? AND outlier = 0"
" ORDER BY topological_ordering DESC, stream_ordering DESC"
" LIMIT ?"
)
else:
from_token = _StreamToken.parse_stream_token(from_token)
sql = (
"SELECT stream_ordering, topological_ordering, event_id"
" FROM events"
" WHERE room_id = ? AND stream_ordering > ?"
" AND stream_ordering <= ? AND outlier = 0"
" ORDER BY topological_ordering DESC, stream_ordering DESC"
" LIMIT ?"
) )
def f(txn):
txn.execute(sql, (room_id, end_token, limit,)) def get_recent_events_for_room_txn(txn):
if from_token is None:
txn.execute(sql, (room_id, end_token.stream, limit,))
else:
txn.execute(sql, (
room_id, from_token.stream, end_token.stream, limit
))
rows = self.cursor_to_dict(txn) rows = self.cursor_to_dict(txn)
@ -303,7 +324,9 @@ class StreamStore(SQLBaseStore):
return events, token return events, token
return self.runInteraction("get_recent_events_for_room", f) return self.runInteraction(
"get_recent_events_for_room", get_recent_events_for_room_txn
)
def get_room_events_max_id(self): def get_room_events_max_id(self):
return self.runInteraction( return self.runInteraction(