From 90abdaf3bcd3eceb6dd5688f9ef3623382883173 Mon Sep 17 00:00:00 2001
From: Mark Haines
Date: Mon, 1 Jun 2015 10:51:50 +0100
Subject: [PATCH 001/266] Use Twisted-15.2.1, Use Agent.usingEndpointFactory
 rather than implement our own Agent

---
 synapse/http/matrixfederationclient.py | 75 +++++++++-----------------
 synapse/python_dependencies.py         |  2 +-
 2 files changed, 26 insertions(+), 51 deletions(-)

diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py
index 7f3d8fc88..ec5b06ddc 100644
--- a/synapse/http/matrixfederationclient.py
+++ b/synapse/http/matrixfederationclient.py
@@ -16,7 +16,7 @@
 from twisted.internet import defer, reactor, protocol
 from twisted.internet.error import DNSLookupError
-from twisted.web.client import readBody, _AgentBase, _URI, HTTPConnectionPool
+from twisted.web.client import readBody, HTTPConnectionPool, Agent
 from twisted.web.http_headers import Headers
 from twisted.web._newclient import ResponseDone
@@ -53,41 +53,17 @@ incoming_responses_counter = metrics.register_counter(
 )
 
-class MatrixFederationHttpAgent(_AgentBase):
+class MatrixFederationEndpointFactory(object):
+    def __init__(self, hs):
+        self.tls_context_factory = hs.tls_context_factory
 
-    def __init__(self, reactor, pool=None):
-        _AgentBase.__init__(self, reactor, pool)
+    def endpointForURI(self, uri):
+        destination = uri.netloc
 
-    def request(self, destination, endpoint, method, path, params, query,
-                headers, body_producer):
-
-        outgoing_requests_counter.inc(method)
-
-        host = b""
-        port = 0
-        fragment = b""
-
-        parsed_URI = _URI(b"http", destination, host, port, path, params,
-                          query, fragment)
-
-        # Set the connection pool key to be the destination.
-        key = destination
-
-        d = self._requestWithEndpoint(key, endpoint, method, parsed_URI,
-                                      headers, body_producer,
-                                      parsed_URI.originForm)
-
-        def _cb(response):
-            incoming_responses_counter.inc(method, response.code)
-            return response
-
-        def _eb(failure):
-            incoming_responses_counter.inc(method, "ERR")
-            return failure
-
-        d.addCallbacks(_cb, _eb)
-
-        return d
+        return matrix_federation_endpoint(
+            reactor, destination, timeout=10,
+            ssl_context_factory=self.tls_context_factory
+        )
 
 
 class MatrixFederationHttpClient(object):
@@ -105,10 +81,17 @@ class MatrixFederationHttpClient(object):
         self.server_name = hs.hostname
         pool = HTTPConnectionPool(reactor)
         pool.maxPersistentPerHost = 10
-        self.agent = MatrixFederationHttpAgent(reactor, pool=pool)
+        self.agent = Agent.usingEndpointFactory(
+            reactor, MatrixFederationEndpointFactory(hs), pool=pool
+        )
         self.clock = hs.get_clock()
         self.version_string = hs.version_string
 
+    def _create_url(self, destination, path_bytes, param_bytes, query_bytes):
+        return urlparse.urlunparse(
+            ("matrix", destination, path_bytes, param_bytes, query_bytes, "")
+        )
+
     @defer.inlineCallbacks
     def _create_request(self, destination, method, path_bytes,
                         body_callback, headers_dict={}, param_bytes=b"",
@@ -119,8 +102,8 @@ class MatrixFederationHttpClient(object):
         headers_dict[b"User-Agent"] = [self.version_string]
         headers_dict[b"Host"] = [destination]
 
-        url_bytes = urlparse.urlunparse(
-            ("", "", path_bytes, param_bytes, query_bytes, "",)
+        url_bytes = self._create_url(
+            destination, path_bytes, param_bytes, query_bytes
         )
 
         logger.info("Sending request to %s: %s %s",
@@ -139,22 +122,20 @@ class MatrixFederationHttpClient(object):
         # (once we have reliable transactions in place)
         retries_left = 5
 
-        endpoint = self._getEndpoint(reactor, destination)
+        http_url_bytes = urlparse.urlunparse(
+            ("", "", path_bytes, param_bytes, query_bytes, "")
+        )
 
         while True:
             producer = None
             if body_callback:
-                producer = body_callback(method, url_bytes, headers_dict)
+                producer = body_callback(method, http_url_bytes, headers_dict)
 
             try:
                 request_deferred = preserve_context_over_fn(
                     self.agent.request,
-                    destination,
-                    endpoint,
                     method,
-                    path_bytes,
-                    param_bytes,
-                    query_bytes,
+                    url_bytes,
                     Headers(headers_dict),
                     producer
                 )
@@ -442,12 +423,6 @@ class MatrixFederationHttpClient(object):
 
         defer.returnValue((length, headers))
 
-    def _getEndpoint(self, reactor, destination):
-        return matrix_federation_endpoint(
-            reactor, destination, timeout=10,
-            ssl_context_factory=self.hs.tls_context_factory
-        )
-
 
 class _ReadBodyToFileProtocol(protocol.Protocol):
     def __init__(self, stream, deferred, max_size):
diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py
index a45dd3c93..d1bf6d50c 100644
--- a/synapse/python_dependencies.py
+++ b/synapse/python_dependencies.py
@@ -19,7 +19,7 @@ logger = logging.getLogger(__name__)
 
 REQUIREMENTS = {
     "syutil>=0.0.6": ["syutil>=0.0.6"],
-    "Twisted==14.0.2": ["twisted==14.0.2"],
+    "Twisted==15.2.1": ["twisted==15.2.1"],
     "service_identity>=1.0.0": ["service_identity>=1.0.0"],
     "pyopenssl>=0.14": ["OpenSSL>=0.14"],
     "pyyaml": ["yaml"],
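The endpoint-factory pattern this patch switches to can be sketched in isolation roughly as follows, against the Twisted 15.x public API; the factory class here is invented for illustration and is not Synapse code:

from twisted.internet import reactor
from twisted.internet.endpoints import TCP4ClientEndpoint
from twisted.web.client import Agent, HTTPConnectionPool

class SimpleEndpointFactory(object):
    # Agent.usingEndpointFactory delegates "how do I connect for this
    # URI?" to endpointForURI(), which is the hook the patch uses to
    # return a matrix_federation_endpoint instead of a plain TCP one.
    def __init__(self, reactor):
        self.reactor = reactor

    def endpointForURI(self, uri):
        # uri is a parsed twisted.web.client.URI; a real factory could
        # also consult SRV records or TLS settings here.
        return TCP4ClientEndpoint(self.reactor, uri.host, uri.port)

pool = HTTPConnectionPool(reactor)
agent = Agent.usingEndpointFactory(
    reactor, SimpleEndpointFactory(reactor), pool=pool
)
d = agent.request(b"GET", b"http://example.com/")
# reactor.run() would drive the request to completion

Because the connection logic now lives in one object handed to the stock Agent, the private _AgentBase/_URI internals used by the old custom agent are no longer needed.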
From 378a0f7a79a22a1211e3d048b7fd9e99d1efc7de Mon Sep 17 00:00:00 2001
From: manuroe
Date: Thu, 4 Jun 2015 17:58:17 +0200
Subject: [PATCH 002/266] Federation demo start.sh: Fixed --no-rate-limit
 param in the script

---
 demo/clean.sh |  6 ++++--
 demo/start.sh | 17 +++++++++--------
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/demo/clean.sh b/demo/clean.sh
index c5dabd476..31007737f 100755
--- a/demo/clean.sh
+++ b/demo/clean.sh
@@ -11,7 +11,9 @@ if [ -f $PID_FILE ]; then
     exit 1
 fi
 
-find "$DIR" -name "*.log" -delete
-find "$DIR" -name "*.db" -delete
+for port in 8080 8081 8082; do
+    rm -rf demo/$port
+    rm -rf demo/media_store.$port
+done
 
 rm -rf $DIR/etc
diff --git a/demo/start.sh b/demo/start.sh
index b9cc14b9d..b5dea5e17 100755
--- a/demo/start.sh
+++ b/demo/start.sh
@@ -8,14 +8,6 @@ cd "$DIR/.."
 
 mkdir -p demo/etc
 
-# Check the --no-rate-limit param
-PARAMS=""
-if [ $# -eq 1 ]; then
-    if [ $1 = "--no-rate-limit" ]; then
-        PARAMS="--rc-messages-per-second 1000 --rc-message-burst-count 1000"
-    fi
-fi
-
 export PYTHONPATH=$(readlink -f $(pwd))
 
 
@@ -35,6 +27,15 @@ for port in 8080 8081 8082; do
         -H "localhost:$https_port" \
         --config-path "$DIR/etc/$port.config" \
 
+    # Check script parameters
+    if [ $# -eq 1 ]; then
+        if [ $1 = "--no-rate-limit" ]; then
+            # Set high limits in config file to disable rate limiting
+            perl -p -i -e 's/rc_messages_per_second.*/rc_messages_per_second: 1000/g' $DIR/etc/$port.config
+            perl -p -i -e 's/rc_message_burst_count.*/rc_message_burst_count: 1000/g' $DIR/etc/$port.config
+        fi
+    fi
+
     python -m synapse.app.homeserver \
         --config-path "$DIR/etc/$port.config" \
         -D \
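The fix works by rewriting the generated per-port config files instead of passing command-line flags. A rough Python equivalent of the perl one-liners above, with an illustrative file path:

import re

def disable_rate_limits(config_path):
    # Mirror the script: raise the rate-limit settings in a generated
    # homeserver config so the demo is effectively unthrottled.
    with open(config_path) as f:
        text = f.read()
    text = re.sub(r"rc_messages_per_second.*",
                  "rc_messages_per_second: 1000", text)
    text = re.sub(r"rc_message_burst_count.*",
                  "rc_message_burst_count: 1000", text)
    with open(config_path, "w") as f:
        f.write(text)

disable_rate_limits("demo/etc/8080.config")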
From 66da8f60d057527fe74ff5790973d2865c980efb Mon Sep 17 00:00:00 2001
From: Mark Haines
Date: Mon, 15 Jun 2015 16:27:20 +0100
Subject: [PATCH 003/266] Bump the version of twisted needed for
 setup_requires to 15.2.1

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index f9929591e..c790484c7 100755
--- a/setup.py
+++ b/setup.py
@@ -48,7 +48,7 @@ setup(
     description="Reference Synapse Home Server",
     install_requires=dependencies['requirements'](include_conditional=True).keys(),
     setup_requires=[
-        "Twisted==14.0.2", # Here to override setuptools_trial's dependency on Twisted>=2.4.0
+        "Twisted==15.2.1", # Here to override setuptools_trial's dependency on Twisted>=2.4.0
         "setuptools_trial",
         "mock"
     ],

From 6924852592e4e3737ccd050938abc266ae71bf18 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Tue, 23 Jun 2015 11:01:04 +0100
Subject: [PATCH 004/266] Batch SELECTs in _get_auth_chain_ids_txn

---
 synapse/storage/event_federation.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/synapse/storage/event_federation.py b/synapse/storage/event_federation.py
index 1ba073884..ace745953 100644
--- a/synapse/storage/event_federation.py
+++ b/synapse/storage/event_federation.py
@@ -49,14 +49,22 @@ class EventFederationStore(SQLBaseStore):
         results = set()
 
         base_sql = (
-            "SELECT auth_id FROM event_auth WHERE event_id = ?"
+            "SELECT auth_id FROM event_auth WHERE event_id IN (%s)"
         )
 
         front = set(event_ids)
         while front:
             new_front = set()
-            for f in front:
-                txn.execute(base_sql, (f,))
+            front_list = list(front)
+            chunks = [
+                front_list[x:x+100]
+                for x in xrange(0, len(front), 100)
+            ]
+            for chunk in chunks:
+                txn.execute(
+                    base_sql % (",".join(["?"] * len(chunk)),),
+                    chunk
+                )
                 new_front.update([r[0] for r in txn.fetchall()])
 
             new_front -= results
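The batching idea in the patch above — replacing one SELECT per id with chunked IN (...) placeholder lists — works the same way outside Synapse. A self-contained sketch against sqlite3, using a toy copy of the event_auth table rather than a real homeserver database:

import sqlite3

def auth_chain_ids(conn, event_ids, chunk_size=100):
    # Walk the auth graph breadth-first, querying up to chunk_size ids
    # per SELECT instead of issuing one SELECT per id.
    results = set()
    front = set(event_ids)
    while front:
        new_front = set()
        front_list = list(front)
        for i in range(0, len(front_list), chunk_size):
            chunk = front_list[i:i + chunk_size]
            sql = "SELECT auth_id FROM event_auth WHERE event_id IN (%s)" % (
                ",".join(["?"] * len(chunk)),
            )
            new_front.update(row[0] for row in conn.execute(sql, chunk))
        new_front -= results
        front = new_front
        results.update(front)
    return results

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE event_auth (event_id TEXT, auth_id TEXT)")
conn.executemany("INSERT INTO event_auth VALUES (?, ?)",
                 [("C", "B"), ("B", "A")])
print(auth_chain_ids(conn, ["C"]))  # {'A', 'B'} (set order may vary)

Capping each chunk at 100 placeholders keeps the statements well under typical SQL parameter limits while still cutting the number of round trips by two orders of magnitude.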
From 6825eef9556b6f913252f0bb04676e39d9ddb808 Mon Sep 17 00:00:00 2001
From: David Baker
Date: Tue, 23 Jun 2015 14:26:14 +0100
Subject: [PATCH 005/266] Oops: underride rule had an identifier with override
 in it.

---
 synapse/push/baserules.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/push/baserules.py b/synapse/push/baserules.py
index f3d1cf5c5..1f015a7f2 100644
--- a/synapse/push/baserules.py
+++ b/synapse/push/baserules.py
@@ -164,7 +164,7 @@ def make_base_append_underride_rules(user):
         ]
     },
     {
-        'rule_id': 'global/override/.m.rule.contains_display_name',
+        'rule_id': 'global/underride/.m.rule.contains_display_name',
         'conditions': [
             {
                 'kind': 'contains_display_name'

From 5130d80d79fe1f95ce03b8f1cfd4fbf0a32f5ac8 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Thu, 25 Jun 2015 17:18:19 +0100
Subject: [PATCH 006/266] Add bulk insert events API

---
 synapse/federation/federation_client.py |   6 +
 synapse/handlers/federation.py          | 225 +++++++------
 synapse/storage/event_federation.py     | 108 +++---
 synapse/storage/events.py               | 417 ++++++++++++++----------
 synapse/storage/roommember.py           |  44 +--
 synapse/storage/signatures.py           |  28 +-
 synapse/storage/state.py                |  36 +-
 synapse/storage/util/id_generators.py   |  31 ++
 8 files changed, 521 insertions(+), 374 deletions(-)

diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py
index d3b46b24c..7ee3c66bf 100644
--- a/synapse/federation/federation_client.py
+++ b/synapse/federation/federation_client.py
@@ -327,6 +327,9 @@ class FederationClient(FederationBase):
     @defer.inlineCallbacks
     def make_join(self, destinations, room_id, user_id):
         for destination in destinations:
+            if destination == self.server_name:
+                continue
+
             try:
                 ret = yield self.transport_layer.make_join(
                     destination, room_id, user_id
                 )
@@ -353,6 +356,9 @@ class FederationClient(FederationBase):
     @defer.inlineCallbacks
     def send_join(self, destinations, pdu):
         for destination in destinations:
+            if destination == self.server_name:
+                continue
+
             try:
                 time_now = self._clock.time_msec()
                 _, content = yield self.transport_layer.send_join(
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index b5d882fd6..079f46dff 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -138,26 +138,29 @@ class FederationHandler(BaseHandler):
         if state and auth_chain is not None:
             # If we have any state or auth_chain given to us by the replication
             # layer, then we should handle them (if we haven't before.)
+ + event_infos = [] + for e in itertools.chain(auth_chain, state): if e.event_id in seen_ids: continue - e.internal_metadata.outlier = True - try: - auth_ids = [e_id for e_id, _ in e.auth_events] - auth = { - (e.type, e.state_key): e for e in auth_chain - if e.event_id in auth_ids - } - yield self._handle_new_event( - origin, e, auth_events=auth - ) - seen_ids.add(e.event_id) - except: - logger.exception( - "Failed to handle state event %s", - e.event_id, - ) + auth_ids = [e_id for e_id, _ in e.auth_events] + auth = { + (e.type, e.state_key): e for e in auth_chain + if e.event_id in auth_ids + } + event_infos.append({ + "event": e, + "auth_events": auth, + }) + seen_ids.add(e.event_id) + + yield self._handle_new_events( + origin, + event_infos, + outliers=True + ) try: _, event_stream_id, max_stream_id = yield self._handle_new_event( @@ -292,38 +295,29 @@ class FederationHandler(BaseHandler): ).addErrback(unwrapFirstError) auth_events.update({a.event_id: a for a in results}) - yield defer.gatherResults( - [ - self._handle_new_event( - dest, a, - auth_events={ - (auth_events[a_id].type, auth_events[a_id].state_key): - auth_events[a_id] - for a_id, _ in a.auth_events - }, - ) - for a in auth_events.values() - if a.event_id not in seen_events - ], - consumeErrors=True, - ).addErrback(unwrapFirstError) + ev_infos = [] + for a in auth_events.values(): + if a.event_id in seen_events: + continue + ev_infos.append({ + "event": a, + "auth_events": { + (auth_events[a_id].type, auth_events[a_id].state_key): + auth_events[a_id] + for a_id, _ in a.auth_events + } + }) - yield defer.gatherResults( - [ - self._handle_new_event( - dest, event_map[e_id], - state=events_to_state[e_id], - backfilled=True, - auth_events={ - (auth_events[a_id].type, auth_events[a_id].state_key): - auth_events[a_id] - for a_id, _ in event_map[e_id].auth_events - }, - ) - for e_id in events_to_state - ], - consumeErrors=True - ).addErrback(unwrapFirstError) + for e_id in events_to_state: + ev_infos.append({ + "event": event_map[e_id], + "state": events_to_state[e_id], + "auth_events": { + (auth_events[a_id].type, auth_events[a_id].state_key): + auth_events[a_id] + for a_id, _ in event_map[e_id].auth_events + } + }) events.sort(key=lambda e: e.depth) @@ -331,10 +325,14 @@ class FederationHandler(BaseHandler): if event in events_to_state: continue - yield self._handle_new_event( - dest, event, - backfilled=True, - ) + ev_infos.append({ + "event": event, + }) + + yield self._handle_new_events( + dest, ev_infos, + backfilled=True, + ) defer.returnValue(events) @@ -600,32 +598,22 @@ class FederationHandler(BaseHandler): # FIXME pass - yield self._handle_auth_events( - origin, [e for e in auth_chain if e.event_id != event.event_id] - ) - - @defer.inlineCallbacks - def handle_state(e): + ev_infos = [] + for e in itertools.chain(state, auth_chain): if e.event_id == event.event_id: - return + continue e.internal_metadata.outlier = True - try: - auth_ids = [e_id for e_id, _ in e.auth_events] - auth = { + auth_ids = [e_id for e_id, _ in e.auth_events] + ev_infos.append({ + "event": e, + "auth_events": { (e.type, e.state_key): e for e in auth_chain if e.event_id in auth_ids } - yield self._handle_new_event( - origin, e, auth_events=auth - ) - except: - logger.exception( - "Failed to handle state event %s", - e.event_id, - ) + }) - yield defer.DeferredList([handle_state(e) for e in state]) + yield self._handle_new_events(origin, ev_infos, outliers=True) auth_ids = [e_id for e_id, _ in event.auth_events] auth_events = { @@ -940,11 +928,54 @@ 
class FederationHandler(BaseHandler): def _handle_new_event(self, origin, event, state=None, backfilled=False, current_state=None, auth_events=None): - logger.debug( - "_handle_new_event: %s, sigs: %s", - event.event_id, event.signatures, + outlier = event.internal_metadata.is_outlier() + + context = yield self._prep_event( + origin, event, + state=state, + backfilled=backfilled, + current_state=current_state, + auth_events=auth_events, ) + event_stream_id, max_stream_id = yield self.store.persist_event( + event, + context=context, + backfilled=backfilled, + is_new_state=(not outlier and not backfilled), + current_state=current_state, + ) + + defer.returnValue((context, event_stream_id, max_stream_id)) + + @defer.inlineCallbacks + def _handle_new_events(self, origin, event_infos, backfilled=False, + outliers=False): + contexts = yield defer.gatherResults( + [ + self._prep_event( + origin, + ev_info["event"], + state=ev_info.get("state"), + backfilled=backfilled, + auth_events=ev_info.get("auth_events"), + ) + for ev_info in event_infos + ] + ) + + yield self.store.persist_events( + [ + (ev_info["event"], context) + for ev_info, context in itertools.izip(event_infos, contexts) + ], + backfilled=backfilled, + is_new_state=(not outliers and not backfilled), + ) + + @defer.inlineCallbacks + def _prep_event(self, origin, event, state=None, backfilled=False, + current_state=None, auth_events=None): outlier = event.internal_metadata.is_outlier() context = yield self.state_handler.compute_event_context( @@ -954,13 +985,6 @@ class FederationHandler(BaseHandler): if not auth_events: auth_events = context.current_state - logger.debug( - "_handle_new_event: %s, auth_events: %s", - event.event_id, auth_events, - ) - - is_new_state = not outlier - # This is a hack to fix some old rooms where the initial join event # didn't reference the create event in its auth events. if event.type == EventTypes.Member and not event.auth_events: @@ -984,26 +1008,7 @@ class FederationHandler(BaseHandler): context.rejected = RejectedReason.AUTH_ERROR - # FIXME: Don't store as rejected with AUTH_ERROR if we haven't - # seen all the auth events. - yield self.store.persist_event( - event, - context=context, - backfilled=backfilled, - is_new_state=False, - current_state=current_state, - ) - raise - - event_stream_id, max_stream_id = yield self.store.persist_event( - event, - context=context, - backfilled=backfilled, - is_new_state=(is_new_state and not backfilled), - current_state=current_state, - ) - - defer.returnValue((context, event_stream_id, max_stream_id)) + defer.returnValue(context) @defer.inlineCallbacks def on_query_auth(self, origin, event_id, remote_auth_chain, rejects, @@ -1066,14 +1071,24 @@ class FederationHandler(BaseHandler): @log_function def do_auth(self, origin, event, context, auth_events): # Check if we have all the auth events. 
- have_events = yield self.store.have_events( - [e_id for e_id, _ in event.auth_events] - ) - + current_state = set(e.event_id for e in auth_events.values()) event_auth_events = set(e_id for e_id, _ in event.auth_events) + + if event_auth_events - current_state: + have_events = yield self.store.have_events( + event_auth_events - current_state + ) + else: + have_events = {} + + have_events.update({ + e.event_id: "" + for e in auth_events.values() + }) + seen_events = set(have_events.keys()) - missing_auth = event_auth_events - seen_events + missing_auth = event_auth_events - seen_events - current_state if missing_auth: logger.info("Missing auth: %s", missing_auth) diff --git a/synapse/storage/event_federation.py b/synapse/storage/event_federation.py index ace745953..09705c60b 100644 --- a/synapse/storage/event_federation.py +++ b/synapse/storage/event_federation.py @@ -282,8 +282,7 @@ class EventFederationStore(SQLBaseStore): }, ) - def _handle_prev_events(self, txn, outlier, event_id, prev_events, - room_id): + def _handle_mult_prev_events(self, txn, events): """ For the given event, update the event edges table and forward and backward extremities tables. @@ -293,68 +292,75 @@ class EventFederationStore(SQLBaseStore): table="event_edges", values=[ { - "event_id": event_id, + "event_id": ev.event_id, "prev_event_id": e_id, - "room_id": room_id, + "room_id": ev.room_id, "is_state": False, } - for e_id, _ in prev_events + for ev in events + for e_id, _ in ev.prev_events ], ) - # Update the extremities table if this is not an outlier. - if not outlier: - for e_id, _ in prev_events: - # TODO (erikj): This could be done as a bulk insert - self._simple_delete_txn( - txn, - table="event_forward_extremities", - keyvalues={ - "event_id": e_id, - "room_id": room_id, - } + events_by_room = {} + for ev in events: + events_by_room.setdefault(ev.room_id, []).append(ev) + + for room_id, room_events in events_by_room.items(): + prevs = [ + e_id for ev in room_events for e_id, _ in ev.prev_events + if not ev.internal_metadata.is_outlier() + ] + if prevs: + txn.execute( + "DELETE FROM event_forward_extremities" + " WHERE room_id = ?" + " AND event_id in (%s)" % ( + ",".join(["?"] * len(prevs)), + ), + [room_id] + prevs, ) - # We only insert as a forward extremity the new event if there are - # no other events that reference it as a prev event - query = ( - "SELECT 1 FROM event_edges WHERE prev_event_id = ?" - ) + query = ( + "INSERT INTO event_forward_extremities (event_id, room_id)" + " SELECT ?, ? WHERE NOT EXISTS (" + " SELECT 1 FROM event_edges WHERE prev_event_id = ?" + " )" + ) - txn.execute(query, (event_id,)) + txn.executemany( + query, + [(ev.event_id, ev.room_id, ev.event_id) for ev in events] + ) - if not txn.fetchone(): - query = ( - "INSERT INTO event_forward_extremities" - " (event_id, room_id)" - " VALUES (?, ?)" - ) + query = ( + "INSERT INTO event_backward_extremities (event_id, room_id)" + " SELECT ?, ? WHERE NOT EXISTS (" + " SELECT 1 FROM event_backward_extremities" + " WHERE event_id = ? AND room_id = ?" + " )" + " AND NOT EXISTS (" + " SELECT 1 FROM events WHERE event_id = ? AND room_id = ? " + " AND outlier = ?" + " )" + ) - txn.execute(query, (event_id, room_id)) + txn.executemany(query, [ + (e_id, ev.room_id, e_id, ev.room_id, e_id, ev.room_id, False) + for ev in events for e_id, _ in ev.prev_events + if not ev.internal_metadata.is_outlier() + ]) - query = ( - "INSERT INTO event_backward_extremities (event_id, room_id)" - " SELECT ?, ? 
WHERE NOT EXISTS (" - " SELECT 1 FROM event_backward_extremities" - " WHERE event_id = ? AND room_id = ?" - " )" - " AND NOT EXISTS (" - " SELECT 1 FROM events WHERE event_id = ? AND room_id = ? " - " AND outlier = ?" - " )" - ) - - txn.executemany(query, [ - (e_id, room_id, e_id, room_id, e_id, room_id, False) - for e_id, _ in prev_events - ]) - - query = ( - "DELETE FROM event_backward_extremities" - " WHERE event_id = ? AND room_id = ?" - ) - txn.execute(query, (event_id, room_id)) + query = ( + "DELETE FROM event_backward_extremities" + " WHERE event_id = ? AND room_id = ?" + ) + txn.executemany( + query, + [(ev.event_id, ev.room_id) for ev in events] + ) + for room_id in events_by_room: txn.call_after( self.get_latest_event_ids_in_room.invalidate, room_id ) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 2caf0aae8..d760acb87 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -23,9 +23,7 @@ from synapse.events.utils import prune_event from synapse.util.logcontext import preserve_context_over_deferred from synapse.util.logutils import log_function from synapse.api.constants import EventTypes -from synapse.crypto.event_signing import compute_event_reference_hash -from syutil.base64util import decode_base64 from syutil.jsonutil import encode_json from contextlib import contextmanager @@ -46,6 +44,48 @@ EVENT_QUEUE_TIMEOUT_S = 0.1 # Timeout when waiting for requests for events class EventsStore(SQLBaseStore): + @defer.inlineCallbacks + def persist_events(self, events_and_contexts, backfilled=False, + is_new_state=True): + if not events_and_contexts: + return + + if backfilled: + if not self.min_token_deferred.called: + yield self.min_token_deferred + start = self.min_token - 1 + self.min_token -= len(events_and_contexts) + 1 + stream_orderings = range(start, self.min_token, -1) + + @contextmanager + def stream_ordering_manager(): + yield stream_orderings + stream_ordering_manager = stream_ordering_manager() + else: + stream_ordering_manager = yield self._stream_id_gen.get_next_mult( + self, len(events_and_contexts) + ) + + with stream_ordering_manager as stream_orderings: + for (event, _), stream in zip(events_and_contexts, stream_orderings): + event.internal_metadata.stream_ordering = stream + + chunks = [ + events_and_contexts[x:x+100] + for x in xrange(0, len(events_and_contexts), 100) + ] + + for chunk in chunks: + # We can't easily parallelize these since different chunks + # might contain the same event. 
:( + yield self.runInteraction( + "persist_events", + self._persist_events_txn, + events_and_contexts=chunk, + backfilled=backfilled, + is_new_state=is_new_state, + ) + @defer.inlineCallbacks @log_function def persist_event(self, event, context, backfilled=False, @@ -67,13 +107,13 @@ class EventsStore(SQLBaseStore): try: with stream_ordering_manager as stream_ordering: + event.internal_metadata.stream_ordering = stream_ordering yield self.runInteraction( "persist_event", self._persist_event_txn, event=event, context=context, backfilled=backfilled, - stream_ordering=stream_ordering, is_new_state=is_new_state, current_state=current_state, ) @@ -116,12 +156,7 @@ class EventsStore(SQLBaseStore): @log_function def _persist_event_txn(self, txn, event, context, backfilled, - stream_ordering=None, is_new_state=True, - current_state=None): - - # Remove the any existing cache entries for the event_id - txn.call_after(self._invalidate_get_event_cache, event.event_id) - + is_new_state=True, current_state=None): # We purposefully do this first since if we include a `current_state` # key, we *want* to update the `current_state_events` table if current_state: @@ -149,37 +184,78 @@ class EventsStore(SQLBaseStore): } ) - outlier = event.internal_metadata.is_outlier() - - if not outlier: - self._update_min_depth_for_room_txn( - txn, - event.room_id, - event.depth - ) - - have_persisted = self._simple_select_one_txn( + return self._persist_events_txn( txn, - table="events", - keyvalues={"event_id": event.event_id}, - retcols=["event_id", "outlier"], - allow_none=True, + [(event, context)], + backfilled=backfilled, + is_new_state=is_new_state, ) - metadata_json = encode_json( - event.internal_metadata.get_dict(), - using_frozen_dicts=USE_FROZEN_DICTS - ).decode("UTF-8") + @log_function + def _persist_events_txn(self, txn, events_and_contexts, backfilled, + is_new_state=True): - # If we have already persisted this event, we don't need to do any - # more processing. - # The processing above must be done on every call to persist event, - # since they might not have happened on previous calls. For example, - # if we are persisting an event that we had persisted as an outlier, - # but is no longer one. - if have_persisted: - if not outlier and have_persisted["outlier"]: - self._store_state_groups_txn(txn, event, context) + # Remove the any existing cache entries for the event_ids + for event, _ in events_and_contexts: + txn.call_after(self._invalidate_get_event_cache, event.event_id) + + depth_updates = {} + for event, _ in events_and_contexts: + if event.internal_metadata.is_outlier(): + continue + depth_updates[event.room_id] = max( + event.depth, depth_updates.get(event.room_id, event.depth) + ) + + for room_id, depth in depth_updates.items(): + self._update_min_depth_for_room_txn(txn, room_id, depth) + + txn.execute( + "SELECT event_id, outlier FROM events WHERE event_id in (%s)" % ( + ",".join(["?"] * len(events_and_contexts)), + ), + [event.event_id for event, _ in events_and_contexts] + ) + have_persisted = { + event_id: outlier + for event_id, outlier in txn.fetchall() + } + + event_map = {} + to_remove = set() + for event, context in events_and_contexts: + # Handle the case of the list including the same event multiple + # times. The tricky thing here is when they differ by whether + # they are an outlier. 
+ if event.event_id in event_map: + other = event_map[event.event_id] + + if not other.internal_metadata.is_outlier(): + to_remove.add(event) + continue + elif not event.internal_metadata.is_outlier(): + to_remove.add(event) + continue + else: + to_remove.add(other) + + event_map[event.event_id] = event + + if event.event_id not in have_persisted: + continue + + to_remove.add(event) + + outlier_persisted = have_persisted[event.event_id] + if not event.internal_metadata.is_outlier() and outlier_persisted: + self._store_state_groups_txn( + txn, event, context, + ) + + metadata_json = encode_json( + event.internal_metadata.get_dict(), + using_frozen_dicts=USE_FROZEN_DICTS + ).decode("UTF-8") sql = ( "UPDATE event_json SET internal_metadata = ?" @@ -198,94 +274,91 @@ class EventsStore(SQLBaseStore): sql, (False, event.event_id,) ) + + events_and_contexts = filter( + lambda ec: ec[0] not in to_remove, + events_and_contexts + ) + + if not events_and_contexts: return - if not outlier: - self._store_state_groups_txn(txn, event, context) + self._store_mult_state_groups_txn(txn, [ + (event, context) + for event, context in events_and_contexts + if not event.internal_metadata.is_outlier() + ]) - self._handle_prev_events( + self._handle_mult_prev_events( txn, - outlier=outlier, - event_id=event.event_id, - prev_events=event.prev_events, - room_id=event.room_id, + events=[event for event, _ in events_and_contexts], ) - if event.type == EventTypes.Member: - self._store_room_member_txn(txn, event) - elif event.type == EventTypes.Name: - self._store_room_name_txn(txn, event) - elif event.type == EventTypes.Topic: - self._store_room_topic_txn(txn, event) - elif event.type == EventTypes.Redaction: - self._store_redaction(txn, event) + for event, _ in events_and_contexts: + if event.type == EventTypes.Name: + self._store_room_name_txn(txn, event) + elif event.type == EventTypes.Topic: + self._store_room_topic_txn(txn, event) + elif event.type == EventTypes.Redaction: + self._store_redaction(txn, event) - event_dict = { - k: v - for k, v in event.get_dict().items() - if k not in [ - "redacted", - "redacted_because", + self._store_room_members_txn( + txn, + [ + event + for event, _ in events_and_contexts + if event.type == EventTypes.Member ] - } + ) - self._simple_insert_txn( + def event_dict(event): + return { + k: v + for k, v in event.get_dict().items() + if k not in [ + "redacted", + "redacted_because", + ] + } + + self._simple_insert_many_txn( txn, table="event_json", - values={ - "event_id": event.event_id, - "room_id": event.room_id, - "internal_metadata": metadata_json, - "json": encode_json( - event_dict, using_frozen_dicts=USE_FROZEN_DICTS - ).decode("UTF-8"), - }, + values=[ + { + "event_id": event.event_id, + "room_id": event.room_id, + "internal_metadata": encode_json( + event.internal_metadata.get_dict(), + using_frozen_dicts=USE_FROZEN_DICTS + ).decode("UTF-8"), + "json": encode_json( + event_dict(event), using_frozen_dicts=USE_FROZEN_DICTS + ).decode("UTF-8"), + } + for event, _ in events_and_contexts + ], ) - content = encode_json( - event.content, using_frozen_dicts=USE_FROZEN_DICTS - ).decode("UTF-8") - - vals = { - "topological_ordering": event.depth, - "event_id": event.event_id, - "type": event.type, - "room_id": event.room_id, - "content": content, - "processed": True, - "outlier": outlier, - "depth": event.depth, - } - - unrec = { - k: v - for k, v in event.get_dict().items() - if k not in vals.keys() and k not in [ - "redacted", - "redacted_because", - "signatures", - "hashes", - 
"prev_events", - ] - } - - vals["unrecognized_keys"] = encode_json( - unrec, using_frozen_dicts=USE_FROZEN_DICTS - ).decode("UTF-8") - - sql = ( - "INSERT INTO events" - " (stream_ordering, topological_ordering, event_id, type," - " room_id, content, processed, outlier, depth)" - " VALUES (?,?,?,?,?,?,?,?,?)" - ) - - txn.execute( - sql, - ( - stream_ordering, event.depth, event.event_id, event.type, - event.room_id, content, True, outlier, event.depth - ) + self._simple_insert_many_txn( + txn, + table="events", + values=[ + { + "stream_ordering": event.internal_metadata.stream_ordering, + "topological_ordering": event.depth, + "depth": event.depth, + "event_id": event.event_id, + "room_id": event.room_id, + "type": event.type, + "processed": True, + "outlier": event.internal_metadata.is_outlier(), + "content": encode_json( + event.content, using_frozen_dicts=USE_FROZEN_DICTS + ).decode("UTF-8"), + } + for event, _ in events_and_contexts + ], ) if context.rejected: @@ -293,19 +366,19 @@ class EventsStore(SQLBaseStore): txn, event.event_id, context.rejected ) - for hash_alg, hash_base64 in event.hashes.items(): - hash_bytes = decode_base64(hash_base64) - self._store_event_content_hash_txn( - txn, event.event_id, hash_alg, hash_bytes, - ) + # for hash_alg, hash_base64 in event.hashes.items(): + # hash_bytes = decode_base64(hash_base64) + # self._store_event_content_hash_txn( + # txn, event.event_id, hash_alg, hash_bytes, + # ) - for prev_event_id, prev_hashes in event.prev_events: - for alg, hash_base64 in prev_hashes.items(): - hash_bytes = decode_base64(hash_base64) - self._store_prev_event_hash_txn( - txn, event.event_id, prev_event_id, alg, - hash_bytes - ) + # for prev_event_id, prev_hashes in event.prev_events: + # for alg, hash_base64 in prev_hashes.items(): + # hash_bytes = decode_base64(hash_base64) + # self._store_prev_event_hash_txn( + # txn, event.event_id, prev_event_id, alg, + # hash_bytes + # ) self._simple_insert_many_txn( txn, @@ -316,16 +389,22 @@ class EventsStore(SQLBaseStore): "room_id": event.room_id, "auth_id": auth_id, } + for event, _ in events_and_contexts for auth_id, _ in event.auth_events ], ) - (ref_alg, ref_hash_bytes) = compute_event_reference_hash(event) - self._store_event_reference_hash_txn( - txn, event.event_id, ref_alg, ref_hash_bytes + self._store_event_reference_hashes_txn( + txn, [event for event, _ in events_and_contexts] ) - if event.is_state(): + state_events_and_contexts = filter( + lambda i: i[0].is_state(), + events_and_contexts, + ) + + state_values = [] + for event, context in state_events_and_contexts: vals = { "event_id": event.event_id, "room_id": event.room_id, @@ -337,51 +416,55 @@ class EventsStore(SQLBaseStore): if hasattr(event, "replaces_state"): vals["prev_state"] = event.replaces_state - self._simple_insert_txn( - txn, - "state_events", - vals, - ) + state_values.append(vals) - self._simple_insert_many_txn( - txn, - table="event_edges", - values=[ - { - "event_id": event.event_id, - "prev_event_id": e_id, - "room_id": event.room_id, - "is_state": True, - } - for e_id, h in event.prev_state - ], - ) + self._simple_insert_many_txn( + txn, + table="state_events", + values=state_values, + ) - if is_new_state and not context.rejected: - txn.call_after( - self.get_current_state_for_key.invalidate, - event.room_id, event.type, event.state_key - ) + self._simple_insert_many_txn( + txn, + table="event_edges", + values=[ + { + "event_id": event.event_id, + "prev_event_id": prev_id, + "room_id": event.room_id, + "is_state": True, + } + for 
event, _ in state_events_and_contexts + for prev_id, _ in event.prev_state + ], + ) - if (event.type == EventTypes.Name - or event.type == EventTypes.Aliases): + if is_new_state: + for event, _ in state_events_and_contexts: + if not context.rejected: txn.call_after( - self.get_room_name_and_aliases.invalidate, - event.room_id - ) + self.get_current_state_for_key.invalidate, + event.room_id, event.type, event.state_key + ) - self._simple_upsert_txn( - txn, - "current_state_events", - keyvalues={ - "room_id": event.room_id, - "type": event.type, - "state_key": event.state_key, - }, - values={ - "event_id": event.event_id, - } - ) + if event.type in [EventTypes.Name, EventTypes.Aliases]: + txn.call_after( + self.get_room_name_and_aliases.invalidate, + event.room_id + ) + + self._simple_upsert_txn( + txn, + "current_state_events", + keyvalues={ + "room_id": event.room_id, + "type": event.type, + "state_key": event.state_key, + }, + values={ + "event_id": event.event_id, + } + ) return diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index d36a6c18a..4db07f6fb 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -35,38 +35,28 @@ RoomsForUser = namedtuple( class RoomMemberStore(SQLBaseStore): - def _store_room_member_txn(self, txn, event): + def _store_room_members_txn(self, txn, events): """Store a room member in the database. """ - try: - target_user_id = event.state_key - except: - logger.exception( - "Failed to parse target_user_id=%s", target_user_id - ) - raise - - logger.debug( - "_store_room_member_txn: target_user_id=%s, membership=%s", - target_user_id, - event.membership, - ) - - self._simple_insert_txn( + self._simple_insert_many_txn( txn, - "room_memberships", - { - "event_id": event.event_id, - "user_id": target_user_id, - "sender": event.user_id, - "room_id": event.room_id, - "membership": event.membership, - } + table="room_memberships", + values=[ + { + "event_id": event.event_id, + "user_id": event.state_key, + "sender": event.user_id, + "room_id": event.room_id, + "membership": event.membership, + } + for event in events + ] ) - txn.call_after(self.get_rooms_for_user.invalidate, target_user_id) - txn.call_after(self.get_joined_hosts_for_room.invalidate, event.room_id) - txn.call_after(self.get_users_in_room.invalidate, event.room_id) + for event in events: + txn.call_after(self.get_rooms_for_user.invalidate, event.state_key) + txn.call_after(self.get_joined_hosts_for_room.invalidate, event.room_id) + txn.call_after(self.get_users_in_room.invalidate, event.room_id) def get_room_member(self, user_id, room_id): """Retrieve the current state of a room member. diff --git a/synapse/storage/signatures.py b/synapse/storage/signatures.py index f05182863..4f15e534b 100644 --- a/synapse/storage/signatures.py +++ b/synapse/storage/signatures.py @@ -18,6 +18,7 @@ from twisted.internet import defer from _base import SQLBaseStore from syutil.base64util import encode_base64 +from synapse.crypto.event_signing import compute_event_reference_hash class SignatureStore(SQLBaseStore): @@ -101,23 +102,26 @@ class SignatureStore(SQLBaseStore): txn.execute(query, (event_id, )) return {k: v for k, v in txn.fetchall()} - def _store_event_reference_hash_txn(self, txn, event_id, algorithm, - hash_bytes): + def _store_event_reference_hashes_txn(self, txn, events): """Store a hash for a PDU Args: txn (cursor): - event_id (str): Id for the Event. - algorithm (str): Hashing algorithm. - hash_bytes (bytes): Hash function output bytes. 
+ events (list): list of Events. """ - self._simple_insert_txn( + + vals = [] + for event in events: + ref_alg, ref_hash_bytes = compute_event_reference_hash(event) + vals.append({ + "event_id": event.event_id, + "algorithm": ref_alg, + "hash": buffer(ref_hash_bytes), + }) + + self._simple_insert_many_txn( txn, - "event_reference_hashes", - { - "event_id": event_id, - "algorithm": algorithm, - "hash": buffer(hash_bytes), - }, + table="event_reference_hashes", + values=vals, ) def _get_event_signatures_txn(self, txn, event_id): diff --git a/synapse/storage/state.py b/synapse/storage/state.py index f2b17f29e..61214e2b2 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -100,16 +100,23 @@ class StateStore(SQLBaseStore): ) def _store_state_groups_txn(self, txn, event, context): - if context.current_state is None: - return + return self._store_mult_state_groups_txn(txn, [(event, context)]) - state_events = dict(context.current_state) + def _store_mult_state_groups_txn(self, txn, events_and_contexts): + state_groups = {} + for event, context in events_and_contexts: + if context.current_state is None: + continue - if event.is_state(): - state_events[(event.type, event.state_key)] = event + if context.state_group is not None: + state_groups[event.event_id] = context.state_group + continue + + state_events = dict(context.current_state) + + if event.is_state(): + state_events[(event.type, event.state_key)] = event - state_group = context.state_group - if not state_group: state_group = self._state_groups_id_gen.get_next_txn(txn) self._simple_insert_txn( txn, @@ -135,14 +142,19 @@ class StateStore(SQLBaseStore): for state in state_events.values() ], ) + state_groups[event.event_id] = state_group - self._simple_insert_txn( + self._simple_insert_many_txn( txn, table="event_to_state_groups", - values={ - "state_group": state_group, - "event_id": event.event_id, - }, + values=[ + { + "state_group": state_groups[event.event_id], + "event_id": event.event_id, + } + for event, context in events_and_contexts + if context.current_state is not None + ], ) @defer.inlineCallbacks diff --git a/synapse/storage/util/id_generators.py b/synapse/storage/util/id_generators.py index 89d1643f1..83eab6309 100644 --- a/synapse/storage/util/id_generators.py +++ b/synapse/storage/util/id_generators.py @@ -107,6 +107,37 @@ class StreamIdGenerator(object): defer.returnValue(manager()) + @defer.inlineCallbacks + def get_next_mult(self, store, n): + """ + Usage: + with yield stream_id_gen.get_next(store, n) as stream_ids: + # ... persist events ... 
+ """ + if not self._current_max: + yield store.runInteraction( + "_compute_current_max", + self._get_or_compute_current_max, + ) + + with self._lock: + next_ids = range(self._current_max + 1, self._current_max + n + 1) + self._current_max += n + + for next_id in next_ids: + self._unfinished_ids.append(next_id) + + @contextlib.contextmanager + def manager(): + try: + yield next_ids + finally: + with self._lock: + for next_id in next_ids: + self._unfinished_ids.remove(next_id) + + defer.returnValue(manager()) + @defer.inlineCallbacks def get_max_token(self, store): """Returns the maximum stream id such that all stream ids less than or From b5f55a1d85386834b18828b196e1859a4410d98a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 26 Jun 2015 09:52:24 +0100 Subject: [PATCH 007/266] Implement bulk verify_signed_json API --- synapse/crypto/keyring.py | 426 ++++++++++++++++-------- synapse/federation/federation_base.py | 121 ++++--- synapse/federation/federation_client.py | 57 +++- synapse/storage/keys.py | 50 +-- 4 files changed, 439 insertions(+), 215 deletions(-) diff --git a/synapse/crypto/keyring.py b/synapse/crypto/keyring.py index aff69c5f8..873c9b40f 100644 --- a/synapse/crypto/keyring.py +++ b/synapse/crypto/keyring.py @@ -25,11 +25,11 @@ from syutil.base64util import decode_base64, encode_base64 from synapse.api.errors import SynapseError, Codes from synapse.util.retryutils import get_retry_limiter - -from synapse.util.async import ObservableDeferred +from synapse.util import unwrapFirstError from OpenSSL import crypto +from collections import namedtuple import urllib import hashlib import logging @@ -38,6 +38,9 @@ import logging logger = logging.getLogger(__name__) +KeyGroup = namedtuple("KeyGroup", ("server_name", "group_id", "key_ids")) + + class Keyring(object): def __init__(self, hs): self.store = hs.get_datastore() @@ -49,141 +52,274 @@ class Keyring(object): self.key_downloads = {} - @defer.inlineCallbacks def verify_json_for_server(self, server_name, json_object): - logger.debug("Verifying for %s", server_name) - key_ids = signature_ids(json_object, server_name) - if not key_ids: - raise SynapseError( - 400, - "Not signed with a supported algorithm", - Codes.UNAUTHORIZED, - ) - try: - verify_key = yield self.get_server_verify_key(server_name, key_ids) - except IOError as e: - logger.warn( - "Got IOError when downloading keys for %s: %s %s", - server_name, type(e).__name__, str(e.message), - ) - raise SynapseError( - 502, - "Error downloading keys for %s" % (server_name,), - Codes.UNAUTHORIZED, - ) - except Exception as e: - logger.warn( - "Got Exception when downloading keys for %s: %s %s", - server_name, type(e).__name__, str(e.message), - ) - raise SynapseError( - 401, - "No key for %s with id %s" % (server_name, key_ids), - Codes.UNAUTHORIZED, - ) + return self.verify_json_objects_for_server( + [(server_name, json_object)] + )[0] - try: - verify_signed_json(json_object, server_name, verify_key) - except: - raise SynapseError( - 401, - "Invalid signature for server %s with key %s:%s" % ( - server_name, verify_key.alg, verify_key.version - ), - Codes.UNAUTHORIZED, - ) + def verify_json_objects_for_server(self, server_and_json): + """Bulk verfies signatures of json objects, bulk fetching keys as + necessary. - @defer.inlineCallbacks - def get_server_verify_key(self, server_name, key_ids): - """Finds a verification key for the server with one of the key ids. - Trys to fetch the key from a trusted perspective server first. 
Args: - server_name(str): The name of the server to fetch a key for. - keys_ids (list of str): The key_ids to check for. + server_and_json (list): List of pairs of (server_name, json_object) + + Returns: + list of deferreds indicating success or failure to verify each + json object's signature for the given server_name. """ - cached = yield self.store.get_server_verify_keys(server_name, key_ids) + group_id_to_json = {} + group_id_to_group = {} + group_ids = [] - if cached: - defer.returnValue(cached[0]) - return + next_group_id = 0 + deferreds = {} - download = self.key_downloads.get(server_name) + for server_name, json_object in server_and_json: + logger.debug("Verifying for %s", server_name) + group_id = next_group_id + next_group_id += 1 + group_ids.append(group_id) - if download is None: - download = self._get_server_verify_key_impl(server_name, key_ids) - download = ObservableDeferred( - download, - consumeErrors=True + key_ids = signature_ids(json_object, server_name) + if not key_ids: + deferreds[group_id] = defer.fail(SynapseError( + 400, + "Not signed with a supported algorithm", + Codes.UNAUTHORIZED, + )) + + group = KeyGroup(server_name, group_id, key_ids) + + group_id_to_group[group_id] = group + group_id_to_json[group_id] = json_object + + @defer.inlineCallbacks + def handle_key_deferred(group, deferred): + server_name = group.server_name + try: + _, _, key_id, verify_key = yield deferred + except IOError as e: + logger.warn( + "Got IOError when downloading keys for %s: %s %s", + server_name, type(e).__name__, str(e.message), + ) + raise SynapseError( + 502, + "Error downloading keys for %s" % (server_name,), + Codes.UNAUTHORIZED, + ) + except Exception as e: + logger.exception( + "Got Exception when downloading keys for %s: %s %s", + server_name, type(e).__name__, str(e.message), + ) + raise SynapseError( + 401, + "No key for %s with id %s" % (server_name, key_ids), + Codes.UNAUTHORIZED, + ) + + json_object = group_id_to_json[group.group_id] + + try: + verify_signed_json(json_object, server_name, verify_key) + except: + raise SynapseError( + 401, + "Invalid signature for server %s with key %s:%s" % ( + server_name, verify_key.alg, verify_key.version + ), + Codes.UNAUTHORIZED, + ) + + deferreds.update(self.get_server_verify_keys( + group_id_to_group + )) + + return [ + handle_key_deferred( + group_id_to_group[g_id], + deferreds[g_id], ) - self.key_downloads[server_name] = download + for g_id in group_ids + ] - @download.addBoth - def callback(ret): - del self.key_downloads[server_name] - return ret + def get_server_verify_keys(self, group_id_to_group): + """Takes a dict of KeyGroups and tries to find at least one key for + each group. 
+ """ - r = yield download.observe() - defer.returnValue(r) + # These are functions that produce keys given a list of key ids + key_fetch_fns = ( + self.get_keys_from_store, # First try the local store + self.get_keys_from_perspectives, # Then try via perspectives + self.get_keys_from_server, # Then try directly + ) + + group_deferreds = { + group_id: defer.Deferred() + for group_id in group_id_to_group + } + + @defer.inlineCallbacks + def do_iterations(): + merged_results = {} + + missing_keys = { + group.server_name: key_id + for group in group_id_to_group.values() + for key_id in group.key_ids + } + + for fn in key_fetch_fns: + results = yield fn(missing_keys.items()) + merged_results.update(results) + + # We now need to figure out which groups we have keys for + # and which we don't + missing_groups = {} + for group in group_id_to_group.values(): + for key_id in group.key_ids: + if key_id in merged_results[group.server_name]: + group_deferreds.pop(group.group_id).callback(( + group.group_id, + group.server_name, + key_id, + merged_results[group.server_name][key_id], + )) + break + else: + missing_groups.setdefault( + group.server_name, [] + ).append(group) + + if not missing_groups: + break + + missing_keys = { + server_name: set( + key_id for group in groups for key_id in group.key_ids + ) + for server_name, groups in missing_groups.items() + } + + for group in missing_groups.values(): + group_deferreds.pop(group.group_id).errback(SynapseError( + 401, + "No key for %s with id %s" % ( + group.server_name, group.key_ids, + ), + Codes.UNAUTHORIZED, + )) + + def on_err(err): + for deferred in group_deferreds.values(): + deferred.errback(err) + group_deferreds.clear() + + do_iterations().addErrback(on_err) + + return group_deferreds @defer.inlineCallbacks - def _get_server_verify_key_impl(self, server_name, key_ids): - keys = None + def get_keys_from_store(self, server_name_and_key_ids): + res = yield defer.gatherResults( + [ + self.store.get_server_verify_keys(server_name, key_ids) + for server_name, key_ids in server_name_and_key_ids + ], + consumeErrors=True, + ).addErrback(unwrapFirstError) + defer.returnValue(dict(zip( + [server_name for server_name, _ in server_name_and_key_ids], + res + ))) + + @defer.inlineCallbacks + def get_keys_from_perspectives(self, server_name_and_key_ids): @defer.inlineCallbacks def get_key(perspective_name, perspective_keys): try: result = yield self.get_server_verify_key_v2_indirect( - server_name, key_ids, perspective_name, perspective_keys + server_name_and_key_ids, perspective_name, perspective_keys ) defer.returnValue(result) except Exception as e: - logging.info( - "Unable to getting key %r for %r from %r: %s %s", - key_ids, server_name, perspective_name, + logger.exception( + "Unable to get key from %r: %s %s", + perspective_name, type(e).__name__, str(e.message), ) + defer.returnValue({}) - perspective_results = yield defer.gatherResults([ - get_key(p_name, p_keys) - for p_name, p_keys in self.perspective_servers.items() - ]) + results = yield defer.gatherResults( + [ + get_key(p_name, p_keys) + for p_name, p_keys in self.perspective_servers.items() + ], + consumeErrors=True, + ).addErrback(unwrapFirstError) - for results in perspective_results: - if results is not None: - keys = results + union_of_keys = {} + for result in results: + for server_name, keys in result.items(): + union_of_keys.setdefault(server_name, {}).update(keys) - limiter = yield get_retry_limiter( - server_name, - self.clock, - self.store, - ) + defer.returnValue(union_of_keys) - 
with limiter: - if not keys: + @defer.inlineCallbacks + def get_keys_from_server(self, server_name_and_key_ids): + @defer.inlineCallbacks + def get_key(server_name, key_ids): + limiter = yield get_retry_limiter( + server_name, + self.clock, + self.store, + ) + with limiter: + keys = None try: keys = yield self.get_server_verify_key_v2_direct( server_name, key_ids ) except Exception as e: - logging.info( + logger.info( "Unable to getting key %r for %r directly: %s %s", key_ids, server_name, type(e).__name__, str(e.message), ) - if not keys: - keys = yield self.get_server_verify_key_v1_direct( - server_name, key_ids - ) + if not keys: + keys = yield self.get_server_verify_key_v1_direct( + server_name, key_ids + ) - for key_id in key_ids: - if key_id in keys: - defer.returnValue(keys[key_id]) - return - raise ValueError("No verification key found for given key ids") + keys = {server_name: keys} + + defer.returnValue(keys) + + results = yield defer.gatherResults( + [ + get_key(server_name, key_ids) + for server_name, key_ids in server_name_and_key_ids + ], + consumeErrors=True, + ).addErrback(unwrapFirstError) + + merged = {} + for result in results: + merged.update(result) + + defer.returnValue({ + server_name: keys + for server_name, keys in merged.items() + if keys + }) @defer.inlineCallbacks - def get_server_verify_key_v2_indirect(self, server_name, key_ids, + def get_server_verify_key_v2_indirect(self, server_names_and_key_ids, perspective_name, perspective_keys): limiter = yield get_retry_limiter( @@ -204,6 +340,7 @@ class Keyring(object): u"minimum_valid_until_ts": 0 } for key_id in key_ids } + for server_name, key_ids in server_names_and_key_ids } }, ) @@ -243,23 +380,29 @@ class Keyring(object): " server %r" % (perspective_name,) ) - response_keys = yield self.process_v2_response( - server_name, perspective_name, response + processed_response = yield self.process_v2_response( + perspective_name, response ) - keys.update(response_keys) + for server_name, response_keys in processed_response.items(): + keys.setdefault(server_name, {}).update(response_keys) - yield self.store_keys( - server_name=server_name, - from_server=perspective_name, - verify_keys=keys, - ) + yield defer.gatherResults( + [ + self.store_keys( + server_name=server_name, + from_server=perspective_name, + verify_keys=response_keys, + ) + for server_name, response_keys in keys.items() + ], + consumeErrors=True + ).addErrback(unwrapFirstError) defer.returnValue(keys) @defer.inlineCallbacks def get_server_verify_key_v2_direct(self, server_name, key_ids): - keys = {} for requested_key_id in key_ids: @@ -295,25 +438,30 @@ class Keyring(object): raise ValueError("TLS certificate not allowed by fingerprints") response_keys = yield self.process_v2_response( - server_name=server_name, from_server=server_name, - requested_id=requested_key_id, + requested_ids=[requested_key_id], response_json=response, ) keys.update(response_keys) - yield self.store_keys( - server_name=server_name, - from_server=server_name, - verify_keys=keys, - ) + yield defer.gatherResults( + [ + self.store_keys( + server_name=key_server_name, + from_server=server_name, + verify_keys=verify_keys, + ) + for key_server_name, verify_keys in keys.items() + ], + consumeErrors=True + ).addErrback(unwrapFirstError) defer.returnValue(keys) @defer.inlineCallbacks - def process_v2_response(self, server_name, from_server, response_json, - requested_id=None): + def process_v2_response(self, from_server, response_json, + requested_ids=[]): time_now_ms = 
self.clock.time_msec() response_keys = {} verify_keys = {} @@ -335,6 +483,8 @@ class Keyring(object): verify_key.time_added = time_now_ms old_verify_keys[key_id] = verify_key + results = {} + server_name = response_json["server_name"] for key_id in response_json["signatures"].get(server_name, {}): if key_id not in response_json["verify_keys"]: raise ValueError( @@ -357,28 +507,31 @@ class Keyring(object): signed_key_json_bytes = encode_canonical_json(signed_key_json) ts_valid_until_ms = signed_key_json[u"valid_until_ts"] - updated_key_ids = set() - if requested_id is not None: - updated_key_ids.add(requested_id) + updated_key_ids = set(requested_ids) updated_key_ids.update(verify_keys) updated_key_ids.update(old_verify_keys) response_keys.update(verify_keys) response_keys.update(old_verify_keys) - for key_id in updated_key_ids: - yield self.store.store_server_keys_json( - server_name=server_name, - key_id=key_id, - from_server=server_name, - ts_now_ms=time_now_ms, - ts_expires_ms=ts_valid_until_ms, - key_json_bytes=signed_key_json_bytes, - ) + yield defer.gatherResults( + [ + self.store.store_server_keys_json( + server_name=server_name, + key_id=key_id, + from_server=server_name, + ts_now_ms=time_now_ms, + ts_expires_ms=ts_valid_until_ms, + key_json_bytes=signed_key_json_bytes, + ) + for key_id in updated_key_ids + ], + consumeErrors=True, + ).addErrback(unwrapFirstError) - defer.returnValue(response_keys) + results[server_name] = response_keys - raise ValueError("No verification key found for given key ids") + defer.returnValue(results) @defer.inlineCallbacks def get_server_verify_key_v1_direct(self, server_name, key_ids): @@ -462,8 +615,13 @@ class Keyring(object): Returns: A deferred that completes when the keys are stored. """ - for key_id, key in verify_keys.items(): - # TODO(markjh): Store whether the keys have expired. - yield self.store.store_server_verify_key( - server_name, server_name, key.time_added, key - ) + # TODO(markjh): Store whether the keys have expired. + yield defer.gatherResults( + [ + self.store.store_server_verify_key( + server_name, server_name, key.time_added, key + ) + for key_id, key in verify_keys.items() + ], + consumeErrors=True, + ).addErrback(unwrapFirstError) diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py index 299493af9..bdfa24760 100644 --- a/synapse/federation/federation_base.py +++ b/synapse/federation/federation_base.py @@ -32,7 +32,8 @@ logger = logging.getLogger(__name__) class FederationBase(object): @defer.inlineCallbacks - def _check_sigs_and_hash_and_fetch(self, origin, pdus, outlier=False): + def _check_sigs_and_hash_and_fetch(self, origin, pdus, outlier=False, + include_none=False): """Takes a list of PDUs and checks the signatures and hashs of each one. If a PDU fails its signature check then we check if we have it in the database and if not then request if from the originating server of @@ -50,84 +51,108 @@ class FederationBase(object): Returns: Deferred : A list of PDUs that have valid signatures and hashes. """ + deferreds = self._check_sigs_and_hashes(pdus) - signed_pdus = [] + def callback(pdu): + return pdu - @defer.inlineCallbacks - def do(pdu): - try: - new_pdu = yield self._check_sigs_and_hash(pdu) - signed_pdus.append(new_pdu) - except SynapseError: - # FIXME: We should handle signature failures more gracefully. + def errback(failure, pdu): + failure.trap(SynapseError) + return None + def try_local_db(res, pdu): + if not res: # Check local db. 
- new_pdu = yield self.store.get_event( + return self.store.get_event( pdu.event_id, allow_rejected=True, allow_none=True, ) - if new_pdu: - signed_pdus.append(new_pdu) - return + return res - # Check pdu.origin - if pdu.origin != origin: - try: - new_pdu = yield self.get_pdu( - destinations=[pdu.origin], - event_id=pdu.event_id, - outlier=outlier, - timeout=10000, - ) - - if new_pdu: - signed_pdus.append(new_pdu) - return - except: - pass + def try_remote(res, pdu): + if not res and pdu.origin != origin: + return self.get_pdu( + destinations=[pdu.origin], + event_id=pdu.event_id, + outlier=outlier, + timeout=10000, + ).addErrback(lambda e: None) + return res + def warn(res, pdu): + if not res: logger.warn( "Failed to find copy of %s with valid signature", pdu.event_id, ) + return res - yield defer.gatherResults( - [do(pdu) for pdu in pdus], + for pdu, deferred in zip(pdus, deferreds): + deferred.addCallbacks( + callback, errback, errbackArgs=[pdu] + ).addCallback( + try_local_db, pdu + ).addCallback( + try_remote, pdu + ).addCallback( + warn, pdu + ) + + valid_pdus = yield defer.gatherResults( + deferreds, consumeErrors=True ).addErrback(unwrapFirstError) - defer.returnValue(signed_pdus) + if include_none: + defer.returnValue(valid_pdus) + else: + defer.returnValue([p for p in valid_pdus if p]) - @defer.inlineCallbacks def _check_sigs_and_hash(self, pdu): - """Throws a SynapseError if the PDU does not have the correct + return self._check_sigs_and_hashes([pdu])[0] + + def _check_sigs_and_hashes(self, pdus): + """Throws a SynapseError if a PDU does not have the correct signatures. Returns: FrozenEvent: Either the given event or it redacted if it failed the content hash check. """ - # Check signatures are correct. - redacted_event = prune_event(pdu) - redacted_pdu_json = redacted_event.get_pdu_json() - try: - yield self.keyring.verify_json_for_server( - pdu.origin, redacted_pdu_json - ) - except SynapseError: + redacted_pdus = [ + prune_event(pdu) + for pdu in pdus + ] + + deferreds = self.keyring.verify_json_objects_for_server([ + (p.origin, p.get_pdu_json()) + for p in redacted_pdus + ]) + + def callback(_, pdu, redacted): + if not check_event_content_hash(pdu): + logger.warn( + "Event content has been tampered, redacting %s: %s", + pdu.event_id, pdu.get_pdu_json() + ) + return redacted + return pdu + + def errback(failure, pdu): + failure.trap(SynapseError) logger.warn( "Signature check failed for %s", pdu.event_id, ) - raise + return failure - if not check_event_content_hash(pdu): - logger.warn( - "Event content has been tampered, redacting.", - pdu.event_id, + for deferred, pdu, redacted in zip(deferreds, pdus, redacted_pdus): + deferred.addCallbacks( + callback, errback, + callbackArgs=[pdu, redacted], + errbackArgs=[pdu], ) - defer.returnValue(redacted_event) - defer.returnValue(pdu) + return deferreds diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index d3b46b24c..7736d14fb 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -30,6 +30,7 @@ import synapse.metrics from synapse.util.retryutils import get_retry_limiter, NotRetryingDestination +import copy import itertools import logging import random @@ -167,7 +168,7 @@ class FederationClient(FederationBase): # FIXME: We should handle signature failures more gracefully. 
pdus[:] = yield defer.gatherResults( - [self._check_sigs_and_hash(pdu) for pdu in pdus], + self._check_sigs_and_hashes(pdus), consumeErrors=True, ).addErrback(unwrapFirstError) @@ -230,7 +231,7 @@ class FederationClient(FederationBase): pdu = pdu_list[0] # Check signatures are correct. - pdu = yield self._check_sigs_and_hash(pdu) + pdu = yield self._check_sigs_and_hashes([pdu])[0] break @@ -327,6 +328,9 @@ class FederationClient(FederationBase): @defer.inlineCallbacks def make_join(self, destinations, room_id, user_id): for destination in destinations: + if destination == self.server_name: + continue + try: ret = yield self.transport_layer.make_join( destination, room_id, user_id @@ -353,6 +357,9 @@ class FederationClient(FederationBase): @defer.inlineCallbacks def send_join(self, destinations, pdu): for destination in destinations: + if destination == self.server_name: + continue + try: time_now = self._clock.time_msec() _, content = yield self.transport_layer.send_join( @@ -374,17 +381,39 @@ class FederationClient(FederationBase): for p in content.get("auth_chain", []) ] - signed_state, signed_auth = yield defer.gatherResults( - [ - self._check_sigs_and_hash_and_fetch( - destination, state, outlier=True - ), - self._check_sigs_and_hash_and_fetch( - destination, auth_chain, outlier=True - ) - ], - consumeErrors=True - ).addErrback(unwrapFirstError) + pdus = { + p.event_id: p + for p in itertools.chain(state, auth_chain) + } + + valid_pdus = yield self._check_sigs_and_hash_and_fetch( + destination, pdus.values(), + outlier=True, + ) + + valid_pdus_map = { + p.event_id: p + for p in valid_pdus + } + + # NB: We *need* to copy to ensure that we don't have multiple + # references being passed on, as that causes... issues. + signed_state = [ + copy.copy(valid_pdus_map[p.event_id]) + for p in state + if p.event_id in valid_pdus_map + ] + + signed_auth = [ + valid_pdus_map[p.event_id] + for p in auth_chain + if p.event_id in valid_pdus_map + ] + + # NB: We *need* to copy to ensure that we don't have multiple + # references being passed on, as that causes... issues. + for s in signed_state: + s.internal_metadata = copy.deepcopy(s.internal_metadata) auth_chain.sort(key=lambda e: e.depth) @@ -396,7 +425,7 @@ class FederationClient(FederationBase): except CodeMessageException: raise except Exception as e: - logger.warn( + logger.exception( "Failed to send_join via %s: %s", destination, e.message ) diff --git a/synapse/storage/keys.py b/synapse/storage/keys.py index 5bdf497b9..940a5f7e0 100644 --- a/synapse/storage/keys.py +++ b/synapse/storage/keys.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
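The defer.gatherResults(..., consumeErrors=True) plus unwrapFirstError combination used above, and throughout this series, runs a batch of deferreds concurrently while still surfacing the original exception rather than Twisted's FirstError wrapper. A sketch of what such a helper looks like, based on Twisted's documented FirstError API rather than copied from synapse:

    from twisted.internet import defer

    def unwrap_first_error(failure):
        # gatherResults(consumeErrors=True) wraps the first failure in a
        # FirstError; unwrap it so callers see the underlying exception.
        failure.trap(defer.FirstError)
        return failure.value.subFailure

    def gather(deferreds):
        return defer.gatherResults(
            deferreds, consumeErrors=True
        ).addErrback(unwrap_first_error)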
-from _base import SQLBaseStore +from _base import SQLBaseStore, cached from twisted.internet import defer @@ -71,6 +71,25 @@ class KeyStore(SQLBaseStore): desc="store_server_certificate", ) + @cached() + @defer.inlineCallbacks + def get_all_server_verify_keys(self, server_name): + rows = yield self._simple_select_list( + table="server_signature_keys", + keyvalues={ + "server_name": server_name, + }, + retcols=["key_id", "verify_key"], + desc="get_all_server_verify_keys", + ) + + defer.returnValue({ + row["key_id"]: decode_verify_key_bytes( + row["key_id"], str(row["verify_key"]) + ) + for row in rows + }) + @defer.inlineCallbacks def get_server_verify_keys(self, server_name, key_ids): """Retrieve the NACL verification key for a given server for the given @@ -81,24 +100,14 @@ class KeyStore(SQLBaseStore): Returns: (list of VerifyKey): The verification keys. """ - sql = ( - "SELECT key_id, verify_key FROM server_signature_keys" - " WHERE server_name = ?" - " AND key_id in (" + ",".join("?" for key_id in key_ids) + ")" - ) - - rows = yield self._execute_and_decode( - "get_server_verify_keys", sql, server_name, *key_ids - ) - - keys = [] - for row in rows: - key_id = row["key_id"] - key_bytes = row["verify_key"] - key = decode_verify_key_bytes(key_id, str(key_bytes)) - keys.append(key) - defer.returnValue(keys) + keys = yield self.get_all_server_verify_keys(server_name) + defer.returnValue({ + k: keys[k] + for k in key_ids + if k in keys and keys[k] + }) + @defer.inlineCallbacks def store_server_verify_key(self, server_name, from_server, time_now_ms, verify_key): """Stores a NACL verification key for the given server. @@ -109,7 +118,7 @@ class KeyStore(SQLBaseStore): ts_now_ms (int): The time now in milliseconds verification_key (VerifyKey): The NACL verify key. 
""" - return self._simple_upsert( + yield self._simple_upsert( table="server_signature_keys", keyvalues={ "server_name": server_name, @@ -123,6 +132,8 @@ class KeyStore(SQLBaseStore): desc="store_server_verify_key", ) + self.get_all_server_verify_keys.invalidate(server_name) + def store_server_keys_json(self, server_name, key_id, from_server, ts_now_ms, ts_expires_ms, key_json_bytes): """Stores the JSON bytes for a set of keys from a server @@ -152,6 +163,7 @@ class KeyStore(SQLBaseStore): "ts_valid_until_ms": ts_expires_ms, "key_json": buffer(key_json_bytes), }, + desc="store_server_keys_json", ) def get_server_keys_json(self, server_keys): From f0dd568e1681b878d9e85a12c21bfa0d5540ff73 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 26 Jun 2015 11:25:00 +0100 Subject: [PATCH 008/266] Wait for previous attempts at fetching keys for a given server before trying to fetch more --- synapse/crypto/keyring.py | 83 ++++++++++++++++++++++++++++++++------- 1 file changed, 68 insertions(+), 15 deletions(-) diff --git a/synapse/crypto/keyring.py b/synapse/crypto/keyring.py index 873c9b40f..aa74d4d0c 100644 --- a/synapse/crypto/keyring.py +++ b/synapse/crypto/keyring.py @@ -27,6 +27,8 @@ from synapse.api.errors import SynapseError, Codes from synapse.util.retryutils import get_retry_limiter from synapse.util import unwrapFirstError +from synapse.util.async import ObservableDeferred + from OpenSSL import crypto from collections import namedtuple @@ -88,6 +90,8 @@ class Keyring(object): "Not signed with a supported algorithm", Codes.UNAUTHORIZED, )) + else: + deferreds[group_id] = defer.Deferred() group = KeyGroup(server_name, group_id, key_ids) @@ -133,10 +137,41 @@ class Keyring(object): Codes.UNAUTHORIZED, ) - deferreds.update(self.get_server_verify_keys( - group_id_to_group - )) + server_to_deferred = { + server_name: defer.Deferred() + for server_name, _ in server_and_json + } + # We want to wait for any previous lookups to complete before + # proceeding. + wait_on_deferred = self.wait_for_previous_lookups( + [server_name for server_name, _ in server_and_json], + server_to_deferred, + ) + + # Actually start fetching keys. + wait_on_deferred.addBoth( + lambda _: self.get_server_verify_keys(group_id_to_group, deferreds) + ) + + # When we've finished fetching all the keys for a given server_name, + # resolve the deferred passed to `wait_for_previous_lookups` so that + # any lookups waiting will proceed. + server_to_gids = {} + + def remove_deferreds(res, server_name, group_id): + server_to_gids[server_name].discard(group_id) + if not server_to_gids[server_name]: + server_to_deferred.pop(server_name).callback(None) + return res + + for g_id, deferred in deferreds.items(): + server_name = group_id_to_group[g_id].server_name + server_to_gids.setdefault(server_name, set()).add(g_id) + deferred.addBoth(remove_deferreds, server_name, g_id) + + # Pass those keys to handle_key_deferred so that the json object + # signatures can be verified return [ handle_key_deferred( group_id_to_group[g_id], @@ -145,7 +180,30 @@ class Keyring(object): for g_id in group_ids ] - def get_server_verify_keys(self, group_id_to_group): + @defer.inlineCallbacks + def wait_for_previous_lookups(self, server_names, server_to_deferred): + """Waits for any previous key lookups for the given servers to finish. 
+
+        Args:
+            server_names (list): list of server_names we want to look up
+            server_to_deferred (dict): server_name to deferred which gets
+                resolved once we've finished looking up keys for that server
+        """
+        while True:
+            wait_on = [
+                self.key_downloads[server_name]
+                for server_name in server_names
+                if server_name in self.key_downloads
+            ]
+            if wait_on:
+                yield defer.DeferredList(wait_on)
+            else:
+                break
+
+        for server_name, deferred in server_to_deferred.items():
+            self.key_downloads[server_name] = ObservableDeferred(deferred)
+
+    def get_server_verify_keys(self, group_id_to_group, group_id_to_deferred):
         """Takes a dict of KeyGroups and tries to find at least one key for
         each group.
         """
@@ -157,11 +215,6 @@ class Keyring(object):
             self.get_keys_from_server,  # Then try directly
         )
 
-        group_deferreds = {
-            group_id: defer.Deferred()
-            for group_id in group_id_to_group
-        }
-
         @defer.inlineCallbacks
         def do_iterations():
             merged_results = {}
@@ -182,7 +235,7 @@ class Keyring(object):
                 for group in group_id_to_group.values():
                     for key_id in group.key_ids:
                         if key_id in merged_results[group.server_name]:
-                            group_deferreds.pop(group.group_id).callback((
+                            group_id_to_deferred[group.group_id].callback((
                                 group.group_id,
                                 group.server_name,
                                 key_id,
@@ -205,7 +258,7 @@ class Keyring(object):
             }
 
             for group in missing_groups.values():
-                group_deferreds.pop(group.group_id).errback(SynapseError(
+                group_id_to_deferred[group.group_id].errback(SynapseError(
                     401,
                     "No key for %s with id %s" % (
                         group.server_name, group.key_ids,
                     ),
                 ))
 
         def on_err(err):
-            for deferred in group_deferreds.values():
-                deferred.errback(err)
-            group_deferreds.clear()
+            for deferred in group_id_to_deferred.values():
+                if not deferred.called:
+                    deferred.errback(err)
 
         do_iterations().addErrback(on_err)
 
-        return group_deferreds
+        return group_id_to_deferred
 
     @defer.inlineCallbacks
     def get_keys_from_store(self, server_name_and_key_ids):

From 2124f668db4a7c19e3ed8357dad887711b3d3c35 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Tue, 30 Jun 2015 09:33:48 +0100
Subject: [PATCH 009/266] Add Content-Disposition headers to media repo v1 downloads

---
 synapse/rest/media/v1/base_resource.py     | 23 ++++++++++++++++++----
 synapse/rest/media/v1/download_resource.py |  8 ++++++--
 synapse/rest/media/v1/upload_resource.py   |  6 +++++-
 3 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/synapse/rest/media/v1/base_resource.py b/synapse/rest/media/v1/base_resource.py
index 6c83a9478..04410ab82 100644
--- a/synapse/rest/media/v1/base_resource.py
+++ b/synapse/rest/media/v1/base_resource.py
@@ -30,6 +30,7 @@ from synapse.util.async import ObservableDeferred
 
 import os
 
+import cgi
 import logging
 
 logger = logging.getLogger(__name__)
@@ -37,7 +38,9 @@
 
 def parse_media_id(request):
     try:
-        server_name, media_id = request.postpath
+        # This allows users to append e.g. /test.png to the URL. Useful for
+        # clients that parse the URL to see content type.
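The wait_for_previous_lookups method added in patch 008 above builds a per-server queue out of plain deferreds: a new lookup blocks until any earlier lookup for the same server has resolved. A minimal self-contained sketch of the same idiom (KeyedSerialiser and its names are illustrative, not part of synapse):

    from twisted.internet import defer

    class KeyedSerialiser(object):
        """Runs work for a key only after earlier work on that key finishes."""

        def __init__(self):
            self._inflight = {}  # key -> Deferred fired when work finishes

        @defer.inlineCallbacks
        def run(self, key, work):
            # Wait for in-flight work on this key. As in the patch, a
            # DeferredList lets several waiters safely observe the same
            # underlying deferred.
            while key in self._inflight:
                yield defer.DeferredList([self._inflight[key]])

            done = defer.Deferred()
            self._inflight[key] = done
            try:
                result = yield work()
            finally:
                # Always unblock the next waiter, even on failure.
                del self._inflight[key]
                done.callback(None)
            defer.returnValue(result)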
+ server_name, media_id = request.postpath[:2] return (server_name, media_id) except: raise SynapseError( @@ -128,12 +131,19 @@ class BaseMediaResource(Resource): media_type = headers["Content-Type"][0] time_now_ms = self.clock.time_msec() + content_disposition = headers.get("Content-Disposition", None) + if content_disposition: + _, params = cgi.parse_header(content_disposition[0],) + upload_name = params.get("filename", None) + else: + upload_name = None + yield self.store.store_cached_remote_media( origin=server_name, media_id=media_id, media_type=media_type, time_now_ms=self.clock.time_msec(), - upload_name=None, + upload_name=upload_name, media_length=length, filesystem_id=file_id, ) @@ -144,7 +154,7 @@ class BaseMediaResource(Resource): media_info = { "media_type": media_type, "media_length": length, - "upload_name": None, + "upload_name": upload_name, "created_ts": time_now_ms, "filesystem_id": file_id, } @@ -157,11 +167,16 @@ class BaseMediaResource(Resource): @defer.inlineCallbacks def _respond_with_file(self, request, media_type, file_path, - file_size=None): + file_size=None, upload_name=None): logger.debug("Responding with %r", file_path) if os.path.isfile(file_path): request.setHeader(b"Content-Type", media_type.encode("UTF-8")) + if upload_name: + request.setHeader( + b"Content-Disposition", + b"inline; filename=%s" % (upload_name.encode("utf-8"),), + ) # cache for at least a day. # XXX: we might want to turn this off for data we don't want to diff --git a/synapse/rest/media/v1/download_resource.py b/synapse/rest/media/v1/download_resource.py index 0fe6abf64..2af48ab62 100644 --- a/synapse/rest/media/v1/download_resource.py +++ b/synapse/rest/media/v1/download_resource.py @@ -47,10 +47,12 @@ class DownloadResource(BaseMediaResource): media_type = media_info["media_type"] media_length = media_info["media_length"] + upload_name = media_info["upload_name"] file_path = self.filepaths.local_media_filepath(media_id) yield self._respond_with_file( - request, media_type, file_path, media_length + request, media_type, file_path, media_length, + upload_name=upload_name, ) @defer.inlineCallbacks @@ -60,11 +62,13 @@ class DownloadResource(BaseMediaResource): media_type = media_info["media_type"] media_length = media_info["media_length"] filesystem_id = media_info["filesystem_id"] + upload_name = media_info["upload_name"] file_path = self.filepaths.remote_media_filepath( server_name, filesystem_id ) yield self._respond_with_file( - request, media_type, file_path, media_length + request, media_type, file_path, media_length, + upload_name=upload_name, ) diff --git a/synapse/rest/media/v1/upload_resource.py b/synapse/rest/media/v1/upload_resource.py index cc571976a..92e855a44 100644 --- a/synapse/rest/media/v1/upload_resource.py +++ b/synapse/rest/media/v1/upload_resource.py @@ -84,6 +84,10 @@ class UploadResource(BaseMediaResource): code=413, ) + upload_name = request.args.get("filename", None) + if upload_name: + upload_name = upload_name[0] + headers = request.requestHeaders if headers.hasHeader("Content-Type"): @@ -99,7 +103,7 @@ class UploadResource(BaseMediaResource): # TODO(markjh): parse content-dispostion content_uri = yield self.create_content( - media_type, None, request.content.read(), + media_type, upload_name, request.content.read(), content_length, auth_user ) From 9beaedd1642673d36428dd796dda62f18a937c2a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 30 Jun 2015 10:31:59 +0100 Subject: [PATCH 010/266] Enforce ascii filenames for uploads --- 
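The Content-Disposition handling in patch 009 above leans on the standard library. A small usage sketch of cgi.parse_header, with a made-up header value:

    import cgi

    # Illustrative header value, not taken from the patch:
    value = 'inline; filename="cat photo.png"'
    disposition, params = cgi.parse_header(value)
    # disposition == 'inline'
    # params == {'filename': 'cat photo.png'}

A bare filename= parameter has no well-defined encoding for anything beyond ASCII, which is one reason patch 010 (immediately below) restricts upload filenames to ASCII.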
synapse/rest/media/v1/base_resource.py | 3 +++ synapse/rest/media/v1/upload_resource.py | 4 +++- synapse/util/stringutils.py | 9 +++++++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/synapse/rest/media/v1/base_resource.py b/synapse/rest/media/v1/base_resource.py index 04410ab82..1b7517e2f 100644 --- a/synapse/rest/media/v1/base_resource.py +++ b/synapse/rest/media/v1/base_resource.py @@ -27,6 +27,7 @@ from twisted.web.resource import Resource from twisted.protocols.basic import FileSender from synapse.util.async import ObservableDeferred +from synapse.util.stringutils import is_ascii import os @@ -135,6 +136,8 @@ class BaseMediaResource(Resource): if content_disposition: _, params = cgi.parse_header(content_disposition[0],) upload_name = params.get("filename", None) + if upload_name and not is_ascii(upload_name): + upload_name = None else: upload_name = None diff --git a/synapse/rest/media/v1/upload_resource.py b/synapse/rest/media/v1/upload_resource.py index 92e855a44..cdd1d44e0 100644 --- a/synapse/rest/media/v1/upload_resource.py +++ b/synapse/rest/media/v1/upload_resource.py @@ -15,7 +15,7 @@ from synapse.http.server import respond_with_json, request_handler -from synapse.util.stringutils import random_string +from synapse.util.stringutils import random_string, is_ascii from synapse.api.errors import SynapseError from twisted.web.server import NOT_DONE_YET @@ -87,6 +87,8 @@ class UploadResource(BaseMediaResource): upload_name = request.args.get("filename", None) if upload_name: upload_name = upload_name[0] + if upload_name and not is_ascii(upload_name): + raise SynapseError(400, "filename must be ascii") headers = request.requestHeaders diff --git a/synapse/util/stringutils.py b/synapse/util/stringutils.py index 52e66beae..7a1e96af3 100644 --- a/synapse/util/stringutils.py +++ b/synapse/util/stringutils.py @@ -33,3 +33,12 @@ def random_string_with_symbols(length): return ''.join( random.choice(_string_with_symbols) for _ in xrange(length) ) + + +def is_ascii(s): + try: + s.encode("ascii") + except UnicodeDecodeError: + return False + else: + return True From 901f56fa63c0f8cd4519e2a2f774724af9ea3431 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 30 Jun 2015 15:29:47 +0100 Subject: [PATCH 011/266] Add tables for receipts --- synapse/storage/__init__.py | 2 +- synapse/storage/schema/delta/21/receipts.sql | 35 ++++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) create mode 100644 synapse/storage/schema/delta/21/receipts.sql diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index c137f4782..275598add 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -51,7 +51,7 @@ logger = logging.getLogger(__name__) # Remember to update this number every time a change is made to database # schema files, so the users will be informed on server restarts. -SCHEMA_VERSION = 20 +SCHEMA_VERSION = 21 dir_path = os.path.abspath(os.path.dirname(__file__)) diff --git a/synapse/storage/schema/delta/21/receipts.sql b/synapse/storage/schema/delta/21/receipts.sql new file mode 100644 index 000000000..da9e18e90 --- /dev/null +++ b/synapse/storage/schema/delta/21/receipts.sql @@ -0,0 +1,35 @@ +# Copyright 2015 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +CREATE TABLE IF NOT EXISTS receipts_graph( + room_id TEXT NOT NULL, + receipt_type TEXT NOT NULL, + user_id TEXT NOT NULL, + event_id TEXT NOT NULL +); + +CREATE INDEX receipts_graph_room_tuple ON receipts_graph( + room_id, receipt_type, user_id +); + +CREATE TABLE IF NOT EXISTS receipts_linearized ( + room_id TEXT NOT NULL, + receipt_type TEXT NOT NULL, + user_id TEXT NOT NULL, + event_id TEXT NOT NULL +); + +CREATE INDEX receipts_graph_room_tuple ON receipts_graph( + room_id, receipt_type, user_id +); From 80a61330ee794147b213b1d54f2292a1c9adc002 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 1 Jul 2015 11:41:55 +0100 Subject: [PATCH 012/266] Add basic storage functions for handling of receipts --- synapse/storage/_base.py | 3 +- synapse/storage/receipts.py | 162 +++++++++++++++++++ synapse/storage/schema/delta/21/receipts.sql | 31 ++-- synapse/storage/util/id_generators.py | 7 +- 4 files changed, 186 insertions(+), 17 deletions(-) create mode 100644 synapse/storage/receipts.py diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 8d33def6c..8f812f0fd 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -329,13 +329,14 @@ class SQLBaseStore(object): self.database_engine = hs.database_engine - self._stream_id_gen = StreamIdGenerator() + self._stream_id_gen = StreamIdGenerator("events", "stream_ordering") self._transaction_id_gen = IdGenerator("sent_transactions", "id", self) self._state_groups_id_gen = IdGenerator("state_groups", "id", self) self._access_tokens_id_gen = IdGenerator("access_tokens", "id", self) self._pushers_id_gen = IdGenerator("pushers", "id", self) self._push_rule_id_gen = IdGenerator("push_rules", "id", self) self._push_rules_enable_id_gen = IdGenerator("push_rules_enable", "id", self) + self._receipts_id_gen = StreamIdGenerator("receipts_linearized", "stream_id") def start_profiling(self): self._previous_loop_ts = self._clock.time_msec() diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py new file mode 100644 index 000000000..0168e74a0 --- /dev/null +++ b/synapse/storage/receipts.py @@ -0,0 +1,162 @@ +# -*- coding: utf-8 -*- +# Copyright 2014, 2015 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
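The store introduced here handles receipts in two forms, matching the two tables from the schema above: a "graph" form keyed on one or more points in the event DAG, and a "linearized" form collapsed to a single event in stream order. A sketch of the two shapes, with made-up values:

    # Graph form: a receipt may point at several forward extremities of
    # the event DAG for the room.
    graph_receipt = {
        "room_id": "!room:example.com",
        "receipt_type": "m.read",
        "user_id": "@alice:example.com",
        "event_ids": ["$event_a", "$event_b"],
    }

    # Linearized form: collapsed to the single furthest-forward event in
    # stream order; later in this patch the table also gains a stream_id
    # so clients can be told what changed.
    linearized_receipt = {
        "room_id": "!room:example.com",
        "receipt_type": "m.read",
        "user_id": "@alice:example.com",
        "event_id": "$event_b",
    }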
+ +from ._base import SQLBaseStore, cached + +from twisted.internet import defer + + +class ReceiptStore(SQLBaseStore): + + @cached + @defer.inlineCallbacks + def get_linearized_receipts_for_room(self, room_id): + rows = yield self._simple_select_list( + table="receipts_linearized", + keyvalues={"room_id": room_id}, + retcols=["receipt_type", "user_id", "event_id"], + desc="get_linearized_receipts_for_room", + ) + + result = {} + for row in rows: + result.setdefault( + row["event_id"], {} + ).setdefault( + row["receipt_type"], [] + ).append(row["user_id"]) + + defer.returnValue(result) + + @cached + @defer.inlineCallbacks + def get_graph_receipts_for_room(self, room_id): + rows = yield self._simple_select_list( + table="receipts_graph", + keyvalues={"room_id": room_id}, + retcols=["receipt_type", "user_id", "event_id"], + desc="get_linearized_receipts_for_room", + ) + + result = {} + for row in rows: + result.setdefault( + row["user_id"], {} + ).setdefault( + row["receipt_type"], [] + ).append(row["event_id"]) + + defer.returnValue(result) + + def insert_linearized_receipt_txn(self, txn, room_id, receipt_type, + user_id, event_id, stream_id): + self._simple_delete_txn( + txn, + table="receipts_linearized", + keyvalues={ + "stream_id": stream_id, + "room_id": room_id, + "receipt_type": receipt_type, + "user_id": user_id, + } + ) + + self._simple_insert_txn( + txn, + table="receipts_linearized", + values={ + "room_id": room_id, + "receipt_type": receipt_type, + "user_id": user_id, + "event_id": event_id, + } + ) + + @defer.inlineCallbacks + def insert_receipt(self, room_id, receipt_type, user_id, event_ids): + if not event_ids: + return + + if len(event_ids) == 1: + linearized_event_id = event_ids[0] + else: + # we need to points in graph -> linearized form. + def graph_to_linear(txn): + query = ( + "SELECT event_id WHERE room_id = ? AND stream_ordering IN (" + " SELECT max(stream_ordering) WHERE event_id IN (%s)" + ")" + ) % (",".join(["?"] * len(event_ids))) + + txn.execute(query, [room_id] + event_ids) + rows = txn.fetchall() + if rows: + return rows[0][0] + else: + # TODO: ARGH?! 
+ return None + + linearized_event_id = yield self.runInteraction( + graph_to_linear, desc="insert_receipt_conv" + ) + + stream_id_manager = yield self._stream_id_gen.get_next(self) + with stream_id_manager() as stream_id: + yield self.runInteraction( + self.insert_linearized_receipt_txn, + room_id, receipt_type, user_id, linearized_event_id, + stream_id=stream_id, + desc="insert_linearized_receipt" + ) + + yield self.insert_graph_receipt( + room_id, receipt_type, user_id, event_ids + ) + + max_persisted_id = yield self._stream_id_gen.get_max_token(self) + defer.returnValue((stream_id, max_persisted_id)) + + def insert_graph_receipt(self, room_id, receipt_type, + user_id, event_ids): + return self.runInteraction( + self.insert_graph_receipt_txn, + room_id, receipt_type, user_id, event_ids, + desc="insert_graph_receipt" + ) + + def insert_graph_receipt_txn(self, txn, room_id, receipt_type, + user_id, event_ids): + self._simple_delete_txn( + txn, + table="receipts_graph", + keyvalues={ + "room_id": room_id, + "receipt_type": receipt_type, + "user_id": user_id, + } + ) + self._simple_insert_many_txn( + txn, + table="receipts_graph", + values=[ + { + "room_id": room_id, + "receipt_type": receipt_type, + "user_id": user_id, + "event_id": event_id, + } + for event_id in event_ids + ], + ) diff --git a/synapse/storage/schema/delta/21/receipts.sql b/synapse/storage/schema/delta/21/receipts.sql index da9e18e90..ccd64ec7f 100644 --- a/synapse/storage/schema/delta/21/receipts.sql +++ b/synapse/storage/schema/delta/21/receipts.sql @@ -1,16 +1,18 @@ -# Copyright 2015 OpenMarket Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +/* Copyright 2015 OpenMarket Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + CREATE TABLE IF NOT EXISTS receipts_graph( room_id TEXT NOT NULL, @@ -24,12 +26,13 @@ CREATE INDEX receipts_graph_room_tuple ON receipts_graph( ); CREATE TABLE IF NOT EXISTS receipts_linearized ( + stream_id BIGINT NOT NULL, room_id TEXT NOT NULL, receipt_type TEXT NOT NULL, user_id TEXT NOT NULL, event_id TEXT NOT NULL ); -CREATE INDEX receipts_graph_room_tuple ON receipts_graph( +CREATE INDEX receipts_linearized_room_tuple ON receipts_graph( room_id, receipt_type, user_id ); diff --git a/synapse/storage/util/id_generators.py b/synapse/storage/util/id_generators.py index 89d1643f1..b39006315 100644 --- a/synapse/storage/util/id_generators.py +++ b/synapse/storage/util/id_generators.py @@ -72,7 +72,10 @@ class StreamIdGenerator(object): with stream_id_gen.get_next_txn(txn) as stream_id: # ... persist event ... """ - def __init__(self): + def __init__(self, table, column): + self.table = table + self.column = column + self._lock = threading.Lock() self._current_max = None @@ -126,7 +129,7 @@ class StreamIdGenerator(object): def _get_or_compute_current_max(self, txn): with self._lock: - txn.execute("SELECT MAX(stream_ordering) FROM events") + txn.execute("SELECT MAX(%s) FROM %s" % (self.column, self.table)) rows = txn.fetchall() val, = rows[0] From 0862fed2a89723e8aa6a4df9f1dbad975a7fbffc Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 1 Jul 2015 17:19:31 +0100 Subject: [PATCH 013/266] Add basic ReceiptHandler --- synapse/handlers/receipts.py | 130 +++++++++++++++++++++++++++++++++++ 1 file changed, 130 insertions(+) create mode 100644 synapse/handlers/receipts.py diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py new file mode 100644 index 000000000..f3f705063 --- /dev/null +++ b/synapse/handlers/receipts.py @@ -0,0 +1,130 @@ +# -*- coding: utf-8 -*- +# Copyright 2015 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Contains handlers for federation events.""" + +from ._base import BaseHandler + +from twisted.internet import defer + +from synapse.util.logcontext import PreserveLoggingContext + +import logging + + +logger = logging.getLogger(__name__) + + +class ReceiptsHandler(BaseHandler): + def __init__(self, hs): + super(ReceiptsHandler, self).__init__(hs) + + self.federation.register_edu_handler( + "m.receipt", self._received_remote_receipt + ) + + self._latest_serial = 0 + + @defer.inlineCallbacks + def received_client_receipt(self, room_id, receipt_type, user_id, + event_id): + # 1. Persist. + # 2. Notify local clients + # 3. 
Notify remote servers
+
+        receipt = {
+            "room_id": room_id,
+            "receipt_type": receipt_type,
+            "user_id": user_id,
+            "event_ids": [event_id],
+        }
+
+        yield self._handle_new_receipts([receipt])
+        self._push_remotes([receipt])
+
+    @defer.inlineCallbacks
+    def _received_remote_receipt(self, origin, content):
+        receipts = [
+            {
+                "room_id": room_id,
+                "receipt_type": receipt_type,
+                "user_id": user_id,
+                "event_ids": [event_id],
+            }
+            for room_id, room_values in content.items()
+            for event_id, ev_values in room_values.items()
+            for receipt_type, users in ev_values.items()
+            for user_id in users
+        ]
+
+        yield self._handle_new_receipts(receipts)
+
+    @defer.inlineCallbacks
+    def _handle_new_receipts(self, receipts):
+        for receipt in receipts:
+            room_id = receipt["room_id"]
+            receipt_type = receipt["receipt_type"]
+            user_id = receipt["user_id"]
+            event_ids = receipt["event_ids"]
+
+            stream_id, max_persisted_id = yield self.store.insert_receipt(
+                room_id, receipt_type, user_id, event_ids,
+            )
+
+            # TODO: Use max_persisted_id
+
+            self._latest_serial = max(self._latest_serial, stream_id)
+
+            with PreserveLoggingContext():
+                self.notifier.on_new_user_event(
+                    "receipt_key", self._latest_serial, rooms=[room_id]
+                )
+
+            localusers = set()
+            remotedomains = set()
+
+            rm_handler = self.hs.get_handlers().room_member_handler
+            yield rm_handler.fetch_room_distributions_into(
+                room_id, localusers=localusers, remotedomains=remotedomains
+            )
+
+            receipt["remotedomains"] = remotedomains
+
+            self.notifier.on_new_user_event(
+                "receipt_key", self._latest_serial, rooms=[room_id]
+            )
+
+    def _push_remotes(self, receipts):
+        # TODO: Some of this stuff should be coalesced.
+        for receipt in receipts:
+            room_id = receipt["room_id"]
+            receipt_type = receipt["receipt_type"]
+            user_id = receipt["user_id"]
+            event_ids = receipt["event_ids"]
+            remotedomains = receipt["remotedomains"]
+
+            for domain in remotedomains:
+                self.federation.send_edu(
+                    destination=domain,
+                    edu_type="m.receipt",
+                    content={
+                        room_id: {
+                            event_id: {
+                                receipt_type: [user_id]
+                            }
+                            for event_id in event_ids
+                        },
+                    },
+                )

From ddf7979531cf514ee54e280714bdf228d88a62d5 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Thu, 2 Jul 2015 11:40:22 +0100
Subject: [PATCH 014/266] Add receipts_key to StreamToken

---
 synapse/notifier.py                   | 2 +-
 synapse/streams/events.py             | 3 ++-
 synapse/types.py                      | 6 +++++-
 tests/rest/client/v1/test_presence.py | 4 ++--
 4 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/synapse/notifier.py b/synapse/notifier.py
index bdd03dcbe..f13164dbd 100644
--- a/synapse/notifier.py
+++ b/synapse/notifier.py
@@ -283,7 +283,7 @@ class Notifier(object):
 
     @defer.inlineCallbacks
     def wait_for_events(self, user, rooms, timeout, callback,
-                        from_token=StreamToken("s0", "0", "0")):
+                        from_token=StreamToken("s0", "0", "0", "0")):
        """Wait until the callback returns a non empty response or the
        timeout fires.
""" diff --git a/synapse/streams/events.py b/synapse/streams/events.py index dff7970be..0a1a3a3d0 100644 --- a/synapse/streams/events.py +++ b/synapse/streams/events.py @@ -62,7 +62,8 @@ class EventSources(object): ), typing_key=( yield self.sources["typing"].get_current_key() - ) + ), + receipt_key="0", ) defer.returnValue(token) diff --git a/synapse/types.py b/synapse/types.py index 1b21160c5..dd1b10d64 100644 --- a/synapse/types.py +++ b/synapse/types.py @@ -100,7 +100,7 @@ class EventID(DomainSpecificString): class StreamToken( namedtuple( "Token", - ("room_key", "presence_key", "typing_key") + ("room_key", "presence_key", "typing_key", "receipt_key") ) ): _SEPARATOR = "_" @@ -109,6 +109,9 @@ class StreamToken( def from_string(cls, string): try: keys = string.split(cls._SEPARATOR) + if len(keys) == len(cls._fields) - 1: + # i.e. old token from before receipt_key + keys.append("0") return cls(*keys) except: raise SynapseError(400, "Invalid Token") @@ -131,6 +134,7 @@ class StreamToken( (other_token.room_stream_id < self.room_stream_id) or (int(other_token.presence_key) < int(self.presence_key)) or (int(other_token.typing_key) < int(self.typing_key)) + or (int(other_token.receipt_key) < int(self.receipt_key)) ) def copy_and_advance(self, key, new_value): diff --git a/tests/rest/client/v1/test_presence.py b/tests/rest/client/v1/test_presence.py index 4b32c7a20..089a71568 100644 --- a/tests/rest/client/v1/test_presence.py +++ b/tests/rest/client/v1/test_presence.py @@ -357,7 +357,7 @@ class PresenceEventStreamTestCase(unittest.TestCase): # all be ours # I'll already get my own presence state change - self.assertEquals({"start": "0_1_0", "end": "0_1_0", "chunk": []}, + self.assertEquals({"start": "0_1_0_0", "end": "0_1_0_0", "chunk": []}, response ) @@ -376,7 +376,7 @@ class PresenceEventStreamTestCase(unittest.TestCase): "/events?from=s0_1_0&timeout=0", None) self.assertEquals(200, code) - self.assertEquals({"start": "s0_1_0", "end": "s0_2_0", "chunk": [ + self.assertEquals({"start": "s0_1_0_0", "end": "s0_2_0_0", "chunk": [ {"type": "m.presence", "content": { "user_id": "@banana:test", From bd1236c0ee5b3703de51dc773a02da92e0960d0f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 2 Jul 2015 11:40:56 +0100 Subject: [PATCH 015/266] Consolidate duplicate code in notifier --- synapse/handlers/presence.py | 2 +- synapse/handlers/receipts.py | 4 ++-- synapse/handlers/typing.py | 2 +- synapse/notifier.py | 35 +++++++++++------------------------ tests/handlers/test_typing.py | 20 ++++++++++---------- 5 files changed, 25 insertions(+), 38 deletions(-) diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index 7c0319831..341a516da 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -992,7 +992,7 @@ class PresenceHandler(BaseHandler): room_ids([str]): List of room_ids to notify. 
""" with PreserveLoggingContext(): - self.notifier.on_new_user_event( + self.notifier.on_new_event( "presence_key", self._user_cachemap_latest_serial, users_to_push, diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index f3f705063..f0d12d35f 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -88,7 +88,7 @@ class ReceiptsHandler(BaseHandler): self._latest_serial = max(self._latest_serial, stream_id) with PreserveLoggingContext(): - self.notifier.on_new_user_event( + self.notifier.on_new_event( "recei[t_key", self._latest_serial, rooms=[room_id] ) @@ -102,7 +102,7 @@ class ReceiptsHandler(BaseHandler): receipt["remotedomains"] = remotedomains - self.notifier.on_new_user_event( + self.notifier.on_new_event( "receipt_key", self._latest_room_serial, rooms=[room_id] ) diff --git a/synapse/handlers/typing.py b/synapse/handlers/typing.py index a9895292c..026bd2b9d 100644 --- a/synapse/handlers/typing.py +++ b/synapse/handlers/typing.py @@ -218,7 +218,7 @@ class TypingNotificationHandler(BaseHandler): self._room_serials[room_id] = self._latest_room_serial with PreserveLoggingContext(): - self.notifier.on_new_user_event( + self.notifier.on_new_event( "typing_key", self._latest_room_serial, rooms=[room_id] ) diff --git a/synapse/notifier.py b/synapse/notifier.py index f13164dbd..85ae34313 100644 --- a/synapse/notifier.py +++ b/synapse/notifier.py @@ -221,16 +221,7 @@ class Notifier(object): event ) - room_id = event.room_id - - room_user_streams = self.room_to_user_streams.get(room_id, set()) - - user_streams = room_user_streams.copy() - - for user in extra_users: - user_stream = self.user_to_user_stream.get(str(user)) - if user_stream is not None: - user_streams.add(user_stream) + app_streams = set() for appservice in self.appservice_to_user_streams: # TODO (kegan): Redundant appservice listener checks? @@ -242,24 +233,20 @@ class Notifier(object): app_user_streams = self.appservice_to_user_streams.get( appservice, set() ) - user_streams |= app_user_streams + app_streams |= app_user_streams - logger.debug("on_new_room_event listeners %s", user_streams) - - time_now_ms = self.clock.time_msec() - for user_stream in user_streams: - try: - user_stream.notify( - "room_key", "s%d" % (room_stream_id,), time_now_ms - ) - except: - logger.exception("Failed to notify listener") + self.on_new_event( + "room_key", room_stream_id, + users=extra_users, + rooms=[event.room_id], + extra_streams=app_streams, + ) @defer.inlineCallbacks @log_function - def on_new_user_event(self, stream_key, new_token, users=[], rooms=[]): - """ Used to inform listeners that something has happend - presence/user event wise. + def on_new_event(self, stream_key, new_token, users=[], rooms=[], + extra_streams=set()): + """ Used to inform listeners that something has happend event wise. Will wake up all listeners for the given users and rooms. 
""" diff --git a/tests/handlers/test_typing.py b/tests/handlers/test_typing.py index 7ccbe2ea9..41bb08b7c 100644 --- a/tests/handlers/test_typing.py +++ b/tests/handlers/test_typing.py @@ -66,8 +66,8 @@ class TypingNotificationsTestCase(unittest.TestCase): self.mock_federation_resource = MockHttpResource() - mock_notifier = Mock(spec=["on_new_user_event"]) - self.on_new_user_event = mock_notifier.on_new_user_event + mock_notifier = Mock(spec=["on_new_event"]) + self.on_new_event = mock_notifier.on_new_event self.auth = Mock(spec=[]) @@ -182,7 +182,7 @@ class TypingNotificationsTestCase(unittest.TestCase): timeout=20000, ) - self.on_new_user_event.assert_has_calls([ + self.on_new_event.assert_has_calls([ call('typing_key', 1, rooms=[self.room_id]), ]) @@ -245,7 +245,7 @@ class TypingNotificationsTestCase(unittest.TestCase): ) ) - self.on_new_user_event.assert_has_calls([ + self.on_new_event.assert_has_calls([ call('typing_key', 1, rooms=[self.room_id]), ]) @@ -299,7 +299,7 @@ class TypingNotificationsTestCase(unittest.TestCase): room_id=self.room_id, ) - self.on_new_user_event.assert_has_calls([ + self.on_new_event.assert_has_calls([ call('typing_key', 1, rooms=[self.room_id]), ]) @@ -331,10 +331,10 @@ class TypingNotificationsTestCase(unittest.TestCase): timeout=10000, ) - self.on_new_user_event.assert_has_calls([ + self.on_new_event.assert_has_calls([ call('typing_key', 1, rooms=[self.room_id]), ]) - self.on_new_user_event.reset_mock() + self.on_new_event.reset_mock() self.assertEquals(self.event_source.get_current_key(), 1) events = yield self.event_source.get_new_events_for_user(self.u_apple, 0, None) @@ -351,7 +351,7 @@ class TypingNotificationsTestCase(unittest.TestCase): self.clock.advance_time(11) - self.on_new_user_event.assert_has_calls([ + self.on_new_event.assert_has_calls([ call('typing_key', 2, rooms=[self.room_id]), ]) @@ -377,10 +377,10 @@ class TypingNotificationsTestCase(unittest.TestCase): timeout=10000, ) - self.on_new_user_event.assert_has_calls([ + self.on_new_event.assert_has_calls([ call('typing_key', 3, rooms=[self.room_id]), ]) - self.on_new_user_event.reset_mock() + self.on_new_event.reset_mock() self.assertEquals(self.event_source.get_current_key(), 3) events = yield self.event_source.get_new_events_for_user(self.u_apple, 0, None) From ac78e60de6fd2e409d6abd806e8850d539bbe644 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 2 Jul 2015 13:18:41 +0100 Subject: [PATCH 016/266] Add stream_id index --- synapse/storage/schema/delta/21/receipts.sql | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/synapse/storage/schema/delta/21/receipts.sql b/synapse/storage/schema/delta/21/receipts.sql index ccd64ec7f..ac7738e37 100644 --- a/synapse/storage/schema/delta/21/receipts.sql +++ b/synapse/storage/schema/delta/21/receipts.sql @@ -33,6 +33,10 @@ CREATE TABLE IF NOT EXISTS receipts_linearized ( event_id TEXT NOT NULL ); -CREATE INDEX receipts_linearized_room_tuple ON receipts_graph( +CREATE INDEX receipts_linearized_room_tuple ON receipts_linearized( room_id, receipt_type, user_id ); + +CREATE INDEX receipts_linearized_id ON receipts_linearized( + stream_id +); From 1a605456260bfb46d8bb9cff2d40d19aec03daa4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 2 Jul 2015 16:20:10 +0100 Subject: [PATCH 017/266] Add basic impl for room history ACL on GET /messages client API --- synapse/api/constants.py | 2 ++ synapse/handlers/message.py | 33 ++++++++++++++++++- synapse/storage/state.py | 63 +++++++++++++++++++++++++++++++++++-- 3 files changed, 95 
insertions(+), 3 deletions(-) diff --git a/synapse/api/constants.py b/synapse/api/constants.py index d8a18ee87..3e15e8a9d 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -75,6 +75,8 @@ class EventTypes(object): Redaction = "m.room.redaction" Feedback = "m.room.message.feedback" + RoomHistoryVisibility = "m.room.history_visibility" + # These are used for validation Message = "m.room.message" Topic = "m.room.topic" diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index e324662f1..17c75f33c 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -113,11 +113,42 @@ class MessageHandler(BaseHandler): "room_key", next_key ) + if not events: + defer.returnValue({ + "chunk": [], + "start": pagin_config.from_token.to_string(), + "end": next_token.to_string(), + }) + + states = yield self.store.get_state_for_events( + room_id, [e.event_id for e in events], + ) + + events_and_states = zip(events, states) + + def allowed(event_and_state): + _, state = event_and_state + + membership = state.get((EventTypes.Member, user_id), None) + if membership and membership.membership == Membership.JOIN: + return True + + history = state.get((EventTypes.RoomHistoryVisibility, ''), None) + if history and history.content["visibility"] == "after_join": + return False + + events_and_states = filter(allowed, events_and_states) + events = [ + ev + for ev, _ in events_and_states + ] + time_now = self.clock.time_msec() chunk = { "chunk": [ - serialize_event(e, time_now, as_client_event) for e in events + serialize_event(e, time_now, as_client_event) + for e in events ], "start": pagin_config.from_token.to_string(), "end": next_token.to_string(), diff --git a/synapse/storage/state.py b/synapse/storage/state.py index f2b17f29e..d7844edee 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -92,11 +92,11 @@ class StateStore(SQLBaseStore): defer.returnValue(dict(state_list)) @cached(num_args=1) - def _fetch_events_for_group(self, state_group, events): + def _fetch_events_for_group(self, key, events): return self._get_events( events, get_prev_content=False ).addCallback( - lambda evs: (state_group, evs) + lambda evs: (key, evs) ) def _store_state_groups_txn(self, txn, event, context): @@ -194,6 +194,65 @@ class StateStore(SQLBaseStore): events = yield self._get_events(event_ids, get_prev_content=False) defer.returnValue(events) + @defer.inlineCallbacks + def get_state_for_events(self, room_id, event_ids): + def f(txn): + groups = set() + event_to_group = {} + for event_id in event_ids: + # TODO: Remove this loop. 
+ group = self._simple_select_one_onecol_txn( + txn, + table="event_to_state_groups", + keyvalues={"event_id": event_id}, + retcol="state_group", + allow_none=True, + ) + if group: + event_to_group[event_id] = group + groups.add(group) + + group_to_state_ids = {} + for group in groups: + state_ids = self._simple_select_onecol_txn( + txn, + table="state_groups_state", + keyvalues={"state_group": group}, + retcol="event_id", + ) + + group_to_state_ids[group] = state_ids + + return event_to_group, group_to_state_ids + + res = yield self.runInteraction( + "annotate_events_with_state_groups", + f, + ) + + event_to_group, group_to_state_ids = res + + state_list = yield defer.gatherResults( + [ + self._fetch_events_for_group(group, vals) + for group, vals in group_to_state_ids.items() + ], + consumeErrors=True, + ) + + state_dict = { + group: { + (ev.type, ev.state_key): ev + for ev in state + } + for group, state in state_list + } + + defer.returnValue([ + state_dict.get(event_to_group.get(event, None), None) + for event in event_ids + ]) + def _make_group_id(clock): return str(int(clock.time_msec())) + random_string(5) From 41938afed884361451ad6e91eb44e805ebbdaeb0 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 2 Jul 2015 17:02:10 +0100 Subject: [PATCH 018/266] Make v1 initial syncs respect room history ACL --- synapse/handlers/message.py | 61 ++++++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 22 deletions(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 17c75f33c..00c7dbec8 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -120,28 +120,7 @@ class MessageHandler(BaseHandler): "end": next_token.to_string(), }) - states = yield self.store.get_state_for_events( - room_id, [e.event_id for e in events], - ) - - events_and_states = zip(events, states) - - def allowed(event_and_state): - _, state = event_and_state - - membership = state.get((EventTypes.Member, user_id), None) - if membership and membership.membership == Membership.JOIN: - return True - - history = state.get((EventTypes.RoomHistoryVisibility, ''), None) - if history and history.content["visibility"] == "after_join": - return False - - events_and_states = filter(allowed, events_and_states) - events = [ - ev - for ev, _ in events_and_states - ] + events = yield self._filter_events_for_client(user_id, room_id, events) time_now = self.clock.time_msec() @@ -156,6 +135,36 @@ class MessageHandler(BaseHandler): defer.returnValue(chunk) + @defer.inlineCallbacks + def _filter_events_for_client(self, user_id, room_id, events): + states = yield self.store.get_state_for_events( + room_id, [e.event_id for e in events], + ) + + events_and_states = zip(events, states) + + def allowed(event_and_state): + event, state = event_and_state + + if event.type == EventTypes.RoomHistoryVisibility: + return True + + membership = state.get((EventTypes.Member, user_id), None) + if membership and membership.membership == Membership.JOIN: + return True + + history = state.get((EventTypes.RoomHistoryVisibility, ''), None) + if history and history.content.get("visibility", None) == "after_join": + return False + + return True + + events_and_states = filter(allowed, events_and_states) + defer.returnValue([ + ev + for ev, _ in events_and_states + ]) + @defer.inlineCallbacks def create_and_send_event(self, event_dict, ratelimit=True, client=None, txn_id=None): @@ -347,6 +356,10 @@ class MessageHandler(BaseHandler): ] ).addErrback(unwrapFirstError) + messages = yield 
self._filter_events_for_client( + user_id, event.room_id, messages + ) + start_token = now_token.copy_and_replace("room_key", token[0]) end_token = now_token.copy_and_replace("room_key", token[1]) time_now = self.clock.time_msec() @@ -448,6 +461,10 @@ class MessageHandler(BaseHandler): consumeErrors=True, ).addErrback(unwrapFirstError) + messages = yield self._filter_events_for_client( + user_id, room_id, messages + ) + start_token = now_token.copy_and_replace("room_key", token[0]) end_token = now_token.copy_and_replace("room_key", token[1]) From 00ab882ed66d1ecdbd9de15c7ac591f4ee07b9f8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 3 Jul 2015 10:31:17 +0100 Subject: [PATCH 019/266] Add m.room.history_visibility to list of auth events --- synapse/api/auth.py | 2 +- synapse/events/utils.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/synapse/api/auth.py b/synapse/api/auth.py index 4da62e5d8..deca747f7 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -29,7 +29,7 @@ logger = logging.getLogger(__name__) AuthEventTypes = ( EventTypes.Create, EventTypes.Member, EventTypes.PowerLevels, - EventTypes.JoinRules, + EventTypes.JoinRules, EventTypes.RoomHistoryVisibility, ) diff --git a/synapse/events/utils.py b/synapse/events/utils.py index 1aa952150..4c82780f4 100644 --- a/synapse/events/utils.py +++ b/synapse/events/utils.py @@ -74,6 +74,8 @@ def prune_event(event): ) elif event_type == EventTypes.Aliases: add_fields("aliases") + elif event_type == EventTypes.RoomHistoryVisibility: + add_fields("visibility") allowed_fields = { k: v From 12b83f1a0d1e666b8dc629358a904613b23aac11 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 3 Jul 2015 11:24:55 +0100 Subject: [PATCH 020/266] If user supplies filename in URL when downloading from media repo, use that name in Content Disposition --- synapse/rest/media/v1/base_resource.py | 5 ++++- synapse/rest/media/v1/download_resource.py | 16 +++++++++------- synapse/rest/media/v1/thumbnail_resource.py | 2 +- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/synapse/rest/media/v1/base_resource.py b/synapse/rest/media/v1/base_resource.py index 1b7517e2f..c43ae0314 100644 --- a/synapse/rest/media/v1/base_resource.py +++ b/synapse/rest/media/v1/base_resource.py @@ -42,7 +42,10 @@ def parse_media_id(request): # This allows users to append e.g. /test.png to the URL. Useful for # clients that parse the URL to see content type. 
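The comment above describes the new URL shape accepted by the media repo. An illustrative sketch of the parsing rule that follows (the path and values here are made up, not from the patch):

    # For a request to /_matrix/media/v1/download/example.com/abc123/hello.png
    # Twisted's request.postpath would hold something like:
    postpath = ["example.com", "abc123", "hello.png"]

    server_name, media_id = postpath[:2]
    # The trailing segment, when present (and ASCII), becomes the suggested
    # download filename; otherwise it is None.
    file_name = postpath[-1] if len(postpath) > 2 else None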
server_name, media_id = request.postpath[:2] - return (server_name, media_id) + if len(request.postpath) > 2 and is_ascii(request.postpath[-1]): + return server_name, media_id, request.postpath[-1] + else: + return server_name, media_id, None except: raise SynapseError( 404, diff --git a/synapse/rest/media/v1/download_resource.py b/synapse/rest/media/v1/download_resource.py index 2af48ab62..ab384e538 100644 --- a/synapse/rest/media/v1/download_resource.py +++ b/synapse/rest/media/v1/download_resource.py @@ -32,14 +32,16 @@ class DownloadResource(BaseMediaResource): @request_handler @defer.inlineCallbacks def _async_render_GET(self, request): - server_name, media_id = parse_media_id(request) + server_name, media_id, name = parse_media_id(request) if server_name == self.server_name: - yield self._respond_local_file(request, media_id) + yield self._respond_local_file(request, media_id, name) else: - yield self._respond_remote_file(request, server_name, media_id) + yield self._respond_remote_file( + request, server_name, media_id, name + ) @defer.inlineCallbacks - def _respond_local_file(self, request, media_id): + def _respond_local_file(self, request, media_id, name): media_info = yield self.store.get_local_media(media_id) if not media_info: self._respond_404(request) @@ -47,7 +49,7 @@ class DownloadResource(BaseMediaResource): media_type = media_info["media_type"] media_length = media_info["media_length"] - upload_name = media_info["upload_name"] + upload_name = name if name else media_info["upload_name"] file_path = self.filepaths.local_media_filepath(media_id) yield self._respond_with_file( @@ -56,13 +58,13 @@ class DownloadResource(BaseMediaResource): ) @defer.inlineCallbacks - def _respond_remote_file(self, request, server_name, media_id): + def _respond_remote_file(self, request, server_name, media_id, name): media_info = yield self._get_remote_media(server_name, media_id) media_type = media_info["media_type"] media_length = media_info["media_length"] filesystem_id = media_info["filesystem_id"] - upload_name = media_info["upload_name"] + upload_name = name if name else media_info["upload_name"] file_path = self.filepaths.remote_media_filepath( server_name, filesystem_id diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index 1dadd880b..4a9b6d8ee 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -36,7 +36,7 @@ class ThumbnailResource(BaseMediaResource): @request_handler @defer.inlineCallbacks def _async_render_GET(self, request): - server_name, media_id = parse_media_id(request) + server_name, media_id, _ = parse_media_id(request) width = parse_integer(request, "width") height = parse_integer(request, "height") method = parse_string(request, "method", "scale") From 400894616d15a01c168b2356d950972b6e746496 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 3 Jul 2015 14:51:01 +0100 Subject: [PATCH 021/266] Respect m.room.history_visibility in v2_alpha sync API --- synapse/handlers/sync.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index bd8c60368..5078c4e45 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -292,6 +292,36 @@ class SyncHandler(BaseHandler): next_batch=now_token, )) + @defer.inlineCallbacks + def _filter_events_for_client(self, user_id, room_id, events): + states = yield self.store.get_state_for_events( + room_id, [e.event_id for e in events], 
+ ) + + events_and_states = zip(events, states) + + def allowed(event_and_state): + event, state = event_and_state + + if event.type == EventTypes.RoomHistoryVisibility: + return True + + membership = state.get((EventTypes.Member, user_id), None) + if membership and membership.membership == Membership.JOIN: + return True + + history = state.get((EventTypes.RoomHistoryVisibility, ''), None) + if history and history.content.get("visibility", None) == "after_join": + return False + + return True + + events_and_states = filter(allowed, events_and_states) + defer.returnValue([ + ev + for ev, _ in events_and_states + ]) + @defer.inlineCallbacks def load_filtered_recents(self, room_id, sync_config, now_token, since_token=None): @@ -313,6 +343,9 @@ class SyncHandler(BaseHandler): (room_key, _) = keys end_key = "s" + room_key.split('-')[-1] loaded_recents = sync_config.filter.filter_room_events(events) + loaded_recents = yield self._filter_events_for_client( + sync_config.user.to_string(), room_id, loaded_recents, + ) loaded_recents.extend(recents) recents = loaded_recents if len(events) <= load_limit: From c3e2600c6727534d4ebf20dcd8219e248ca31461 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 3 Jul 2015 17:52:57 +0100 Subject: [PATCH 022/266] Filter and redact events that the other server doesn't have permission to see during backfill --- synapse/handlers/federation.py | 44 ++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index b5d882fd6..663d05c63 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -31,6 +31,8 @@ from synapse.crypto.event_signing import ( ) from synapse.types import UserID +from synapse.events.utils import prune_event + from synapse.util.retryutils import NotRetryingDestination from twisted.internet import defer @@ -222,6 +224,46 @@ class FederationHandler(BaseHandler): "user_joined_room", user=user, room_id=event.room_id ) + @defer.inlineCallbacks + def _filter_events_for_server(self, server_name, room_id, events): + states = yield self.store.get_state_for_events( + room_id, [e.event_id for e in events], + ) + + events_and_states = zip(events, states) + + def redact_disallowed(event_and_state): + event, state = event_and_state + + if not state: + return event + + history = state.get((EventTypes.RoomHistoryVisibility, ''), None) + if history and history.content.get("visibility", None) == "after_join": + for ev in state.values(): + if ev.type != EventTypes.Member: + continue + try: + domain = UserID.from_string(ev.state_key).domain + except: + continue + + if domain != server_name: + continue + + if ev.membership == Membership.JOIN: + return event + else: + return prune_event(event) + + return event + + res = map(redact_disallowed, events_and_states) + + logger.info("_filter_events_for_server %r", res) + + defer.returnValue(res) + @log_function @defer.inlineCallbacks def backfill(self, dest, room_id, limit, extremities=[]): @@ -882,6 +924,8 @@ class FederationHandler(BaseHandler): limit ) + events = yield self._filter_events_for_server(origin, room_id, events) + defer.returnValue(events) @defer.inlineCallbacks From 65e69dec8b202e3f03063b18ebde55ceee69e2e7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 6 Jul 2015 09:31:40 +0100 Subject: [PATCH 023/266] Don't explode if we don't recognize one of the event_ids in the backfill request --- synapse/storage/event_federation.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff 
--git a/synapse/storage/event_federation.py b/synapse/storage/event_federation.py index ace745953..c71019d93 100644 --- a/synapse/storage/event_federation.py +++ b/synapse/storage/event_federation.py @@ -408,10 +408,12 @@ class EventFederationStore(SQLBaseStore): keyvalues={ "event_id": event_id, }, - retcol="depth" + retcol="depth", + allow_none=True, ) - queue.put((-depth, event_id)) + if depth: + queue.put((-depth, event_id)) while not queue.empty() and len(event_results) < limit: try: From fb47c3cfbe213c01b25e5605b81c998b764e2bf8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 6 Jul 2015 13:05:52 +0100 Subject: [PATCH 024/266] Rename key and values for m.room.history_visibility. Support 'invited' value --- synapse/events/utils.py | 2 +- synapse/handlers/federation.py | 34 ++++++++++++++++++++-------------- synapse/handlers/message.py | 24 ++++++++++++++++++++---- synapse/handlers/sync.py | 25 ++++++++++++++++++++----- 4 files changed, 61 insertions(+), 24 deletions(-) diff --git a/synapse/events/utils.py b/synapse/events/utils.py index 4c82780f4..7bd78343f 100644 --- a/synapse/events/utils.py +++ b/synapse/events/utils.py @@ -75,7 +75,7 @@ def prune_event(event): elif event_type == EventTypes.Aliases: add_fields("aliases") elif event_type == EventTypes.RoomHistoryVisibility: - add_fields("visibility") + add_fields("history_visibility") allowed_fields = { k: v diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 663d05c63..cd3867ed9 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -239,22 +239,28 @@ class FederationHandler(BaseHandler): return event history = state.get((EventTypes.RoomHistoryVisibility, ''), None) - if history and history.content.get("visibility", None) == "after_join": - for ev in state.values(): - if ev.type != EventTypes.Member: - continue - try: - domain = UserID.from_string(ev.state_key).domain - except: - continue + if history: + visibility = history.content.get("history_visibility", "shared") + if visibility in ["invited", "joined"]: + for ev in state.values(): + if ev.type != EventTypes.Member: + continue + try: + domain = UserID.from_string(ev.state_key).domain + except: + continue - if domain != server_name: - continue + if domain != server_name: + continue - if ev.membership == Membership.JOIN: - return event - else: - return prune_event(event) + memtype = ev.membership + if memtype == Membership.JOIN: + return event + elif memtype == Membership.INVITE: + if visibility == "invited": + return event + else: + return prune_event(event) return event diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 00c7dbec8..d8b117612 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -149,13 +149,29 @@ class MessageHandler(BaseHandler): if event.type == EventTypes.RoomHistoryVisibility: return True - membership = state.get((EventTypes.Member, user_id), None) - if membership and membership.membership == Membership.JOIN: + membership_ev = state.get((EventTypes.Member, user_id), None) + if membership_ev: + membership = membership_ev.membership + else: + membership = Membership.LEAVE + + if membership == Membership.JOIN: return True history = state.get((EventTypes.RoomHistoryVisibility, ''), None) - if history and history.content.get("visibility", None) == "after_join": - return False + if history: + visibility = history.content.get("history_visibility", "shared") + else: + visibility = "shared" + + if visibility == "public": + return True + elif visibility 
== "shared": + return True + elif visibility == "joined": + return membership == Membership.JOIN + elif visibility == "invited": + return membership == Membership.INVITE return True diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 5078c4e45..6cff6230c 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -306,16 +306,31 @@ class SyncHandler(BaseHandler): if event.type == EventTypes.RoomHistoryVisibility: return True - membership = state.get((EventTypes.Member, user_id), None) - if membership and membership.membership == Membership.JOIN: + membership_ev = state.get((EventTypes.Member, user_id), None) + if membership_ev: + membership = membership_ev.membership + else: + membership = Membership.LEAVE + + if membership == Membership.JOIN: return True history = state.get((EventTypes.RoomHistoryVisibility, ''), None) - if history and history.content.get("visibility", None) == "after_join": - return False + if history: + visibility = history.content.get("history_visibility", "shared") + else: + visibility = "shared" + + if visibility == "public": + return True + elif visibility == "shared": + return True + elif visibility == "joined": + return membership == Membership.JOIN + elif visibility == "invited": + return membership == Membership.INVITE return True - events_and_states = filter(allowed, events_and_states) defer.returnValue([ ev From 1a3255b507550c76f11251c890a43947b1f4e272 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 6 Jul 2015 13:09:16 +0100 Subject: [PATCH 025/266] Add m.room.history_visibility to newly created rooms' m.room.power_levels --- synapse/api/auth.py | 1 + synapse/handlers/room.py | 1 + 2 files changed, 2 insertions(+) diff --git a/synapse/api/auth.py b/synapse/api/auth.py index deca747f7..1a25bf108 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -575,6 +575,7 @@ class Auth(object): levels_to_check = [ ("users_default", []), ("events_default", []), + ("state_default", []), ("ban", []), ("redact", []), ("kick", []), diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 4bd027d9b..891707df4 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -213,6 +213,7 @@ class RoomCreationHandler(BaseHandler): "events": { EventTypes.Name: 100, EventTypes.PowerLevels: 100, + EventTypes.RoomHistoryVisibility: 100, }, "events_default": 0, "state_default": 50, From a7dcbfe430f31a6e9fc6b5920ddff39eb1b1e3b4 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Mon, 6 Jul 2015 16:47:17 +0100 Subject: [PATCH 026/266] Don't 500 if a group is missing from the regex --- synapse/http/server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/http/server.py b/synapse/http/server.py index 807ff95c6..b60e905a6 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -207,7 +207,7 @@ class JsonResource(HttpServer, resource.Resource): incoming_requests_counter.inc(request.method, servlet_classname) args = [ - urllib.unquote(u).decode("UTF-8") for u in m.groups() + urllib.unquote(u).decode("UTF-8") if u else u for u in m.groups() ] callback_return = yield callback(request, *args) From b5770f89478509b5c4c1a610753989fbe29e35e7 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Mon, 6 Jul 2015 18:46:47 +0100 Subject: [PATCH 027/266] Add store for client end to end keys --- synapse/storage/__init__.py | 4 +- synapse/storage/end_to_end_keys.py | 133 ++++++++++++++++++ .../schema/delta/21/end_to_end_keys.sql | 35 +++++ 3 files changed, 171 insertions(+), 1 deletion(-) create mode 100644 
synapse/storage/end_to_end_keys.py create mode 100644 synapse/storage/schema/delta/21/end_to_end_keys.sql diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index c137f4782..e089d8167 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -37,6 +37,7 @@ from .rejections import RejectionsStore from .state import StateStore from .signatures import SignatureStore from .filtering import FilteringStore +from .end_to_end_keys import EndToEndKeyStore import fnmatch @@ -51,7 +52,7 @@ logger = logging.getLogger(__name__) # Remember to update this number every time a change is made to database # schema files, so the users will be informed on server restarts. -SCHEMA_VERSION = 20 +SCHEMA_VERSION = 21 dir_path = os.path.abspath(os.path.dirname(__file__)) @@ -74,6 +75,7 @@ class DataStore(RoomMemberStore, RoomStore, PushRuleStore, ApplicationServiceTransactionStore, EventsStore, + EndToEndKeyStore, ): def __init__(self, hs): diff --git a/synapse/storage/end_to_end_keys.py b/synapse/storage/end_to_end_keys.py new file mode 100644 index 000000000..936a64669 --- /dev/null +++ b/synapse/storage/end_to_end_keys.py @@ -0,0 +1,133 @@ +# -*- coding: utf-8 -*- +# Copyright 2015 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from _base import SQLBaseStore + + +class EndToEndKeyStore(SQLBaseStore): + def set_e2e_device_keys(self, user_id, device_id, time_now, json_bytes): + return self._simple_upsert( + table="e2e_device_keys_json", + keyvalues={ + "user_id": user_id, + "device_id": device_id, + }, + values={ + "ts_added_ms": time_now, + "key_json": json_bytes, + } + ) + + def get_e2e_device_keys(self, query_list): + """Fetch a list of device keys. + Args: + query_list(list): List of pairs of user_ids and device_ids. + Returns: + Dict mapping from user-id to dict mapping from device_id to + key json byte strings. 
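+ For example (an illustrative shape; the IDs are hypothetical): + { "@alice:example.com": { "EXAMPLEDEVICE": "<device keys json blob>" } }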
+ """ + def _get_e2e_device_keys(txn): + result = {} + for user_id, device_id in query_list: + user_result = result.setdefault(user_id, {}) + keyvalues = {"user_id": user_id} + if device_id: + keyvalues["device_id"] = device_id + rows = self._simple_select_list_txn( + txn, table="e2e_device_keys_json", + keyvalues=keyvalues, + retcols=["device_id", "key_json"] + ) + for row in rows: + user_result[row["device_id"]] = row["key_json"] + return result + return self.runInteraction("get_e2e_device_keys", _get_e2e_device_keys) + + def add_e2e_one_time_keys(self, user_id, device_id, time_now, valid_until, + key_list): + def _add_e2e_one_time_keys(txn): + for (algorithm, key_id, json_bytes) in key_list: + self._simple_upsert_txn( + txn, table="e2e_one_time_keys_json", + keyvalues={ + "user_id": user_id, + "device_id": device_id, + "algorithm": algorithm, + "key_id": key_id, + }, + values={ + "ts_added_ms": time_now, + "valid_until_ms": valid_until, + "key_json": json_bytes, + } + ) + return self.runInteraction( + "add_e2e_one_time_keys", _add_e2e_one_time_keys + ) + + def count_e2e_one_time_keys(self, user_id, device_id, time_now): + """ Count the number of one time keys the server has for a device + Returns: + Dict mapping from algorithm to number of keys for that algorithm. + """ + def _count_e2e_one_time_keys(txn): + sql = ( + "DELETE FROM e2e_one_time_keys_json" + " WHERE user_id = ? AND device_id = ? AND valid_until_ms < ?" + ) + txn.execute(sql, (user_id, device_id, time_now)) + sql = ( + "SELECT algorithm, COUNT(key_id) FROM e2e_one_time_keys_json" + " WHERE user_id = ? AND device_id = ?" + " GROUP BY algorithm" + ) + txn.execute(sql, (user_id, device_id)) + result = {} + for algorithm, key_count in txn.fetchall(): + result[algorithm] = key_count + return result + return self.runInteraction( + "count_e2e_one_time_keys", _count_e2e_one_time_keys + ) + + def take_e2e_one_time_keys(self, query_list, time_now): + """Take a list of one time keys out of the database""" + def _take_e2e_one_time_keys(txn): + sql = ( + "SELECT key_id, key_json FROM e2e_one_time_keys_json" + " WHERE user_id = ? AND device_id = ? AND algorithm = ?" + " AND valid_until_ms > ?" + " LIMIT 1" + ) + result = {} + delete = [] + for user_id, device_id, algorithm in query_list: + user_result = result.setdefault(user_id, {}) + device_result = user_result.setdefault(device_id, {}) + txn.execute(sql, (user_id, device_id, algorithm, time_now)) + for key_id, key_json in txn.fetchall(): + device_result[algorithm + ":" + key_id] = key_json + delete.append((user_id, device_id, algorithm, key_id)) + sql = ( + "DELETE FROM e2e_one_time_keys_json" + " WHERE user_id = ? AND device_id = ? AND algorithm = ?" + " AND key_id = ?" + ) + for user_id, device_id, algorithm, key_id in delete: + txn.execute(sql, (user_id, device_id, algorithm, key_id)) + return result + return self.runInteraction( + "take_e2e_one_time_keys", _take_e2e_one_time_keys + ) diff --git a/synapse/storage/schema/delta/21/end_to_end_keys.sql b/synapse/storage/schema/delta/21/end_to_end_keys.sql new file mode 100644 index 000000000..107d2e67c --- /dev/null +++ b/synapse/storage/schema/delta/21/end_to_end_keys.sql @@ -0,0 +1,35 @@ +/* Copyright 2015 OpenMarket Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +CREATE TABLE IF NOT EXISTS e2e_device_keys_json ( + user_id TEXT NOT NULL, -- The user these keys are for. + device_id TEXT NOT NULL, -- Which of the user's devices these keys are for. + ts_added_ms BIGINT NOT NULL, -- When the keys were uploaded. + key_json TEXT NOT NULL, -- The keys for the device as a JSON blob. + CONSTRAINT uniqueness UNIQUE (user_id, device_id) +); + + +CREATE TABLE IF NOT EXISTS e2e_one_time_keys_json ( + user_id TEXT NOT NULL, -- The user this one-time key is for. + device_id TEXT NOT NULL, -- The device this one-time key is for. + algorithm TEXT NOT NULL, -- Which algorithm this one-time key is for. + key_id TEXT NOT NULL, -- An id for suppressing duplicate uploads. + ts_added_ms BIGINT NOT NULL, -- When this key was uploaded. + valid_until_ms BIGINT NOT NULL, -- When this key is valid until. + key_json TEXT NOT NULL, -- The key as a JSON blob. + CONSTRAINT uniqueness UNIQUE (user_id, device_id, algorithm, key_id) +); From 2ef182ee9358bce24cdef7c09ae7289925d076ef Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Mon, 6 Jul 2015 18:47:57 +0100 Subject: [PATCH 028/266] Add client API for uploading and querying keys for end to end encryption --- synapse/rest/client/v2_alpha/__init__.py | 4 +- synapse/rest/client/v2_alpha/keys.py | 287 +++++++++++++++++++++++ 2 files changed, 290 insertions(+), 1 deletion(-) create mode 100644 synapse/rest/client/v2_alpha/keys.py diff --git a/synapse/rest/client/v2_alpha/__init__.py b/synapse/rest/client/v2_alpha/__init__.py index 7d1aff430..c3323d2a8 100644 --- a/synapse/rest/client/v2_alpha/__init__.py +++ b/synapse/rest/client/v2_alpha/__init__.py @@ -18,7 +18,8 @@ from . import ( filter, account, register, - auth + auth, + keys, ) from synapse.http.server import JsonResource @@ -38,3 +39,4 @@ class ClientV2AlphaRestResource(JsonResource): account.register_servlets(hs, client_resource) register.register_servlets(hs, client_resource) auth.register_servlets(hs, client_resource) + keys.register_servlets(hs, client_resource) diff --git a/synapse/rest/client/v2_alpha/keys.py b/synapse/rest/client/v2_alpha/keys.py new file mode 100644 index 000000000..3bb4ad64f --- /dev/null +++ b/synapse/rest/client/v2_alpha/keys.py @@ -0,0 +1,287 @@ +# -*- coding: utf-8 -*- +# Copyright 2015 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
 + + from twisted.internet import defer + + from synapse.api.errors import SynapseError + from synapse.http.servlet import RestServlet + from syutil.jsonutil import encode_canonical_json + + from ._base import client_v2_pattern + + import simplejson as json + import logging + + logger = logging.getLogger(__name__) + + + class KeyUploadServlet(RestServlet): + """ + POST /keys/upload/<device_id> HTTP/1.1 + Content-Type: application/json + + { + "device_keys": { + "user_id": "<user_id>", + "device_id": "<device_id>", + "valid_until_ts": <millisecond_timestamp>, + "algorithms": [ + "m.olm.curve25519-aes-sha256", + ], + "keys": { + "<algorithm>:<device_id>": "<key_base64>", + }, + "signatures": { + "<user_id>/<device_id>": { + "<algorithm>:<device_id>": "<signature_base64>" + } } }, + "one_time_keys": { + "<algorithm>:<key_id>": "<key_base64>" + }, + "one_time_keys_valid_for": <millisecond_duration>, + } + """ + PATTERN = client_v2_pattern("/keys/upload/(?P<device_id>[^/]*)") + + def __init__(self, hs): + super(KeyUploadServlet, self).__init__() + self.store = hs.get_datastore() + self.clock = hs.get_clock() + self.auth = hs.get_auth() + + @defer.inlineCallbacks + def on_POST(self, request, device_id): + auth_user, client_info = yield self.auth.get_user_by_req(request) + user_id = auth_user.to_string() + # TODO: Check that the device_id matches that in the authentication + # or derive the device_id from the authentication instead. + try: + body = json.loads(request.content.read()) + except: + raise SynapseError(400, "Invalid key JSON") + time_now = self.clock.time_msec() + + # TODO: Validate the JSON to make sure it has the right keys. + device_keys = body.get("device_keys", None) + if device_keys: + logger.info( + "Updating device_keys for device %r for user %r at %d", + device_id, auth_user, time_now + ) + # TODO: Sign the JSON with the server key + yield self.store.set_e2e_device_keys( + user_id, device_id, time_now, + encode_canonical_json(device_keys) + ) + + one_time_keys = body.get("one_time_keys", None) + one_time_keys_valid_for = body.get("one_time_keys_valid_for", None) + if one_time_keys: + valid_until = int(one_time_keys_valid_for) + time_now + logger.info( + "Adding %d one_time_keys for device %r for user %r at %d" + " valid_until %d", + len(one_time_keys), device_id, user_id, time_now, valid_until + ) + key_list = [] + for key_id, key_json in one_time_keys.items(): + algorithm, key_id = key_id.split(":") + key_list.append(( + algorithm, key_id, encode_canonical_json(key_json) + )) + + yield self.store.add_e2e_one_time_keys( + user_id, device_id, time_now, valid_until, key_list + ) + + result = yield self.store.count_e2e_one_time_keys( + user_id, device_id, time_now + ) + defer.returnValue((200, {"one_time_key_counts": result})) + + @defer.inlineCallbacks + def on_GET(self, request, device_id): + auth_user, client_info = yield self.auth.get_user_by_req(request) + user_id = auth_user.to_string() + time_now = self.clock.time_msec() + + result = yield self.store.count_e2e_one_time_keys( + user_id, device_id, time_now + ) + defer.returnValue((200, {"one_time_key_counts": result})) + + + class KeyQueryServlet(RestServlet): + """ + GET /keys/query/<user_id> HTTP/1.1 + + GET /keys/query/<user_id>/<device_id> HTTP/1.1 + + POST /keys/query HTTP/1.1 + Content-Type: application/json + { + "device_keys": { + "<user_id>": ["<device_id>"] + } } + + HTTP/1.1 200 OK + { + "device_keys": { + "<user_id>": { + "<device_id>": { + "user_id": "<user_id>", // Duplicated to be signed + "device_id": "<device_id>", // Duplicated to be signed + "valid_until_ts": <millisecond_timestamp>, + "algorithms": [ // List of supported algorithms + "m.olm.curve25519-aes-sha256", + ], + "keys": { // Must include a ed25519 signing key + "<algorithm>:<device_id>": "<key_base64>", + }, + "signatures": { + // Must be signed with device's ed25519 key + "<user_id>/<device_id>": { + "<algorithm>:<device_id>": "<signature_base64>" + } + // Must be
signed by this server. + "<server_name>": { + "<algorithm>:<key_id>": "<signature_base64>" + } } } } } } + """ + + PATTERN = client_v2_pattern( + "/keys/query(?:" + "/(?P<user_id>[^/]*)(?:" + "/(?P<device_id>[^/]*)" + ")?" + ")?" + ) + + def __init__(self, hs): + super(KeyQueryServlet, self).__init__() + self.store = hs.get_datastore() + self.auth = hs.get_auth() + + @defer.inlineCallbacks + def on_POST(self, request, user_id, device_id): + logger.debug("onPOST") + yield self.auth.get_user_by_req(request) + try: + body = json.loads(request.content.read()) + except: + raise SynapseError(400, "Invalid key JSON") + query = [] + for user_id, device_ids in body.get("device_keys", {}).items(): + if not device_ids: + query.append((user_id, None)) + else: + for device_id in device_ids: + query.append((user_id, device_id)) + results = yield self.store.get_e2e_device_keys([(user_id, device_id)]) + defer.returnValue(self.json_result(request, results)) + + @defer.inlineCallbacks + def on_GET(self, request, user_id, device_id): + auth_user, client_info = yield self.auth.get_user_by_req(request) + auth_user_id = auth_user.to_string() + if not user_id: + user_id = auth_user_id + if not device_id: + device_id = None + # Returns a map of user_id->device_id->json_bytes. + results = yield self.store.get_e2e_device_keys([(user_id, device_id)]) + defer.returnValue(self.json_result(request, results)) + + def json_result(self, request, results): + json_result = {} + for user_id, device_keys in results.items(): + for device_id, json_bytes in device_keys.items(): + json_result.setdefault(user_id, {})[device_id] = json.loads( + json_bytes + ) + return (200, {"device_keys": json_result}) + + + class OneTimeKeyServlet(RestServlet): + """ + GET /keys/take/<user_id>/<device_id>/<algorithm> HTTP/1.1 + + POST /keys/take HTTP/1.1 + { + "one_time_keys": { + "<user_id>": { + "<device_id>": "<algorithm>" + } } } + + HTTP/1.1 200 OK + { + "one_time_keys": { + "<user_id>": { + "<device_id>": { + "<algorithm>:<key_id>": "<key_base64>" + } } } } + + """ + PATTERN = client_v2_pattern( + "/keys/take(?:/?|(?:/" + "(?P<user_id>[^/]*)/(?P<device_id>[^/]*)/(?P<algorithm>[^/]*)" + ")?)" + ) + + def __init__(self, hs): + super(OneTimeKeyServlet, self).__init__() + self.store = hs.get_datastore() + self.auth = hs.get_auth() + self.clock = hs.get_clock() + + @defer.inlineCallbacks + def on_GET(self, request, user_id, device_id, algorithm): + yield self.auth.get_user_by_req(request) + time_now = self.clock.time_msec() + results = yield self.store.take_e2e_one_time_keys( + [(user_id, device_id, algorithm)], time_now + ) + defer.returnValue(self.json_result(request, results)) + + @defer.inlineCallbacks + def on_POST(self, request, user_id, device_id, algorithm): + yield self.auth.get_user_by_req(request) + try: + body = json.loads(request.content.read()) + except: + raise SynapseError(400, "Invalid key JSON") + query = [] + for user_id, device_keys in body.get("one_time_keys", {}).items(): + for device_id, algorithm in device_keys.items(): + query.append((user_id, device_id, algorithm)) + time_now = self.clock.time_msec() + results = yield self.store.take_e2e_one_time_keys(query, time_now) + defer.returnValue(self.json_result(request, results)) + + def json_result(self, request, results): + json_result = {} + for user_id, device_keys in results.items(): + for device_id, keys in device_keys.items(): + for key_id, json_bytes in keys.items(): + json_result.setdefault(user_id, {})[device_id] = { + key_id: json.loads(json_bytes) + } + return (200, {"one_time_keys": json_result}) + + + def register_servlets(hs, http_server): + KeyUploadServlet(hs).register(http_server) + KeyQueryServlet(hs).register(http_server) + OneTimeKeyServlet(hs).register(http_server)
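For orientation, here is a minimal client-side sketch (not part of the patch series) of how the three endpoints added above might be exercised together. The homeserver URL, access token, user ID and device ID are hypothetical, the /_matrix/client/v2_alpha prefix is assumed from client_v2_pattern, values in angle brackets are placeholders, and the payload shapes follow the servlet docstrings:

import json
import urllib2

BASE = "http://localhost:8008/_matrix/client/v2_alpha"  # assumed prefix
TOKEN = "EXAMPLE_ACCESS_TOKEN"  # hypothetical access token

def post_json(path, payload):
    # All three servlets authenticate the request; here the access token
    # is passed as a query parameter.
    req = urllib2.Request(
        "%s%s?access_token=%s" % (BASE, path, TOKEN),
        data=json.dumps(payload),
        headers={"Content-Type": "application/json"},
    )
    return json.load(urllib2.urlopen(req))

# 1. Upload a device key blob plus a single one-time key, valid for a
#    week; the response reports remaining one-time keys per algorithm.
counts = post_json("/keys/upload/EXAMPLEDEVICE", {
    "device_keys": {
        "user_id": "@alice:example.com",
        "device_id": "EXAMPLEDEVICE",
        "algorithms": ["m.olm.curve25519-aes-sha256"],
        "keys": {"curve25519:EXAMPLEDEVICE": "<key_base64>"},
    },
    "one_time_keys": {"curve25519:AAAAAQ": "<key_base64>"},
    "one_time_keys_valid_for": 7 * 24 * 3600 * 1000,  # milliseconds
})["one_time_key_counts"]

# 2. Query the current device keys for a user (an empty device list
#    means all devices for that user).
devices = post_json("/keys/query", {
    "device_keys": {"@alice:example.com": []},
})["device_keys"]

# 3. Claim ("take") a one-time key for the device; the store deletes
#    each returned key so it can only be claimed once.
claimed = post_json("/keys/take", {
    "one_time_keys": {"@alice:example.com": {"EXAMPLEDEVICE": "curve25519"}},
})["one_time_keys"]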
From e8b2f6f8a13c1b419911a54f349d05fe97b5ace0 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 7 Jul 2015 10:55:12 +0100 Subject: [PATCH 029/266] Add a ReceiptServlet --- synapse/rest/client/v2_alpha/__init__.py | 4 +- synapse/rest/client/v2_alpha/receipts.py | 56 ++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 1 deletion(-) create mode 100644 synapse/rest/client/v2_alpha/receipts.py diff --git a/synapse/rest/client/v2_alpha/__init__.py b/synapse/rest/client/v2_alpha/__init__.py index 7d1aff430..231e1dd97 100644 --- a/synapse/rest/client/v2_alpha/__init__.py +++ b/synapse/rest/client/v2_alpha/__init__.py @@ -18,7 +18,8 @@ from . import ( filter, account, register, - auth + auth, + receipts, ) from synapse.http.server import JsonResource @@ -38,3 +39,4 @@ class ClientV2AlphaRestResource(JsonResource): account.register_servlets(hs, client_resource) register.register_servlets(hs, client_resource) auth.register_servlets(hs, client_resource) + receipts.register_servlets(hs, client_resource) diff --git a/synapse/rest/client/v2_alpha/receipts.py b/synapse/rest/client/v2_alpha/receipts.py new file mode 100644 index 000000000..829427b7b --- /dev/null +++ b/synapse/rest/client/v2_alpha/receipts.py @@ -0,0 +1,56 @@ +# -*- coding: utf-8 -*- +# Copyright 2015 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
 + + from twisted.internet import defer + + from synapse.http.servlet import RestServlet + from ._base import client_v2_pattern + + import logging + + + logger = logging.getLogger(__name__) + + + class ReceiptRestServlet(RestServlet): + PATTERN = client_v2_pattern( + "/rooms/(?P<room_id>[^/]*)" + "/receipt/(?P<receipt_type>[^/]*)" + "/(?P<event_id>[^/])*" + ) + + def __init__(self, hs): + super(ReceiptRestServlet, self).__init__() + self.hs = hs + self.auth = hs.get_auth() + self.receipts_handler = hs.get_handlers().receipts_handler + + @defer.inlineCallbacks + def on_POST(self, request, room_id, receipt_type, event_id): + user, client = yield self.auth.get_user_by_req(request) + + # TODO: STUFF + yield self.receipts_handler.received_client_receipt( + room_id, + receipt_type, + user_id=user.to_string(), + event_id=event_id + ) + + defer.returnValue((200, {})) + + + def register_servlets(hs, http_server): + ReceiptRestServlet(hs).register(http_server) From 716e42693354553ee2878e3a8df6811226e91130 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 7 Jul 2015 10:55:31 +0100 Subject: [PATCH 030/266] Fix various typos --- synapse/handlers/__init__.py | 2 ++ synapse/handlers/receipts.py | 6 ++++-- synapse/storage/__init__.py | 3 +++ synapse/storage/receipts.py | 13 +++++++------ 4 files changed, 16 insertions(+), 8 deletions(-) diff --git a/synapse/handlers/__init__.py b/synapse/handlers/__init__.py index 685792dbd..dc5b6ef79 100644 --- a/synapse/handlers/__init__.py +++ b/synapse/handlers/__init__.py @@ -32,6 +32,7 @@ from .appservice import ApplicationServicesHandler from .sync import SyncHandler from .auth import AuthHandler from .identity import IdentityHandler +from .receipts import ReceiptsHandler class Handlers(object): @@ -57,6 +58,7 @@ class Handlers(object): self.directory_handler = DirectoryHandler(hs) self.typing_notification_handler = TypingNotificationHandler(hs) self.admin_handler = AdminHandler(hs) + self.receipts_handler = ReceiptsHandler(hs) asapi = ApplicationServiceApi(hs) self.appservice_handler = ApplicationServicesHandler( hs, asapi, AppServiceScheduler( diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index f0d12d35f..fc2f38c1c 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -31,6 +31,8 @@ class ReceiptsHandler(BaseHandler): def __init__(self, hs): super(ReceiptsHandler, self).__init__(hs) + self.hs = hs + self.federation = hs.get_replication_layer() self.federation.register_edu_handler( "m.receipt", self._received_remote_receipt ) @@ -89,13 +91,13 @@ class ReceiptsHandler(BaseHandler): with PreserveLoggingContext(): self.notifier.on_new_event( - "recei[t_key", self._latest_serial, rooms=[room_id] + "receipt_key", self._latest_serial, rooms=[room_id] ) localusers = set() remotedomains = set() - rm_handler = self.homeserver.get_handlers().room_member_handler + rm_handler = self.hs.get_handlers().room_member_handler yield rm_handler.fetch_room_distributions_into( room_id, localusers=localusers, remotedomains=remotedomains ) diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index 275598add..2bc88a795 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -38,6 +38,8 @@ from .state import StateStore from .signatures import SignatureStore from .filtering import FilteringStore +from .receipts import ReceiptsStore + import fnmatch import imp @@ -74,6 +76,7 @@ class DataStore(RoomMemberStore, RoomStore, PushRuleStore, ApplicationServiceTransactionStore, EventsStore, + ReceiptsStore, ): def __init__(self, hs): diff
--git a/synapse/storage/receipts.py b/synapse/storage/receipts.py index 0168e74a0..15c11fd41 100644 --- a/synapse/storage/receipts.py +++ b/synapse/storage/receipts.py @@ -18,7 +18,7 @@ from ._base import SQLBaseStore, cached from twisted.internet import defer -class ReceiptStore(SQLBaseStore): +class ReceiptsStore(SQLBaseStore): @cached @defer.inlineCallbacks @@ -77,6 +77,7 @@ class ReceiptStore(SQLBaseStore): txn, table="receipts_linearized", values={ + "stream_id": stream_id, "room_id": room_id, "receipt_type": receipt_type, "user_id": user_id, @@ -109,16 +110,16 @@ class ReceiptStore(SQLBaseStore): return None linearized_event_id = yield self.runInteraction( - graph_to_linear, desc="insert_receipt_conv" + "insert_receipt_conv", graph_to_linear ) - stream_id_manager = yield self._stream_id_gen.get_next(self) - with stream_id_manager() as stream_id: + stream_id_manager = yield self._receipts_id_gen.get_next(self) + with stream_id_manager as stream_id: yield self.runInteraction( + "insert_linearized_receipt", self.insert_linearized_receipt_txn, room_id, receipt_type, user_id, linearized_event_id, stream_id=stream_id, - desc="insert_linearized_receipt" ) yield self.insert_graph_receipt( @@ -131,9 +132,9 @@ class ReceiptStore(SQLBaseStore): def insert_graph_receipt(self, room_id, receipt_type, user_id, event_ids): return self.runInteraction( + "insert_graph_receipt", self.insert_graph_receipt_txn, room_id, receipt_type, user_id, event_ids, - desc="insert_graph_receipt" ) def insert_graph_receipt_txn(self, txn, room_id, receipt_type, From ca041d55267740214a2cfab95c44ee6f70cc6d0d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 7 Jul 2015 15:25:30 +0100 Subject: [PATCH 031/266] Wire together receipts and the notifier/federation --- synapse/handlers/receipts.py | 81 ++++++++++++++++++------ synapse/rest/client/v2_alpha/receipts.py | 3 +- synapse/storage/receipts.py | 69 +++++++++++++++++--- synapse/streams/events.py | 6 +- 4 files changed, 126 insertions(+), 33 deletions(-) diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index fc2f38c1c..94f081005 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -37,7 +37,8 @@ class ReceiptsHandler(BaseHandler): "m.receipt", self._received_remote_receipt ) - self._latest_serial = 0 + # self._earliest_cached_serial = 0 + # self._rooms_to_latest_serial = {} @defer.inlineCallbacks def received_client_receipt(self, room_id, receipt_type, user_id, @@ -53,8 +54,10 @@ "event_ids": [event_id], } - yield self._handle_new_receipts([receipt]) - self._push_remotes([receipt]) + is_new = yield self._handle_new_receipts([receipt]) + + if is_new: + self._push_remotes([receipt]) @defer.inlineCallbacks def _received_remote_receipt(self, origin, content): @@ -81,33 +84,24 @@ class ReceiptsHandler(BaseHandler): user_id = receipt["user_id"] event_ids = receipt["event_ids"] - stream_id, max_persisted_id = yield self.store.insert_receipt( + res = yield self.store.insert_receipt( room_id, receipt_type, user_id, event_ids, ) - # TODO: Use max_persisted_id + if not res: + # res will be None if this read receipt is 'old' + defer.returnValue(False) - self._latest_serial = max(self._latest_serial, stream_id) + stream_id, max_persisted_id = res with PreserveLoggingContext(): self.notifier.on_new_event( - "receipt_key", self._latest_serial, rooms=[room_id] + "receipt_key", max_persisted_id, rooms=[room_id] ) - localusers = set() - remotedomains = set() - - rm_handler =
self.hs.get_handlers().room_member_handler - yield rm_handler.fetch_room_distributions_into( - room_id, localusers=localusers, remotedomains=remotedomains - ) - - receipt["remotedomains"] = remotedomains - - self.notifier.on_new_event( - "receipt_key", self._latest_room_serial, rooms=[room_id] - ) + defer.returnValue(True) + @defer.inlineCallbacks def _push_remotes(self, receipts): # TODO: Some of this stuff should be coalesced. for receipt in receipts: @@ -115,7 +109,15 @@ class ReceiptsHandler(BaseHandler): receipt_type = receipt["receipt_type"] user_id = receipt["user_id"] event_ids = receipt["event_ids"] - remotedomains = receipt["remotedomains"] + + remotedomains = set() + + rm_handler = self.hs.get_handlers().room_member_handler + yield rm_handler.fetch_room_distributions_into( + room_id, localusers=None, remotedomains=remotedomains + ) + + logger.debug("Sending receipt to: %r", remotedomains) for domain in remotedomains: self.federation.send_edu( @@ -130,3 +132,40 @@ class ReceiptsHandler(BaseHandler): }, }, ) + + +class ReceiptEventSource(object): + def __init__(self, hs): + self.store = hs.get_datastore() + + @defer.inlineCallbacks + def get_new_events_for_user(self, user, from_key, limit): + from_key = int(from_key) + to_key = yield self.get_current_key() + + rooms = yield self.store.get_rooms_for_user(user.to_string()) + rooms = [room.room_id for room in rooms] + content = {} + for room_id in rooms: + result = yield self.store.get_linearized_receipts_for_room( + room_id, from_key, to_key + ) + if result: + content[room_id] = result + + if not content: + defer.returnValue(([], to_key)) + + event = { + "type": "m.receipt", + "content": content, + } + + defer.returnValue(([event], to_key)) + + def get_current_key(self, direction='f'): + return self.store.get_max_receipt_stream_id() + + @defer.inlineCallbacks + def get_pagination_rows(self, user, config, key): + defer.returnValue(([{}], 0)) diff --git a/synapse/rest/client/v2_alpha/receipts.py b/synapse/rest/client/v2_alpha/receipts.py index 829427b7b..40406e2ed 100644 --- a/synapse/rest/client/v2_alpha/receipts.py +++ b/synapse/rest/client/v2_alpha/receipts.py @@ -28,7 +28,7 @@ class ReceiptRestServlet(RestServlet): PATTERN = client_v2_pattern( "/rooms/(?P<room_id>[^/]*)" "/receipt/(?P<receipt_type>[^/]*)" - "/(?P<event_id>[^/])*" + "/(?P<event_id>[^/]*)$" ) def __init__(self, hs): @@ -41,7 +41,6 @@ def on_POST(self, request, room_id, receipt_type, event_id): user, client = yield self.auth.get_user_by_req(request) - # TODO: STUFF yield self.receipts_handler.received_client_receipt( room_id, receipt_type, diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py index 15c11fd41..5a02c8025 100644 --- a/synapse/storage/receipts.py +++ b/synapse/storage/receipts.py @@ -17,17 +17,33 @@ from ._base import SQLBaseStore, cached from twisted.internet import defer +import logging + + +logger = logging.getLogger(__name__) + class ReceiptsStore(SQLBaseStore): - @cached @defer.inlineCallbacks - def get_linearized_receipts_for_room(self, room_id): - rows = yield self._simple_select_list( - table="receipts_linearized", - keyvalues={"room_id": room_id}, - retcols=["receipt_type", "user_id", "event_id"], - desc="get_linearized_receipts_for_room", + def get_linearized_receipts_for_room(self, room_id, from_key, to_key): + def f(txn): + sql = ( + "SELECT * FROM receipts_linearized WHERE" + " room_id = ? AND stream_id > ? AND stream_id <= ?"
+ ) + + txn.execute( + sql, + (room_id, from_key, to_key) + ) + + rows = self.cursor_to_dict(txn) + + return rows + + rows = yield self.runInteraction( + "get_linearized_receipts_for_room", f ) result = {} @@ -40,6 +56,9 @@ class ReceiptsStore(SQLBaseStore): defer.returnValue(result) + def get_max_receipt_stream_id(self): + return self._receipts_id_gen.get_max_token(self) + @cached @defer.inlineCallbacks def get_graph_receipts_for_room(self, room_id): @@ -62,11 +81,38 @@ class ReceiptsStore(SQLBaseStore): def insert_linearized_receipt_txn(self, txn, room_id, receipt_type, user_id, event_id, stream_id): + + # We don't want to clobber receipts for more recent events, so we + # have to compare orderings of existing receipts + sql = ( + "SELECT topological_ordering, stream_ordering, event_id FROM events" + " INNER JOIN receipts_linearized as r USING (event_id, room_id)" + " WHERE r.room_id = ? AND r.receipt_type = ? AND r.user_id = ?" + ) + + txn.execute(sql, (room_id, receipt_type, user_id)) + results = txn.fetchall() + + if results: + res = self._simple_select_one_txn( + txn, + table="events", + retcols=["topological_ordering", "stream_ordering"], + keyvalues={"event_id": event_id}, + ) + topological_ordering = int(res["topological_ordering"]) + stream_ordering = int(res["stream_ordering"]) + + for to, so, _ in results: + if int(to) > topological_ordering: + return False + elif int(to) == topological_ordering and int(so) >= stream_ordering: + return False + self._simple_delete_txn( txn, table="receipts_linearized", keyvalues={ - "stream_id": stream_id, "room_id": room_id, "receipt_type": receipt_type, "user_id": user_id, @@ -85,6 +131,8 @@ class ReceiptsStore(SQLBaseStore): } ) + return True + @defer.inlineCallbacks def insert_receipt(self, room_id, receipt_type, user_id, event_ids): if not event_ids: @@ -115,13 +163,16 @@ class ReceiptsStore(SQLBaseStore): stream_id_manager = yield self._receipts_id_gen.get_next(self) with stream_id_manager as stream_id: - yield self.runInteraction( + have_persisted = yield self.runInteraction( "insert_linearized_receipt", self.insert_linearized_receipt_txn, room_id, receipt_type, user_id, linearized_event_id, stream_id=stream_id, ) + if not have_persisted: + defer.returnValue(None) + yield self.insert_graph_receipt( room_id, receipt_type, user_id, event_ids ) diff --git a/synapse/streams/events.py b/synapse/streams/events.py index 0a1a3a3d0..aaa3609aa 100644 --- a/synapse/streams/events.py +++ b/synapse/streams/events.py @@ -20,6 +20,7 @@ from synapse.types import StreamToken from synapse.handlers.presence import PresenceEventSource from synapse.handlers.room import RoomEventSource from synapse.handlers.typing import TypingNotificationEventSource +from synapse.handlers.receipts import ReceiptEventSource class NullSource(object): @@ -43,6 +44,7 @@ class EventSources(object): "room": RoomEventSource, "presence": PresenceEventSource, "typing": TypingNotificationEventSource, + "receipt": ReceiptEventSource, } def __init__(self, hs): @@ -63,7 +65,9 @@ class EventSources(object): typing_key=( yield self.sources["typing"].get_current_key() ), - receipt_key="0", + receipt_key=( + yield self.sources["receipt"].get_current_key() + ), ) defer.returnValue(token) From f0dd6d4cbddd71f19b4bb20754e302eb48dd21b3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 7 Jul 2015 16:18:36 +0100 Subject: [PATCH 032/266] Fix test. 
--- tests/rest/client/v1/test_events.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tests/rest/client/v1/test_events.py b/tests/rest/client/v1/test_events.py index 445272e32..ac3b0b58a 100644 --- a/tests/rest/client/v1/test_events.py +++ b/tests/rest/client/v1/test_events.py @@ -183,7 +183,17 @@ class EventStreamPermissionsTestCase(RestTestCase): ) self.assertEquals(200, code, msg=str(response)) - self.assertEquals(0, len(response["chunk"])) + # We may get a presence event for ourselves down + self.assertEquals( + 0, + len([ + c for c in response["chunk"] + if not ( + c.get("type") == "m.presence" + and c["content"].get("user_id") == self.user_id + ) + ]) + ) # joined room (expect all content for room) yield self.join(room=room_id, user=self.user_id, tok=self.token) From 87311d1b8cc648400dfce5db8a7fed46abbeb963 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 8 Jul 2015 10:54:01 +0100 Subject: [PATCH 033/266] Hook up receipts to v1 initialSync --- synapse/handlers/message.py | 16 ++++++++++--- synapse/handlers/receipts.py | 45 +++++++++++++++++++++++++++++++++++- synapse/storage/receipts.py | 27 +++++++++++++++------- 3 files changed, 76 insertions(+), 12 deletions(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index e324662f1..7c1d6b548 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -278,6 +278,11 @@ class MessageHandler(BaseHandler): user, pagination_config.get_source_config("presence"), None ) + receipt_stream = self.hs.get_event_sources().sources["receipt"] + receipt, _ = yield receipt_stream.get_pagination_rows( + user, pagination_config.get_source_config("receipt"), None + ) + public_room_ids = yield self.store.get_public_room_ids() limit = pagin_config.limit @@ -344,7 +349,8 @@ class MessageHandler(BaseHandler): ret = { "rooms": rooms_ret, "presence": presence, - "end": now_token.to_string() + "receipts": receipt, + "end": now_token.to_string(), } defer.returnValue(ret) @@ -405,9 +411,12 @@ class MessageHandler(BaseHandler): defer.returnValue([p for success, p in presence_defs if success]) - presence, (messages, token) = yield defer.gatherResults( + receipts_handler = self.hs.get_handlers().receipts_handler + + presence, receipts, (messages, token) = yield defer.gatherResults( [ get_presence(), + receipts_handler.get_receipts_for_room(room_id, now_token.receipt_key), self.store.get_recent_events_for_room( room_id, limit=limit, @@ -431,5 +440,6 @@ class MessageHandler(BaseHandler): "end": end_token.to_string(), }, "state": state, - "presence": presence + "presence": presence, + "receipts": receipts, }) diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index 94f081005..f6cde30e6 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -133,6 +133,24 @@ class ReceiptsHandler(BaseHandler): }, ) + @defer.inlineCallbacks + def get_receipts_for_room(self, room_id, to_key): + result = yield self.store.get_linearized_receipts_for_room( + room_id, None, to_key + ) + + if not result: + defer.returnValue([]) + + event = { + "type": "m.receipt", + "content": { + room_id: result, + }, + } + + defer.returnValue([event]) + class ReceiptEventSource(object): def __init__(self, hs): @@ -168,4 +186,29 @@ class ReceiptEventSource(object): @defer.inlineCallbacks def get_pagination_rows(self, user, config, key): - defer.returnValue(([{}], 0)) + to_key = int(config.from_key) + + if config.to_key: + from_key = int(config.to_key) + else: + from_key = None + + rooms 
= yield self.store.get_rooms_for_user(user.to_string()) + rooms = [room.room_id for room in rooms] + content = {} + for room_id in rooms: + result = yield self.store.get_linearized_receipts_for_room( + room_id, from_key, to_key + ) + if result: + content[room_id] = result + + if not content: + defer.returnValue(([], to_key)) + + event = { + "type": "m.receipt", + "content": content, + } + + defer.returnValue(([event], to_key)) diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py index 5a02c8025..07f8edaac 100644 --- a/synapse/storage/receipts.py +++ b/synapse/storage/receipts.py @@ -28,15 +28,26 @@ class ReceiptsStore(SQLBaseStore): @defer.inlineCallbacks def get_linearized_receipts_for_room(self, room_id, from_key, to_key): def f(txn): - sql = ( - "SELECT * FROM receipts_linearized WHERE" - " room_id = ? AND stream_id > ? AND stream_id <= ?" - ) + if from_key: + sql = ( + "SELECT * FROM receipts_linearized WHERE" + " room_id = ? AND stream_id > ? AND stream_id <= ?" + ) - txn.execute( - sql, - (room_id, from_key, to_key) - ) + txn.execute( + sql, + (room_id, from_key, to_key) + ) + else: + sql = ( + "SELECT * FROM receipts_linearized WHERE" + " room_id = ? AND stream_id <= ?" + ) + + txn.execute( + sql, + (room_id, to_key) + ) rows = self.cursor_to_dict(txn) From 81682d0f820a6209535267a45ee28b8f66ff7794 Mon Sep 17 00:00:00 2001 From: Muthu Subramanian Date: Tue, 7 Jul 2015 17:40:30 +0530 Subject: [PATCH 034/266] Integrate SAML2 basic authentication - uses pysaml2 --- synapse/config/homeserver.py | 6 ++-- synapse/config/saml2.py | 27 ++++++++++++++ synapse/handlers/register.py | 30 ++++++++++++++++ synapse/python_dependencies.py | 1 + synapse/rest/client/v1/login.py | 62 ++++++++++++++++++++++++++++++++- 5 files changed, 122 insertions(+), 4 deletions(-) create mode 100644 synapse/config/saml2.py diff --git a/synapse/config/homeserver.py b/synapse/config/homeserver.py index fe0ccb6eb..5c655c537 100644 --- a/synapse/config/homeserver.py +++ b/synapse/config/homeserver.py @@ -25,12 +25,12 @@ from .registration import RegistrationConfig from .metrics import MetricsConfig from .appservice import AppServiceConfig from .key import KeyConfig - +from .saml2 import SAML2Config class HomeServerConfig(TlsConfig, ServerConfig, DatabaseConfig, LoggingConfig, RatelimitConfig, ContentRepositoryConfig, CaptchaConfig, - VoipConfig, RegistrationConfig, - MetricsConfig, AppServiceConfig, KeyConfig,): + VoipConfig, RegistrationConfig, MetricsConfig, + AppServiceConfig, KeyConfig, SAML2Config, ): pass diff --git a/synapse/config/saml2.py b/synapse/config/saml2.py new file mode 100644 index 000000000..4f3a724e2 --- /dev/null +++ b/synapse/config/saml2.py @@ -0,0 +1,27 @@ +# -*- coding: utf-8 -*- +# Copyright 2015 Ericsson +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from ._base import Config + +class SAML2Config(Config): + def read_config(self, config): + self.saml2_config = config["saml2_config"] + + def default_config(self, config_dir_path, server_name): + return """ + saml2_config: + config_path: "%s/sp_conf.py" + idp_redirect_url: "http://%s/idp" + """%(config_dir_path, server_name) diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 7b68585a1..4c6c5e297 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -192,6 +192,36 @@ class RegistrationHandler(BaseHandler): else: logger.info("Valid captcha entered from %s", ip) + @defer.inlineCallbacks + def register_saml2(self, localpart): + """ + Registers email_id as SAML2 Based Auth. + """ + if urllib.quote(localpart) != localpart: + raise SynapseError( + 400, + "User ID must only contain characters which do not" + " require URL encoding." + ) + user = UserID(localpart, self.hs.hostname) + user_id = user.to_string() + + yield self.check_user_id_is_valid(user_id) + token = self._generate_token(user_id) + try: + yield self.store.register( + user_id=user_id, + token=token, + password_hash=None + ) + yield self.distributor.fire("registered_user", user) + except Exception, e: + yield self.store.add_access_token_to_user(user_id, token) + # Ignore Registration errors + logger.exception(e) + defer.returnValue((user_id, token)) + + @defer.inlineCallbacks def register_email(self, threepidCreds): """ diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py index f9e59dd91..17587170c 100644 --- a/synapse/python_dependencies.py +++ b/synapse/python_dependencies.py @@ -31,6 +31,7 @@ REQUIREMENTS = { "pillow": ["PIL"], "pydenticon": ["pydenticon"], "ujson": ["ujson"], + "pysaml2": ["saml2"], } CONDITIONAL_REQUIREMENTS = { "web_client": { diff --git a/synapse/rest/client/v1/login.py b/synapse/rest/client/v1/login.py index b2257b749..dc7615c6f 100644 --- a/synapse/rest/client/v1/login.py +++ b/synapse/rest/client/v1/login.py @@ -20,14 +20,32 @@ from synapse.types import UserID from base import ClientV1RestServlet, client_path_pattern import simplejson as json +import cgi +import urllib + +import logging +from saml2 import BINDING_HTTP_REDIRECT +from saml2 import BINDING_HTTP_POST +from saml2.metadata import create_metadata_string +from saml2 import config +from saml2.client import Saml2Client +from saml2.httputil import ServiceError +from saml2.samlp import Extensions +from saml2.extension.pefim import SPCertEnc +from saml2.s_utils import rndstr class LoginRestServlet(ClientV1RestServlet): PATTERN = client_path_pattern("/login$") PASS_TYPE = "m.login.password" + SAML2_TYPE = "m.login.saml2" + + def __init__(self, hs): + super(LoginRestServlet, self).__init__(hs) + self.idp_redirect_url = hs.config.saml2_config['idp_redirect_url'] def on_GET(self, request): - return (200, {"flows": [{"type": LoginRestServlet.PASS_TYPE}]}) + return (200, {"flows": [{"type": LoginRestServlet.PASS_TYPE}, {"type": LoginRestServlet.SAML2_TYPE}]}) def on_OPTIONS(self, request): return (200, {}) @@ -39,6 +57,14 @@ class LoginRestServlet(ClientV1RestServlet): if login_submission["type"] == LoginRestServlet.PASS_TYPE: result = yield self.do_password_login(login_submission) defer.returnValue(result) + elif login_submission["type"] == LoginRestServlet.SAML2_TYPE: + relay_state = "" + if "relay_state" in login_submission: + relay_state = "&RelayState="+urllib.quote(login_submission["relay_state"]) + result = { + "uri": "%s%s"%(self.idp_redirect_url, relay_state) + } + 
defer.returnValue((200, result)) else: raise SynapseError(400, "Bad login type.") except KeyError: @@ -93,6 +119,39 @@ class PasswordResetRestServlet(ClientV1RestServlet): "Missing keys. Requires 'email' and 'user_id'." ) +class SAML2RestServlet(ClientV1RestServlet): + PATTERN = client_path_pattern("/login/saml2") + + def __init__(self, hs): + super(SAML2RestServlet, self).__init__(hs) + self.sp_config = hs.config.saml2_config['config_path'] + + @defer.inlineCallbacks + def on_POST(self, request): + saml2_auth = None + try: + conf = config.SPConfig() + conf.load_file(self.sp_config) + SP = Saml2Client(conf) + saml2_auth = SP.parse_authn_request_response(request.args['SAMLResponse'][0], BINDING_HTTP_POST) + except Exception, e: # Not authenticated + logger = logging.getLogger(__name__) + logger.exception(e) + if saml2_auth and saml2_auth.status_ok() and not saml2_auth.not_signed: + username = saml2_auth.name_id.text + handler = self.handlers.registration_handler + (user_id, token) = yield handler.register_saml2(username) + # Forward to the RelayState callback along with ava + if 'RelayState' in request.args: + request.redirect(urllib.unquote(request.args['RelayState'][0])+'?status=authenticated&access_token='+token+'&user_id='+user_id+'&ava='+urllib.quote(json.dumps(saml2_auth.ava))) + request.finish() + defer.returnValue(None) + defer.returnValue((200, {"status":"authenticated", "user_id": user_id, "token": token, "ava":saml2_auth.ava})) + elif 'RelayState' in request.args: + request.redirect(urllib.unquote(request.args['RelayState'][0])+'?status=not_authenticated') + request.finish() + defer.returnValue(None) + defer.returnValue((200, {"status":"not_authenticated"})) def _parse_json(request): try: @@ -106,4 +165,5 @@ def _parse_json(request): def register_servlets(hs, http_server): LoginRestServlet(hs).register(http_server) + SAML2RestServlet(hs).register(http_server) # TODO PasswordResetRestServlet(hs).register(http_server) From 77c5db5977c3fb61d9d2906c6692ee502d477e18 Mon Sep 17 00:00:00 2001 From: Muthu Subramanian Date: Wed, 8 Jul 2015 16:05:20 +0530 Subject: [PATCH 035/266] code beautify --- synapse/rest/client/v1/login.py | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/synapse/rest/client/v1/login.py b/synapse/rest/client/v1/login.py index dc7615c6f..b4c74c4c2 100644 --- a/synapse/rest/client/v1/login.py +++ b/synapse/rest/client/v1/login.py @@ -45,7 +45,8 @@ class LoginRestServlet(ClientV1RestServlet): self.idp_redirect_url = hs.config.saml2_config['idp_redirect_url'] def on_GET(self, request): - return (200, {"flows": [{"type": LoginRestServlet.PASS_TYPE}, {"type": LoginRestServlet.SAML2_TYPE}]}) + return (200, {"flows": [{"type": LoginRestServlet.PASS_TYPE}, + {"type": LoginRestServlet.SAML2_TYPE}]}) def on_OPTIONS(self, request): return (200, {}) @@ -60,9 +61,10 @@ class LoginRestServlet(ClientV1RestServlet): elif login_submission["type"] == LoginRestServlet.SAML2_TYPE: relay_state = "" if "relay_state" in login_submission: - relay_state = "&RelayState="+urllib.quote(login_submission["relay_state"]) + relay_state = "&RelayState="+urllib.quote( + login_submission["relay_state"]) result = { - "uri": "%s%s"%(self.idp_redirect_url, relay_state) + "uri": "%s%s" % (self.idp_redirect_url, relay_state) } defer.returnValue((200, result)) else: @@ -119,6 +121,7 @@ class PasswordResetRestServlet(ClientV1RestServlet): "Missing keys. Requires 'email' and 'user_id'." 
) + class SAML2RestServlet(ClientV1RestServlet): PATTERN = client_path_pattern("/login/saml2") @@ -133,25 +136,35 @@ class SAML2RestServlet(ClientV1RestServlet): conf = config.SPConfig() conf.load_file(self.sp_config) SP = Saml2Client(conf) - saml2_auth = SP.parse_authn_request_response(request.args['SAMLResponse'][0], BINDING_HTTP_POST) - except Exception, e: # Not authenticated + saml2_auth = SP.parse_authn_request_response( + request.args['SAMLResponse'][0], BINDING_HTTP_POST) + except Exception, e: # Not authenticated logger = logging.getLogger(__name__) logger.exception(e) - if saml2_auth and saml2_auth.status_ok() and not saml2_auth.not_signed: + if saml2_auth and saml2_auth.status_ok() and not saml2_auth.not_signed: username = saml2_auth.name_id.text handler = self.handlers.registration_handler (user_id, token) = yield handler.register_saml2(username) # Forward to the RelayState callback along with ava if 'RelayState' in request.args: - request.redirect(urllib.unquote(request.args['RelayState'][0])+'?status=authenticated&access_token='+token+'&user_id='+user_id+'&ava='+urllib.quote(json.dumps(saml2_auth.ava))) + request.redirect(urllib.unquote( + request.args['RelayState'][0]) + + '?status=authenticated&access_token=' + + token + '&user_id=' + user_id + '&ava=' + + urllib.quote(json.dumps(saml2_auth.ava))) request.finish() defer.returnValue(None) - defer.returnValue((200, {"status":"authenticated", "user_id": user_id, "token": token, "ava":saml2_auth.ava})) + defer.returnValue((200, {"status": "authenticated", + "user_id": user_id, "token": token, + "ava": saml2_auth.ava})) elif 'RelayState' in request.args: - request.redirect(urllib.unquote(request.args['RelayState'][0])+'?status=not_authenticated') + request.redirect(urllib.unquote( + request.args['RelayState'][0]) + + '?status=not_authenticated') request.finish() defer.returnValue(None) - defer.returnValue((200, {"status":"not_authenticated"})) + defer.returnValue((200, {"status": "not_authenticated"})) + def _parse_json(request): try: From f53bae0c1948a8c0a229e0b20f237f7ff4b1d84c Mon Sep 17 00:00:00 2001 From: Muthu Subramanian Date: Wed, 8 Jul 2015 16:05:46 +0530 Subject: [PATCH 036/266] code beautify --- synapse/config/homeserver.py | 1 + synapse/config/saml2.py | 3 ++- synapse/handlers/register.py | 1 - 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/synapse/config/homeserver.py b/synapse/config/homeserver.py index 5c655c537..d77f04540 100644 --- a/synapse/config/homeserver.py +++ b/synapse/config/homeserver.py @@ -27,6 +27,7 @@ from .appservice import AppServiceConfig from .key import KeyConfig from .saml2 import SAML2Config + class HomeServerConfig(TlsConfig, ServerConfig, DatabaseConfig, LoggingConfig, RatelimitConfig, ContentRepositoryConfig, CaptchaConfig, VoipConfig, RegistrationConfig, MetricsConfig, diff --git a/synapse/config/saml2.py b/synapse/config/saml2.py index 4f3a724e2..d18d076a8 100644 --- a/synapse/config/saml2.py +++ b/synapse/config/saml2.py @@ -15,6 +15,7 @@ from ._base import Config + class SAML2Config(Config): def read_config(self, config): self.saml2_config = config["saml2_config"] @@ -24,4 +25,4 @@ class SAML2Config(Config): saml2_config: config_path: "%s/sp_conf.py" idp_redirect_url: "http://%s/idp" - """%(config_dir_path, server_name) + """ % (config_dir_path, server_name) diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 4c6c5e297..a1288b425 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -220,7 +220,6 @@ class 
RegistrationHandler(BaseHandler): # Ignore Registration errors logger.exception(e) defer.returnValue((user_id, token)) - @defer.inlineCallbacks def register_email(self, threepidCreds): From d85ce8d89bc1fb6ff6f277fb424dddca7ab2f47e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 8 Jul 2015 11:36:05 +0100 Subject: [PATCH 037/266] Split receipt events up into one per room --- synapse/handlers/receipts.py | 49 +++++++++++++++--------------------- 1 file changed, 20 insertions(+), 29 deletions(-) diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index f6cde30e6..b7567b9ea 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -144,9 +144,8 @@ class ReceiptsHandler(BaseHandler): event = { "type": "m.receipt", - "content": { - room_id: result, - }, + "room_id": room_id, + "content": result, } defer.returnValue([event]) @@ -163,23 +162,19 @@ class ReceiptEventSource(object): rooms = yield self.store.get_rooms_for_user(user.to_string()) rooms = [room.room_id for room in rooms] - content = {} + events = [] for room_id in rooms: - result = yield self.store.get_linearized_receipts_for_room( + content = yield self.store.get_linearized_receipts_for_room( room_id, from_key, to_key ) - if result: - content[room_id] = result + if content: + events.append({ + "type": "m.receipt", + "room_id": room_id, + "content": content, + }) - if not content: - defer.returnValue(([], to_key)) - - event = { - "type": "m.receipt", - "content": content, - } - - defer.returnValue(([event], to_key)) + defer.returnValue((events, to_key)) def get_current_key(self, direction='f'): return self.store.get_max_receipt_stream_id() @@ -195,20 +190,16 @@ class ReceiptEventSource(object): rooms = yield self.store.get_rooms_for_user(user.to_string()) rooms = [room.room_id for room in rooms] - content = {} + events = [] for room_id in rooms: - result = yield self.store.get_linearized_receipts_for_room( + content = yield self.store.get_linearized_receipts_for_room( room_id, from_key, to_key ) - if result: - content[room_id] = result + if content: + events.append({ + "type": "m.receipt", + "room_id": room_id, + "content": content, + }) - if not content: - defer.returnValue(([], to_key)) - - event = { - "type": "m.receipt", - "content": content, - } - - defer.returnValue(([event], to_key)) + defer.returnValue((events, to_key)) From af812b68ddbd1a69a8c98c463248d000633b075f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 8 Jul 2015 15:35:00 +0100 Subject: [PATCH 038/266] Add a cache to fetching of receipt streams --- synapse/handlers/receipts.py | 31 +++--------- synapse/storage/receipts.py | 92 ++++++++++++++++++++++++++++++++++-- 2 files changed, 96 insertions(+), 27 deletions(-) diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index b7567b9ea..053ed8480 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -37,8 +37,7 @@ class ReceiptsHandler(BaseHandler): "m.receipt", self._received_remote_receipt ) - # self._earliest_cached_serial = 0 - # self._rooms_to_latest_serial = {} + self._receipt_cache = None @defer.inlineCallbacks def received_client_receipt(self, room_id, receipt_type, user_id, @@ -162,17 +161,9 @@ class ReceiptEventSource(object): rooms = yield self.store.get_rooms_for_user(user.to_string()) rooms = [room.room_id for room in rooms] - events = [] - for room_id in rooms: - content = yield self.store.get_linearized_receipts_for_room( - room_id, from_key, to_key - ) - if content: - events.append({ - "type": "m.receipt", 
- "room_id": room_id, - "content": content, - }) + events = yield self.store.get_linearized_receipts_for_rooms( + rooms, from_key, to_key + ) defer.returnValue((events, to_key)) @@ -190,16 +181,8 @@ class ReceiptEventSource(object): rooms = yield self.store.get_rooms_for_user(user.to_string()) rooms = [room.room_id for room in rooms] - events = [] - for room_id in rooms: - content = yield self.store.get_linearized_receipts_for_room( - room_id, from_key, to_key - ) - if content: - events.append({ - "type": "m.receipt", - "room_id": room_id, - "content": content, - }) + events = yield self.store.get_linearized_receipts_for_rooms( + rooms, from_key, to_key + ) defer.returnValue((events, to_key)) diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py index 07f8edaac..503f68f85 100644 --- a/synapse/storage/receipts.py +++ b/synapse/storage/receipts.py @@ -17,6 +17,9 @@ from ._base import SQLBaseStore, cached from twisted.internet import defer +from synapse.util import unwrapFirstError + +from blist import sorteddict import logging @@ -24,6 +27,29 @@ logger = logging.getLogger(__name__) class ReceiptsStore(SQLBaseStore): + def __init__(self, hs): + super(ReceiptsStore, self).__init__(hs) + + self._receipts_stream_cache = _RoomStreamChangeCache() + + @defer.inlineCallbacks + def get_linearized_receipts_for_rooms(self, room_ids, from_key, to_key): + room_ids = set(room_ids) + + if from_key: + room_ids = yield self._receipts_stream_cache.get_rooms_changed( + self, room_ids, from_key + ) + + results = yield defer.gatherResults( + [ + self.get_linearized_receipts_for_room(room_id, from_key, to_key) + for room_id in room_ids + ], + consumeErrors=True, + ).addErrback(unwrapFirstError) + + defer.returnValue([ev for res in results for ev in res]) @defer.inlineCallbacks def get_linearized_receipts_for_room(self, room_id, from_key, to_key): @@ -57,15 +83,22 @@ class ReceiptsStore(SQLBaseStore): "get_linearized_receipts_for_room", f ) - result = {} + if not rows: + defer.returnValue([]) + + content = {} for row in rows: - result.setdefault( + content.setdefault( row["event_id"], {} ).setdefault( row["receipt_type"], [] ).append(row["user_id"]) - defer.returnValue(result) + defer.returnValue([{ + "type": "m.receipt", + "room_id": room_id, + "content": content, + }]) def get_max_receipt_stream_id(self): return self._receipts_id_gen.get_max_token(self) @@ -174,6 +207,9 @@ class ReceiptsStore(SQLBaseStore): stream_id_manager = yield self._receipts_id_gen.get_next(self) with stream_id_manager as stream_id: + yield self._receipts_stream_cache.room_has_changed( + self, room_id, stream_id + ) have_persisted = yield self.runInteraction( "insert_linearized_receipt", self.insert_linearized_receipt_txn, @@ -223,3 +259,53 @@ class ReceiptsStore(SQLBaseStore): for event_id in event_ids ], ) + + +class _RoomStreamChangeCache(object): + """Keeps track of the stream_id of the latest change in rooms. + + Given a list of rooms and stream key, it will give a subset of rooms that + may have changed since that key. If the key is too old then the cache + will simply return all rooms. 
+ """ + def __init__(self, size_of_cache=1000): + self._size_of_cache = size_of_cache + self._room_to_key = {} + self._cache = sorteddict() + self._earliest_key = None + + @defer.inlineCallbacks + def get_rooms_changed(self, store, room_ids, key): + if key > (yield self._get_earliest_key(store)): + keys = self._cache.keys() + i = keys.bisect_right(key) + + result = set( + self._cache[k] for k in keys[i:] + ).intersection(room_ids) + else: + result = room_ids + + defer.returnValue(result) + + @defer.inlineCallbacks + def room_has_changed(self, store, room_id, key): + if key > (yield self._get_earliest_key(store)): + old_key = self._room_to_key.get(room_id, None) + if old_key: + key = max(key, old_key) + self._cache.pop(old_key, None) + self._cache[key] = room_id + + while len(self._cache) > self._size_of_cache: + k, r = self._cache.popitem() + self._earliest_key = max(k, self._earliest_key) + self._room_to_key.pop(r, None) + + @defer.inlineCallbacks + def _get_earliest_key(self, store): + if self._earliest_key is None: + self._earliest_key = yield store.get_max_receipt_stream_id() + self._earliest_key = int(self._earliest_key) + + defer.returnValue(self._earliest_key) From ce9e2f84ad6c62e64884fef1965b786eea3c365c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 8 Jul 2015 15:41:59 +0100 Subject: [PATCH 039/266] Add blist to dependencies --- synapse/python_dependencies.py | 1 + 1 file changed, 1 insertion(+) diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py index f9e59dd91..daaa61b5f 100644 --- a/synapse/python_dependencies.py +++ b/synapse/python_dependencies.py @@ -31,6 +31,7 @@ REQUIREMENTS = { "pillow": ["PIL"], "pydenticon": ["pydenticon"], "ujson": ["ujson"], + "blist": ["blist"], } CONDITIONAL_REQUIREMENTS = { "web_client": { From 8fb79eeea4b1c388771785024b79e84b4206fc24 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Wed, 8 Jul 2015 17:04:29 +0100 Subject: [PATCH 040/266] Only remove one time keys when new one time keys are added --- synapse/storage/end_to_end_keys.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/synapse/storage/end_to_end_keys.py b/synapse/storage/end_to_end_keys.py index 936a64669..b3cede37e 100644 --- a/synapse/storage/end_to_end_keys.py +++ b/synapse/storage/end_to_end_keys.py @@ -58,6 +58,11 @@ class EndToEndKeyStore(SQLBaseStore): def add_e2e_one_time_keys(self, user_id, device_id, time_now, valid_until, key_list): def _add_e2e_one_time_keys(txn): + sql = ( + "DELETE FROM e2e_one_time_keys_json" + " WHERE user_id = ? AND device_id = ? AND valid_until_ms < ?" + ) + txn.execute(sql, (user_id, device_id, time_now)) for (algorithm, key_id, json_bytes) in key_list: self._simple_upsert_txn( txn, table="e2e_one_time_keys_json", @@ -83,17 +88,12 @@ class EndToEndKeyStore(SQLBaseStore): Dict mapping from algorithm to number of keys for that algorithm. """ def _count_e2e_one_time_keys(txn): - sql = ( - "DELETE FROM e2e_one_time_keys_json" - " WHERE user_id = ? AND device_id = ? AND valid_until_ms < ?" - ) - txn.execute(sql, (user_id, device_id, time_now)) sql = ( "SELECT algorithm, COUNT(key_id) FROM e2e_one_time_keys_json" - " WHERE user_id = ? AND device_id = ?" + " WHERE user_id = ? AND device_id = ? AND valid_until_ms >= ?" 
" GROUP BY algorithm" ) - txn.execute(sql, (user_id, device_id)) + txn.execute(sql, (user_id, device_id, time_now)) result = {} for algorithm, key_count in txn.fetchall(): result[algorithm] = key_count From 64afbe6ccd19bb2ec94f3fbb3d91586202c924fd Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Wed, 8 Jul 2015 18:20:02 +0100 Subject: [PATCH 041/266] add new optional config for tls_certificate_chain_path for folks with intermediary SSL certs --- synapse/config/tls.py | 20 +++++++++++++++++--- synapse/crypto/context_factory.py | 2 ++ 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/synapse/config/tls.py b/synapse/config/tls.py index ecb2d42c1..e04fe0d96 100644 --- a/synapse/config/tls.py +++ b/synapse/config/tls.py @@ -25,9 +25,19 @@ GENERATE_DH_PARAMS = False class TlsConfig(Config): def read_config(self, config): self.tls_certificate = self.read_tls_certificate( - config.get("tls_certificate_path") + config.get("tls_certificate_path"), + "tls_certificate" ) + tls_certificate_chain_path = + config.get("tls_certificate_chain_path") + + if tls_certificate_chain_path and os.path.exists(tls_certificate_chain_path): + self.tls_certificate_chain = self.read_tls_certificate( + config.get("tls_certificate_chain_path"), + "tls_certificate_chain" + ) + self.no_tls = config.get("no_tls", False) if self.no_tls: @@ -45,6 +55,7 @@ class TlsConfig(Config): base_key_name = os.path.join(config_dir_path, server_name) tls_certificate_path = base_key_name + ".tls.crt" + tls_certificate_chain_path = base_key_name + ".tls.chain.crt" tls_private_key_path = base_key_name + ".tls.key" tls_dh_params_path = base_key_name + ".tls.dh" @@ -52,6 +63,9 @@ class TlsConfig(Config): # PEM encoded X509 certificate for TLS tls_certificate_path: "%(tls_certificate_path)s" + # PEM encoded X509 intermediary certificate file for TLS (optional) + # tls_certificate_chain_path: "%(tls_certificate_chain_path)s" + # PEM encoded private key for TLS tls_private_key_path: "%(tls_private_key_path)s" @@ -62,8 +76,8 @@ class TlsConfig(Config): no_tls: False """ % locals() - def read_tls_certificate(self, cert_path): - cert_pem = self.read_file(cert_path, "tls_certificate") + def read_tls_certificate(self, cert_path, config_name): + cert_pem = self.read_file(cert_path, config_name) return crypto.load_certificate(crypto.FILETYPE_PEM, cert_pem) def read_tls_private_key(self, private_key_path): diff --git a/synapse/crypto/context_factory.py b/synapse/crypto/context_factory.py index 2f8618a0d..ea5dd1e7d 100644 --- a/synapse/crypto/context_factory.py +++ b/synapse/crypto/context_factory.py @@ -38,6 +38,8 @@ class ServerContextFactory(ssl.ContextFactory): logger.exception("Failed to enable eliptic curve for TLS") context.set_options(SSL.OP_NO_SSLv2 | SSL.OP_NO_SSLv3) context.use_certificate(config.tls_certificate) + if config.tls_certificate_chain: + context.use_certificate_chain_file(config.tls_certificate_chain) if not config.no_tls: context.use_privatekey(config.tls_private_key) From 465acb0c6ac575c120c756486171aa701abaa4eb Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Wed, 8 Jul 2015 18:30:59 +0100 Subject: [PATCH 042/266] *cough* --- synapse/config/tls.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/synapse/config/tls.py b/synapse/config/tls.py index e04fe0d96..945af3805 100644 --- a/synapse/config/tls.py +++ b/synapse/config/tls.py @@ -29,14 +29,15 @@ class TlsConfig(Config): "tls_certificate" ) - tls_certificate_chain_path = - config.get("tls_certificate_chain_path") + 
tls_certificate_chain_path = config.get("tls_certificate_chain_path") if tls_certificate_chain_path and os.path.exists(tls_certificate_chain_path): self.tls_certificate_chain = self.read_tls_certificate( config.get("tls_certificate_chain_path"), "tls_certificate_chain" ) + else: + self.tls_certificate_chain = None self.no_tls = config.get("no_tls", False) From 19fa3731aee498159cbc475f8b29f66dacb6aba6 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Wed, 8 Jul 2015 18:53:41 +0100 Subject: [PATCH 043/266] typo --- synapse/crypto/context_factory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/crypto/context_factory.py b/synapse/crypto/context_factory.py index ea5dd1e7d..324dc31fe 100644 --- a/synapse/crypto/context_factory.py +++ b/synapse/crypto/context_factory.py @@ -35,7 +35,7 @@ class ServerContextFactory(ssl.ContextFactory): _ecCurve = _OpenSSLECCurve(_defaultCurveName) _ecCurve.addECKeyToContext(context) except: - logger.exception("Failed to enable eliptic curve for TLS") + logger.exception("Failed to enable elliptic curve for TLS") context.set_options(SSL.OP_NO_SSLv2 | SSL.OP_NO_SSLv3) context.use_certificate(config.tls_certificate) if config.tls_certificate_chain: From f26a3df1bf6cabee28f5f91778082c0f26b2378c Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Wed, 8 Jul 2015 21:33:02 +0100 Subject: [PATCH 044/266] oops, context.tls_certificate_chain_file() expects a file, not a certificate. --- synapse/config/tls.py | 5 +---- synapse/crypto/context_factory.py | 4 ++-- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/synapse/config/tls.py b/synapse/config/tls.py index 945af3805..5fef63846 100644 --- a/synapse/config/tls.py +++ b/synapse/config/tls.py @@ -32,10 +32,7 @@ class TlsConfig(Config): tls_certificate_chain_path = config.get("tls_certificate_chain_path") if tls_certificate_chain_path and os.path.exists(tls_certificate_chain_path): - self.tls_certificate_chain = self.read_tls_certificate( - config.get("tls_certificate_chain_path"), - "tls_certificate_chain" - ) + self.tls_certificate_chain_file = tls_certificate_chain_path else: self.tls_certificate_chain = None diff --git a/synapse/crypto/context_factory.py b/synapse/crypto/context_factory.py index 324dc31fe..d515007ca 100644 --- a/synapse/crypto/context_factory.py +++ b/synapse/crypto/context_factory.py @@ -38,8 +38,8 @@ class ServerContextFactory(ssl.ContextFactory): logger.exception("Failed to enable elliptic curve for TLS") context.set_options(SSL.OP_NO_SSLv2 | SSL.OP_NO_SSLv3) context.use_certificate(config.tls_certificate) - if config.tls_certificate_chain: - context.use_certificate_chain_file(config.tls_certificate_chain) + if config.tls_certificate_chain_file: + context.use_certificate_chain_file(config.tls_certificate_chain_file) if not config.no_tls: context.use_privatekey(config.tls_private_key) From 8ad2d2d1cb679484dcffc7bacfd9c1de3f6dad38 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Thu, 9 Jul 2015 00:06:01 +0100 Subject: [PATCH 045/266] document tls_certificate_chain_path more clearly --- synapse/config/tls.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/synapse/config/tls.py b/synapse/config/tls.py index 5fef63846..de57d0d0e 100644 --- a/synapse/config/tls.py +++ b/synapse/config/tls.py @@ -62,6 +62,11 @@ class TlsConfig(Config): tls_certificate_path: "%(tls_certificate_path)s" # PEM encoded X509 intermediary certificate file for TLS (optional) + # This *must* be a concatenation of the tls_certificate pointed to + # by tls_certificate_path followed 
by the intermediary certificates + # in hierarchical order. If specified this option overrides the + # tls_certificate_path parameter. + # # tls_certificate_chain_path: "%(tls_certificate_chain_path)s" # PEM encoded private key for TLS From fb8d2862c1d7582096b5f8bd6194dcbe8e1afc01 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Thu, 9 Jul 2015 00:45:41 +0100 Subject: [PATCH 046/266] remove the tls_certificate_chain_path param and simply support tls_certificate_path pointing to a file containing a chain of certificates --- synapse/config/tls.py | 30 +++++++++--------------------- synapse/crypto/context_factory.py | 4 +--- 2 files changed, 10 insertions(+), 24 deletions(-) diff --git a/synapse/config/tls.py b/synapse/config/tls.py index de57d0d0e..e136d1371 100644 --- a/synapse/config/tls.py +++ b/synapse/config/tls.py @@ -25,16 +25,9 @@ GENERATE_DH_PARAMS = False class TlsConfig(Config): def read_config(self, config): self.tls_certificate = self.read_tls_certificate( - config.get("tls_certificate_path"), - "tls_certificate" + config.get("tls_certificate_path") ) - - tls_certificate_chain_path = config.get("tls_certificate_chain_path") - - if tls_certificate_chain_path and os.path.exists(tls_certificate_chain_path): - self.tls_certificate_chain_file = tls_certificate_chain_path - else: - self.tls_certificate_chain = None + self.tls_certificate_file = config.get("tls_certificate_path"); self.no_tls = config.get("no_tls", False) @@ -53,22 +46,17 @@ class TlsConfig(Config): base_key_name = os.path.join(config_dir_path, server_name) tls_certificate_path = base_key_name + ".tls.crt" - tls_certificate_chain_path = base_key_name + ".tls.chain.crt" tls_private_key_path = base_key_name + ".tls.key" tls_dh_params_path = base_key_name + ".tls.dh" return """\ - # PEM encoded X509 certificate for TLS + # PEM encoded X509 certificate for TLS. + # You can replace the self-signed certificate that synapse + # autogenerates on launch with your own SSL certificate + key pair + # if you like. Any required intermediary certificates can be + # appended after the primary certificate in hierarchical order. tls_certificate_path: "%(tls_certificate_path)s" - # PEM encoded X509 intermediary certificate file for TLS (optional) - # This *must* be a concatenation of the tls_certificate pointed to - # by tls_certificate_path followed by the intermediary certificates - # in hierarchical order. If specified this option overrides the - # tls_certificate_path parameter. 
- # - # tls_certificate_chain_path: "%(tls_certificate_chain_path)s" - # PEM encoded private key for TLS tls_private_key_path: "%(tls_private_key_path)s" @@ -79,8 +67,8 @@ class TlsConfig(Config): no_tls: False """ % locals() - def read_tls_certificate(self, cert_path, config_name): - cert_pem = self.read_file(cert_path, config_name) + def read_tls_certificate(self, cert_path): + cert_pem = self.read_file(cert_path, "tls_certificate") return crypto.load_certificate(crypto.FILETYPE_PEM, cert_pem) def read_tls_private_key(self, private_key_path): diff --git a/synapse/crypto/context_factory.py b/synapse/crypto/context_factory.py index d515007ca..c4390f3b2 100644 --- a/synapse/crypto/context_factory.py +++ b/synapse/crypto/context_factory.py @@ -37,9 +37,7 @@ class ServerContextFactory(ssl.ContextFactory): except: logger.exception("Failed to enable elliptic curve for TLS") context.set_options(SSL.OP_NO_SSLv2 | SSL.OP_NO_SSLv3) - context.use_certificate(config.tls_certificate) - if config.tls_certificate_chain_file: - context.use_certificate_chain_file(config.tls_certificate_chain_file) + context.use_certificate_chain_file(config.tls_certificate_file) if not config.no_tls: context.use_privatekey(config.tls_private_key) From d2caa5351aece72b274f78fe81348f715389d421 Mon Sep 17 00:00:00 2001 From: Muthu Subramanian Date: Thu, 9 Jul 2015 12:58:15 +0530 Subject: [PATCH 047/266] code beautify --- synapse/rest/client/v1/login.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/synapse/rest/client/v1/login.py b/synapse/rest/client/v1/login.py index b4c74c4c2..b4894497b 100644 --- a/synapse/rest/client/v1/login.py +++ b/synapse/rest/client/v1/login.py @@ -20,19 +20,15 @@ from synapse.types import UserID from base import ClientV1RestServlet, client_path_pattern import simplejson as json -import cgi import urllib import logging -from saml2 import BINDING_HTTP_REDIRECT from saml2 import BINDING_HTTP_POST -from saml2.metadata import create_metadata_string from saml2 import config from saml2.client import Saml2Client -from saml2.httputil import ServiceError -from saml2.samlp import Extensions -from saml2.extension.pefim import SPCertEnc -from saml2.s_utils import rndstr + + +logger = logging.getLogger(__name__) class LoginRestServlet(ClientV1RestServlet): @@ -137,9 +133,8 @@ class SAML2RestServlet(ClientV1RestServlet): conf.load_file(self.sp_config) SP = Saml2Client(conf) saml2_auth = SP.parse_authn_request_response( - request.args['SAMLResponse'][0], BINDING_HTTP_POST) + request.args['SAMLResponse'][0], BINDING_HTTP_POST) except Exception, e: # Not authenticated - logger = logging.getLogger(__name__) logger.exception(e) if saml2_auth and saml2_auth.status_ok() and not saml2_auth.not_signed: username = saml2_auth.name_id.text From 8cd34dfe955841d7ff3306b84a686e7138aec526 Mon Sep 17 00:00:00 2001 From: Muthu Subramanian Date: Thu, 9 Jul 2015 13:34:47 +0530 Subject: [PATCH 048/266] Make SAML2 optional and add some references/comments --- synapse/config/saml2.py | 14 ++++++++++++++ synapse/rest/client/v1/login.py | 13 +++++++++---- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/synapse/config/saml2.py b/synapse/config/saml2.py index d18d076a8..be5176db5 100644 --- a/synapse/config/saml2.py +++ b/synapse/config/saml2.py @@ -16,6 +16,19 @@ from ._base import Config +# +# SAML2 Configuration +# Synapse uses pysaml2 libraries for providing SAML2 support +# +# config_path: Path to the sp_conf.py configuration file +# idp_redirect_url: Identity provider URL which will 
redirect +# the user back to /login/saml2 with proper info. +# +# sp_conf.py file is something like: +# https://github.com/rohe/pysaml2/blob/master/example/sp-repoze/sp_conf.py.example +# +# More information: https://pythonhosted.org/pysaml2/howto/config.html +# class SAML2Config(Config): def read_config(self, config): self.saml2_config = config["saml2_config"] @@ -23,6 +36,7 @@ class SAML2Config(Config): def default_config(self, config_dir_path, server_name): return """ saml2_config: + enabled: false config_path: "%s/sp_conf.py" idp_redirect_url: "http://%s/idp" """ % (config_dir_path, server_name) diff --git a/synapse/rest/client/v1/login.py b/synapse/rest/client/v1/login.py index b4894497b..f64f5e990 100644 --- a/synapse/rest/client/v1/login.py +++ b/synapse/rest/client/v1/login.py @@ -39,10 +39,13 @@ class LoginRestServlet(ClientV1RestServlet): def __init__(self, hs): super(LoginRestServlet, self).__init__(hs) self.idp_redirect_url = hs.config.saml2_config['idp_redirect_url'] + self.saml2_enabled = hs.config.saml2_config['enabled'] def on_GET(self, request): - return (200, {"flows": [{"type": LoginRestServlet.PASS_TYPE}, - {"type": LoginRestServlet.SAML2_TYPE}]}) + flows = [{"type": LoginRestServlet.PASS_TYPE}] + if self.saml2_enabled: + flows.append({"type": LoginRestServlet.SAML2_TYPE}) + return (200, {"flows": flows}) def on_OPTIONS(self, request): return (200, {}) @@ -54,7 +57,8 @@ class LoginRestServlet(ClientV1RestServlet): if login_submission["type"] == LoginRestServlet.PASS_TYPE: result = yield self.do_password_login(login_submission) defer.returnValue(result) - elif login_submission["type"] == LoginRestServlet.SAML2_TYPE: + elif self.saml2_enabled and (login_submission["type"] == + LoginRestServlet.SAML2_TYPE): relay_state = "" if "relay_state" in login_submission: relay_state = "&RelayState="+urllib.quote( @@ -173,5 +177,6 @@ def _parse_json(request): def register_servlets(hs, http_server): LoginRestServlet(hs).register(http_server) - SAML2RestServlet(hs).register(http_server) + if hs.config.saml2_config['enabled']: + SAML2RestServlet(hs).register(http_server) # TODO PasswordResetRestServlet(hs).register(http_server) From 1af188209a03567dc4b5300b9a0fc8613ad176df Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 9 Jul 2015 11:39:30 +0100 Subject: [PATCH 049/266] Change format of receipts to allow inclusion of data --- synapse/handlers/receipts.py | 24 ++++++++---- synapse/storage/receipts.py | 39 ++++++++++---------- synapse/storage/schema/delta/21/receipts.sql | 16 +++----- 3 files changed, 42 insertions(+), 37 deletions(-) diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index 053ed8480..8a052f071 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -36,6 +36,7 @@ class ReceiptsHandler(BaseHandler): self.federation.register_edu_handler( "m.receipt", self._received_remote_receipt ) + self.clock = self.hs.get_clock() self._receipt_cache = None @@ -51,6 +52,9 @@ class ReceiptsHandler(BaseHandler): "receipt_type": receipt_type, "user_id": user_id, "event_ids": [event_id], + "data": { + "ts": self.clock.time_msec() + } } is_new = yield self._handle_new_receipts([receipt]) @@ -65,12 +69,12 @@ class ReceiptsHandler(BaseHandler): "room_id": room_id, "receipt_type": receipt_type, "user_id": user_id, - "event_ids": [event_id], + "event_ids": user_values["event_ids"], + "data": user_values.get("data", {}), } for room_id, room_values in content.items() - for event_id, ev_values in room_values.items() - for receipt_type, users in 
ev_values.items() - for user_id in users + for receipt_type, users in room_values.items() + for user_id, user_values in users.items() ] yield self._handle_new_receipts(receipts) @@ -82,9 +86,10 @@ class ReceiptsHandler(BaseHandler): receipt_type = receipt["receipt_type"] user_id = receipt["user_id"] event_ids = receipt["event_ids"] + data = receipt["data"] res = yield self.store.insert_receipt( - room_id, receipt_type, user_id, event_ids, + room_id, receipt_type, user_id, event_ids, data ) if not res: @@ -108,6 +113,7 @@ class ReceiptsHandler(BaseHandler): receipt_type = receipt["receipt_type"] user_id = receipt["user_id"] event_ids = receipt["event_ids"] + data = receipt["data"] remotedomains = set() @@ -124,10 +130,12 @@ class ReceiptsHandler(BaseHandler): edu_type="m.receipt", content={ room_id: { - event_id: { - receipt_type: [user_id] + receipt_type: { + user_id: { + "event_ids": event_ids, + "data": data, + } } - for event_id in event_ids }, }, ) diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py index 503f68f85..c4e6b02bd 100644 --- a/synapse/storage/receipts.py +++ b/synapse/storage/receipts.py @@ -21,6 +21,7 @@ from synapse.util import unwrapFirstError from blist import sorteddict import logging +import ujson as json logger = logging.getLogger(__name__) @@ -91,8 +92,8 @@ class ReceiptsStore(SQLBaseStore): content.setdefault( row["event_id"], {} ).setdefault( - row["receipt_type"], [] - ).append(row["user_id"]) + row["receipt_type"], {} + )[row["user_id"]] = json.loads(row["data"]) defer.returnValue([{ "type": "m.receipt", @@ -124,7 +125,7 @@ class ReceiptsStore(SQLBaseStore): defer.returnValue(result) def insert_linearized_receipt_txn(self, txn, room_id, receipt_type, - user_id, event_id, stream_id): + user_id, event_id, data, stream_id): # We don't want to clobber receipts for more recent events, so we # have to compare orderings of existing receipts @@ -172,13 +173,14 @@ class ReceiptsStore(SQLBaseStore): "receipt_type": receipt_type, "user_id": user_id, "event_id": event_id, + "data": json.dumps(data), } ) return True @defer.inlineCallbacks - def insert_receipt(self, room_id, receipt_type, user_id, event_ids): + def insert_receipt(self, room_id, receipt_type, user_id, event_ids, data): if not event_ids: return @@ -214,6 +216,7 @@ class ReceiptsStore(SQLBaseStore): "insert_linearized_receipt", self.insert_linearized_receipt_txn, room_id, receipt_type, user_id, linearized_event_id, + data, stream_id=stream_id, ) @@ -221,22 +224,22 @@ class ReceiptsStore(SQLBaseStore): defer.returnValue(None) yield self.insert_graph_receipt( - room_id, receipt_type, user_id, event_ids + room_id, receipt_type, user_id, event_ids, data ) max_persisted_id = yield self._stream_id_gen.get_max_token(self) defer.returnValue((stream_id, max_persisted_id)) - def insert_graph_receipt(self, room_id, receipt_type, - user_id, event_ids): + def insert_graph_receipt(self, room_id, receipt_type, user_id, event_ids, + data): return self.runInteraction( "insert_graph_receipt", self.insert_graph_receipt_txn, - room_id, receipt_type, user_id, event_ids, + room_id, receipt_type, user_id, event_ids, data ) def insert_graph_receipt_txn(self, txn, room_id, receipt_type, - user_id, event_ids): + user_id, event_ids, data): self._simple_delete_txn( txn, table="receipts_graph", @@ -246,18 +249,16 @@ class ReceiptsStore(SQLBaseStore): "user_id": user_id, } ) - self._simple_insert_many_txn( + self._simple_insert_txn( txn, table="receipts_graph", - values=[ - { - "room_id": room_id, - "receipt_type": 
receipt_type, - "user_id": user_id, - "event_id": event_id, - } - for event_id in event_ids - ], + values={ + "room_id": room_id, + "receipt_type": receipt_type, + "user_id": user_id, + "event_ids": json.dumps(event_ids), + "data": json.dumps(data), + } ) diff --git a/synapse/storage/schema/delta/21/receipts.sql b/synapse/storage/schema/delta/21/receipts.sql index ac7738e37..2f64d609f 100644 --- a/synapse/storage/schema/delta/21/receipts.sql +++ b/synapse/storage/schema/delta/21/receipts.sql @@ -18,11 +18,9 @@ CREATE TABLE IF NOT EXISTS receipts_graph( room_id TEXT NOT NULL, receipt_type TEXT NOT NULL, user_id TEXT NOT NULL, - event_id TEXT NOT NULL -); - -CREATE INDEX receipts_graph_room_tuple ON receipts_graph( - room_id, receipt_type, user_id + event_ids TEXT NOT NULL, + data TEXT NOT NULL, + CONSTRAINT receipts_graph_uniqueness UNIQUE (room_id, receipt_type, user_id) ); CREATE TABLE IF NOT EXISTS receipts_linearized ( @@ -30,11 +28,9 @@ CREATE TABLE IF NOT EXISTS receipts_linearized ( room_id TEXT NOT NULL, receipt_type TEXT NOT NULL, user_id TEXT NOT NULL, - event_id TEXT NOT NULL -); - -CREATE INDEX receipts_linearized_room_tuple ON receipts_linearized( - room_id, receipt_type, user_id + event_id TEXT NOT NULL, + data TEXT NOT NULL, + CONSTRAINT receipts_linearized_uniqueness UNIQUE (room_id, receipt_type, user_id) ); CREATE INDEX receipts_linearized_id ON receipts_linearized( From 294dbd712fee435489fe6e9f9942bd9c39fd480c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 9 Jul 2015 11:47:24 +0100 Subject: [PATCH 050/266] We don't want semicolons. --- synapse/config/tls.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/config/tls.py b/synapse/config/tls.py index e136d1371..6c1df35e8 100644 --- a/synapse/config/tls.py +++ b/synapse/config/tls.py @@ -27,7 +27,7 @@ class TlsConfig(Config): self.tls_certificate = self.read_tls_certificate( config.get("tls_certificate_path") ) - self.tls_certificate_file = config.get("tls_certificate_path"); + self.tls_certificate_file = config.get("tls_certificate_path") self.no_tls = config.get("no_tls", False) From c2d08ca62aebdfa689ae7d39ff74b2a43e4b5e3a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 9 Jul 2015 13:15:34 +0100 Subject: [PATCH 051/266] Integer timestamps --- synapse/handlers/receipts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index 8a052f071..403c1c849 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -53,7 +53,7 @@ class ReceiptsHandler(BaseHandler): "user_id": user_id, "event_ids": [event_id], "data": { - "ts": self.clock.time_msec() + "ts": int(self.clock.time_msec()), } } From bf0d59ed30b63c6a355e7b3f2a74a26181fd6893 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Thu, 9 Jul 2015 14:04:03 +0100 Subject: [PATCH 052/266] Don't bother with a timeout for one time keys on the server. 
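One-time keys uploaded to the server now live until a client claims them:
the "one_time_keys_valid_for" request field and the "valid_until_ms" column
are dropped, and a key is only deleted once it has been taken. As a rough
sketch (the key id and value below are made up), an upload body reduces to:

    {
      "one_time_keys": {
        "curve25519:AAAAAQ": "<curve25519 key>"
      }
    }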
--- synapse/rest/client/v2_alpha/keys.py | 25 ++++++------------- synapse/storage/end_to_end_keys.py | 20 +++++---------- .../schema/delta/21/end_to_end_keys.sql | 1 - 3 files changed, 13 insertions(+), 33 deletions(-) diff --git a/synapse/rest/client/v2_alpha/keys.py b/synapse/rest/client/v2_alpha/keys.py index 3bb4ad64f..4b617c251 100644 --- a/synapse/rest/client/v2_alpha/keys.py +++ b/synapse/rest/client/v2_alpha/keys.py @@ -50,7 +50,6 @@ class KeyUploadServlet(RestServlet): "one_time_keys": { ":": "" }, - "one_time_keys_valid_for": , } """ PATTERN = client_v2_pattern("/keys/upload/(?P[^/]*)") @@ -87,13 +86,10 @@ class KeyUploadServlet(RestServlet): ) one_time_keys = body.get("one_time_keys", None) - one_time_keys_valid_for = body.get("one_time_keys_valid_for", None) if one_time_keys: - valid_until = int(one_time_keys_valid_for) + time_now logger.info( - "Adding %d one_time_keys for device %r for user %r at %d" - " valid_until %d", - len(one_time_keys), device_id, user_id, time_now, valid_until + "Adding %d one_time_keys for device %r for user %r at %d", + len(one_time_keys), device_id, user_id, time_now ) key_list = [] for key_id, key_json in one_time_keys.items(): @@ -103,23 +99,18 @@ class KeyUploadServlet(RestServlet): )) yield self.store.add_e2e_one_time_keys( - user_id, device_id, time_now, valid_until, key_list + user_id, device_id, time_now, key_list ) - result = yield self.store.count_e2e_one_time_keys( - user_id, device_id, time_now - ) + result = yield self.store.count_e2e_one_time_keys(user_id, device_id) defer.returnValue((200, {"one_time_key_counts": result})) @defer.inlineCallbacks def on_GET(self, request, device_id): auth_user, client_info = yield self.auth.get_user_by_req(request) user_id = auth_user.to_string() - time_now = self.clock.time_msec() - result = yield self.store.count_e2e_one_time_keys( - user_id, device_id, time_now - ) + result = yield self.store.count_e2e_one_time_keys(user_id, device_id) defer.returnValue((200, {"one_time_key_counts": result})) @@ -249,9 +240,8 @@ class OneTimeKeyServlet(RestServlet): @defer.inlineCallbacks def on_GET(self, request, user_id, device_id, algorithm): yield self.auth.get_user_by_req(request) - time_now = self.clock.time_msec() results = yield self.store.take_e2e_one_time_keys( - [(user_id, device_id, algorithm)], time_now + [(user_id, device_id, algorithm)] ) defer.returnValue(self.json_result(request, results)) @@ -266,8 +256,7 @@ class OneTimeKeyServlet(RestServlet): for user_id, device_keys in body.get("one_time_keys", {}).items(): for device_id, algorithm in device_keys.items(): query.append((user_id, device_id, algorithm)) - time_now = self.clock.time_msec() - results = yield self.store.take_e2e_one_time_keys(query, time_now) + results = yield self.store.take_e2e_one_time_keys(query) defer.returnValue(self.json_result(request, results)) def json_result(self, request, results): diff --git a/synapse/storage/end_to_end_keys.py b/synapse/storage/end_to_end_keys.py index b3cede37e..99dc864e4 100644 --- a/synapse/storage/end_to_end_keys.py +++ b/synapse/storage/end_to_end_keys.py @@ -55,14 +55,8 @@ class EndToEndKeyStore(SQLBaseStore): return result return self.runInteraction("get_e2e_device_keys", _get_e2e_device_keys) - def add_e2e_one_time_keys(self, user_id, device_id, time_now, valid_until, - key_list): + def add_e2e_one_time_keys(self, user_id, device_id, time_now, key_list): def _add_e2e_one_time_keys(txn): - sql = ( - "DELETE FROM e2e_one_time_keys_json" - " WHERE user_id = ? AND device_id = ? 
AND valid_until_ms < ?" - ) - txn.execute(sql, (user_id, device_id, time_now)) for (algorithm, key_id, json_bytes) in key_list: self._simple_upsert_txn( txn, table="e2e_one_time_keys_json", @@ -74,7 +68,6 @@ class EndToEndKeyStore(SQLBaseStore): }, values={ "ts_added_ms": time_now, - "valid_until_ms": valid_until, "key_json": json_bytes, } ) @@ -82,7 +75,7 @@ class EndToEndKeyStore(SQLBaseStore): "add_e2e_one_time_keys", _add_e2e_one_time_keys ) - def count_e2e_one_time_keys(self, user_id, device_id, time_now): + def count_e2e_one_time_keys(self, user_id, device_id): """ Count the number of one time keys the server has for a device Returns: Dict mapping from algorithm to number of keys for that algorithm. @@ -90,10 +83,10 @@ class EndToEndKeyStore(SQLBaseStore): def _count_e2e_one_time_keys(txn): sql = ( "SELECT algorithm, COUNT(key_id) FROM e2e_one_time_keys_json" - " WHERE user_id = ? AND device_id = ? AND valid_until_ms >= ?" + " WHERE user_id = ? AND device_id = ?" " GROUP BY algorithm" ) - txn.execute(sql, (user_id, device_id, time_now)) + txn.execute(sql, (user_id, device_id)) result = {} for algorithm, key_count in txn.fetchall(): result[algorithm] = key_count @@ -102,13 +95,12 @@ class EndToEndKeyStore(SQLBaseStore): "count_e2e_one_time_keys", _count_e2e_one_time_keys ) - def take_e2e_one_time_keys(self, query_list, time_now): + def take_e2e_one_time_keys(self, query_list): """Take a list of one time keys out of the database""" def _take_e2e_one_time_keys(txn): sql = ( "SELECT key_id, key_json FROM e2e_one_time_keys_json" " WHERE user_id = ? AND device_id = ? AND algorithm = ?" - " AND valid_until_ms > ?" " LIMIT 1" ) result = {} @@ -116,7 +108,7 @@ class EndToEndKeyStore(SQLBaseStore): for user_id, device_id, algorithm in query_list: user_result = result.setdefault(user_id, {}) device_result = user_result.setdefault(device_id, {}) - txn.execute(sql, (user_id, device_id, algorithm, time_now)) + txn.execute(sql, (user_id, device_id, algorithm)) for key_id, key_json in txn.fetchall(): device_result[algorithm + ":" + key_id] = key_json delete.append((user_id, device_id, algorithm, key_id)) diff --git a/synapse/storage/schema/delta/21/end_to_end_keys.sql b/synapse/storage/schema/delta/21/end_to_end_keys.sql index 107d2e67c..95e27eb7e 100644 --- a/synapse/storage/schema/delta/21/end_to_end_keys.sql +++ b/synapse/storage/schema/delta/21/end_to_end_keys.sql @@ -29,7 +29,6 @@ CREATE TABLE IF NOT EXISTS e2e_one_time_keys_json ( algorithm TEXT NOT NULL, -- Which algorithm this one-time key is for. key_id TEXT NOT NULL, -- An id for suppressing duplicate uploads. ts_added_ms BIGINT NOT NULL, -- When this key was uploaded. - valid_until_ms BIGINT NOT NULL, -- When this key is valid until. key_json TEXT NOT NULL, -- The key as a JSON blob. CONSTRAINT uniqueness UNIQUE (user_id, device_id, algorithm, key_id) ); From f0979afdb0b0b8f96730a97cf0b51de6024cabda Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 9 Jul 2015 16:02:07 +0100 Subject: [PATCH 053/266] Remove spurious comment --- synapse/handlers/receipts.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index 403c1c849..f847360d0 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -13,8 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-"""Contains handlers for federation events.""" - from ._base import BaseHandler from twisted.internet import defer From ed887209520cc3268885c918738734f8b9874c81 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 9 Jul 2015 16:14:46 +0100 Subject: [PATCH 054/266] Handle error slightly better --- synapse/storage/receipts.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py index c4e6b02bd..593032713 100644 --- a/synapse/storage/receipts.py +++ b/synapse/storage/receipts.py @@ -188,6 +188,7 @@ class ReceiptsStore(SQLBaseStore): linearized_event_id = event_ids[0] else: # we need to points in graph -> linearized form. + # TODO: Make this better. def graph_to_linear(txn): query = ( "SELECT event_id WHERE room_id = ? AND stream_ordering IN (" @@ -200,8 +201,7 @@ class ReceiptsStore(SQLBaseStore): if rows: return rows[0][0] else: - # TODO: ARGH?! - return None + raise RuntimeError("Unrecognized event_ids: %r" % (event_ids,)) linearized_event_id = yield self.runInteraction( "insert_receipt_conv", graph_to_linear From b5f0d73ea3f6611c0980a03a0dfe57058071013e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 9 Jul 2015 17:09:26 +0100 Subject: [PATCH 055/266] Add comment --- synapse/handlers/federation.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index cd3867ed9..d7f197f24 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -242,6 +242,10 @@ class FederationHandler(BaseHandler): if history: visibility = history.content.get("history_visibility", "shared") if visibility in ["invited", "joined"]: + # We now loop through all state events looking for + # membership states for the requesting server to determine + # if the server is either in the room or has been invited + # into the room. for ev in state.values(): if ev.type != EventTypes.Member: continue From a887efa07aa9757538cb36d0dfa2b79925eba4aa Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 10 Jul 2015 10:37:24 +0100 Subject: [PATCH 056/266] Add Muthu Subramanian to AUTHORS --- AUTHORS.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/AUTHORS.rst b/AUTHORS.rst index d7224ff5d..54ced6700 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -42,3 +42,6 @@ Ivan Shapovalov Eric Myhre * Fix bug where ``media_store_path`` config option was ignored by v0 content repository API. + +Muthu Subramanian + * Add SAML2 support for registration and logins. From f3049d0b81ad626de7ca80330608b374e0ec8b5b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 10 Jul 2015 10:50:03 +0100 Subject: [PATCH 057/266] Small tweaks to SAML2 configuration. - Add saml2 config docs to default config. - Use existence of saml2 config to indicate if saml2 should be enabled. --- synapse/config/saml2.py | 48 ++++++++++++++++++++------------- synapse/rest/client/v1/login.py | 8 +++--- 2 files changed, 34 insertions(+), 22 deletions(-) diff --git a/synapse/config/saml2.py b/synapse/config/saml2.py index be5176db5..153203687 100644 --- a/synapse/config/saml2.py +++ b/synapse/config/saml2.py @@ -16,27 +16,39 @@ from ._base import Config -# -# SAML2 Configuration -# Synapse uses pysaml2 libraries for providing SAML2 support -# -# config_path: Path to the sp_conf.py configuration file -# idp_redirect_url: Identity provider URL which will redirect -# the user back to /login/saml2 with proper info. 
-# -# sp_conf.py file is something like: -# https://github.com/rohe/pysaml2/blob/master/example/sp-repoze/sp_conf.py.example -# -# More information: https://pythonhosted.org/pysaml2/howto/config.html -# class SAML2Config(Config): + """SAML2 Configuration + Synapse uses pysaml2 libraries for providing SAML2 support + + config_path: Path to the sp_conf.py configuration file + idp_redirect_url: Identity provider URL which will redirect + the user back to /login/saml2 with proper info. + + sp_conf.py file is something like: + https://github.com/rohe/pysaml2/blob/master/example/sp-repoze/sp_conf.py.example + + More information: https://pythonhosted.org/pysaml2/howto/config.html + """ + def read_config(self, config): - self.saml2_config = config["saml2_config"] + saml2_config = config.get("saml2_config", None) + if saml2_config: + self.saml2_enabled = True + self.saml2_config_path = saml2_config["config_path"] + self.saml2_idp_redirect_url = saml2_config["idp_redirect_url"] + else: + self.saml2_enabled = False + self.saml2_config_path = None + self.saml2_idp_redirect_url = None def default_config(self, config_dir_path, server_name): return """ - saml2_config: - enabled: false - config_path: "%s/sp_conf.py" - idp_redirect_url: "http://%s/idp" + # Enable SAML2 for registration and login. Uses pysaml2 + # config_path: Path to the sp_conf.py configuration file + # idp_redirect_url: Identity provider URL which will redirect + # the user back to /login/saml2 with proper info. + # See pysaml2 docs for format of config. + #saml2_config: + # config_path: "%s/sp_conf.py" + # idp_redirect_url: "http://%s/idp" """ % (config_dir_path, server_name) diff --git a/synapse/rest/client/v1/login.py b/synapse/rest/client/v1/login.py index f64f5e990..998d4d44c 100644 --- a/synapse/rest/client/v1/login.py +++ b/synapse/rest/client/v1/login.py @@ -38,8 +38,8 @@ class LoginRestServlet(ClientV1RestServlet): def __init__(self, hs): super(LoginRestServlet, self).__init__(hs) - self.idp_redirect_url = hs.config.saml2_config['idp_redirect_url'] - self.saml2_enabled = hs.config.saml2_config['enabled'] + self.idp_redirect_url = hs.config.saml2_idp_redirect_url + self.saml2_enabled = hs.config.saml2_enabled def on_GET(self, request): flows = [{"type": LoginRestServlet.PASS_TYPE}] @@ -127,7 +127,7 @@ class SAML2RestServlet(ClientV1RestServlet): def __init__(self, hs): super(SAML2RestServlet, self).__init__(hs) - self.sp_config = hs.config.saml2_config['config_path'] + self.sp_config = hs.config.saml2_config_path @defer.inlineCallbacks def on_POST(self, request): @@ -177,6 +177,6 @@ def _parse_json(request): def register_servlets(hs, http_server): LoginRestServlet(hs).register(http_server) - if hs.config.saml2_config['enabled']: + if hs.config.saml2_enabled: SAML2RestServlet(hs).register(http_server) # TODO PasswordResetRestServlet(hs).register(http_server) From a01097d60b9c711d71cd5d1c63cb4fb5b95a8a63 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Fri, 10 Jul 2015 13:26:18 +0100 Subject: [PATCH 058/266] Assume that each device for a user has only one of each type of key --- synapse/rest/client/v2_alpha/keys.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/rest/client/v2_alpha/keys.py b/synapse/rest/client/v2_alpha/keys.py index 4b617c251..f03126775 100644 --- a/synapse/rest/client/v2_alpha/keys.py +++ b/synapse/rest/client/v2_alpha/keys.py @@ -41,11 +41,11 @@ class KeyUploadServlet(RestServlet): "m.olm.curve25519-aes-sha256", ] "keys": { - ":": "", + ":": "", }, "signatures:" { - "/" { - ":": 
"" + "" { + ":": "" } } }, "one_time_keys": { ":": "" From 0d7f0febf45575666d182cb2ad0cf451b11fbf07 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 10 Jul 2015 13:43:03 +0100 Subject: [PATCH 059/266] Uniquely name unique constraint --- synapse/storage/schema/delta/21/end_to_end_keys.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/storage/schema/delta/21/end_to_end_keys.sql b/synapse/storage/schema/delta/21/end_to_end_keys.sql index 95e27eb7e..8b4a380d1 100644 --- a/synapse/storage/schema/delta/21/end_to_end_keys.sql +++ b/synapse/storage/schema/delta/21/end_to_end_keys.sql @@ -19,7 +19,7 @@ CREATE TABLE IF NOT EXISTS e2e_device_keys_json ( device_id TEXT NOT NULL, -- Which of the user's devices these keys are for. ts_added_ms BIGINT NOT NULL, -- When the keys were uploaded. key_json TEXT NOT NULL, -- The keys for the device as a JSON blob. - CONSTRAINT uniqueness UNIQUE (user_id, device_id) + CONSTRAINT e2e_device_keys_json_uniqueness UNIQUE (user_id, device_id) ); @@ -30,5 +30,5 @@ CREATE TABLE IF NOT EXISTS e2e_one_time_keys_json ( key_id TEXT NOT NULL, -- An id for suppressing duplicate uploads. ts_added_ms BIGINT NOT NULL, -- When this key was uploaded. key_json TEXT NOT NULL, -- The key as a JSON blob. - CONSTRAINT uniqueness UNIQUE (user_id, device_id, algorithm, key_id) + CONSTRAINT e2e_one_time_keys_json_uniqueness UNIQUE (user_id, device_id, algorithm, key_id) ); From 7e3b14fe782eedbe37a0eb8c17da605d2373e594 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 10 Jul 2015 13:21:31 +0100 Subject: [PATCH 060/266] You shouldn't be able to ban/kick users with higher power levels --- synapse/api/auth.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/synapse/api/auth.py b/synapse/api/auth.py index 4da62e5d8..bd2f058e4 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -187,6 +187,9 @@ class Auth(object): join_rule = JoinRules.INVITE user_level = self._get_user_power_level(event.user_id, auth_events) + target_level = self._get_user_power_level( + target_user_id, auth_events + ) # FIXME (erikj): What should we do here as the default? ban_level = self._get_named_level(auth_events, "ban", 50) @@ -258,12 +261,12 @@ class Auth(object): elif target_user_id != event.user_id: kick_level = self._get_named_level(auth_events, "kick", 50) - if user_level < kick_level: + if user_level < kick_level or user_level < target_level: raise AuthError( 403, "You cannot kick user %s." % target_user_id ) elif Membership.BAN == membership: - if user_level < ban_level: + if user_level < ban_level or user_level < target_level: raise AuthError(403, "You don't have permission to ban") else: raise AuthError(500, "Unknown membership %s" % membership) From a5ea22d4683ea890cb4c4ba000502f116814dd1d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 10 Jul 2015 13:42:24 +0100 Subject: [PATCH 061/266] Sanitize power level checks --- synapse/api/auth.py | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/synapse/api/auth.py b/synapse/api/auth.py index bd2f058e4..ee52ff66d 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -261,12 +261,12 @@ class Auth(object): elif target_user_id != event.user_id: kick_level = self._get_named_level(auth_events, "kick", 50) - if user_level < kick_level or user_level < target_level: + if user_level < kick_level or user_level <= target_level: raise AuthError( 403, "You cannot kick user %s." 
% target_user_id ) elif Membership.BAN == membership: - if user_level < ban_level or user_level < target_level: + if user_level < ban_level or user_level <= target_level: raise AuthError(403, "You don't have permission to ban") else: raise AuthError(500, "Unknown membership %s" % membership) @@ -576,25 +576,25 @@ class Auth(object): # Check other levels: levels_to_check = [ - ("users_default", []), - ("events_default", []), - ("ban", []), - ("redact", []), - ("kick", []), - ("invite", []), + ("users_default", None), + ("events_default", None), + ("ban", None), + ("redact", None), + ("kick", None), + ("invite", None), ] old_list = current_state.content.get("users") for user in set(old_list.keys() + user_list.keys()): levels_to_check.append( - (user, ["users"]) + (user, "users") ) old_list = current_state.content.get("events") new_list = event.content.get("events") for ev_id in set(old_list.keys() + new_list.keys()): levels_to_check.append( - (ev_id, ["events"]) + (ev_id, "events") ) old_state = current_state.content @@ -602,12 +602,10 @@ class Auth(object): for level_to_check, dir in levels_to_check: old_loc = old_state - for d in dir: - old_loc = old_loc.get(d, {}) - new_loc = new_state - for d in dir: - new_loc = new_loc.get(d, {}) + if dir: + old_loc = old_loc.get(dir, {}) + new_loc = new_loc.get(dir, {}) if level_to_check in old_loc: old_level = int(old_loc[level_to_check]) @@ -623,6 +621,14 @@ class Auth(object): if new_level == old_level: continue + if dir == "users" and level_to_check != event.user_id: + if old_level == user_level: + raise AuthError( + 403, + "You don't have permission to remove ops level equal " + "to your own" + ) + if old_level > user_level or new_level > user_level: raise AuthError( 403, From 17bb9a7eb93cdc5534b63d5c2ccf2d978b2bec84 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 10 Jul 2015 14:07:57 +0100 Subject: [PATCH 062/266] Remove commented out code --- synapse/storage/events.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index d760acb87..ed7ea3880 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -366,20 +366,6 @@ class EventsStore(SQLBaseStore): txn, event.event_id, context.rejected ) - # for hash_alg, hash_base64 in event.hashes.items(): - # hash_bytes = decode_base64(hash_base64) - # self._store_event_content_hash_txn( - # txn, event.event_id, hash_alg, hash_bytes, - # ) - - # for prev_event_id, prev_hashes in event.prev_events: - # for alg, hash_base64 in prev_hashes.items(): - # hash_bytes = decode_base64(hash_base64) - # self._store_prev_event_hash_txn( - # txn, event.event_id, prev_event_id, alg, - # hash_bytes - # ) - self._simple_insert_many_txn( txn, table="event_auth", From e5991af629df2e63c20c5f10e4589a9faf8305cb Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 13 Jul 2015 13:30:43 +0100 Subject: [PATCH 063/266] Comments --- synapse/handlers/receipts.py | 16 ++++++++++++---- synapse/storage/receipts.py | 11 +++++++++++ 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index f847360d0..1925a4803 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -41,10 +41,9 @@ class ReceiptsHandler(BaseHandler): @defer.inlineCallbacks def received_client_receipt(self, room_id, receipt_type, user_id, event_id): - # 1. Persist. - # 2. Notify local clients - # 3. 
Notify remote servers - + """Called when a client tells us a local user has read up to the given + event_id in the room. + """ receipt = { "room_id": room_id, "receipt_type": receipt_type, @@ -62,6 +61,8 @@ class ReceiptsHandler(BaseHandler): @defer.inlineCallbacks def _received_remote_receipt(self, origin, content): + """Called when we receive an EDU of type m.receipt from a remote HS. + """ receipts = [ { "room_id": room_id, @@ -79,6 +80,8 @@ class ReceiptsHandler(BaseHandler): @defer.inlineCallbacks def _handle_new_receipts(self, receipts): + """Takes a list of receipts, stores them and informs the notifier. + """ for receipt in receipts: room_id = receipt["room_id"] receipt_type = receipt["receipt_type"] @@ -105,6 +108,9 @@ class ReceiptsHandler(BaseHandler): @defer.inlineCallbacks def _push_remotes(self, receipts): + """Given a list of receipts, works out which remote servers should be + poked and pokes them. + """ # TODO: Some of this stuff should be coallesced. for receipt in receipts: room_id = receipt["room_id"] @@ -140,6 +146,8 @@ class ReceiptsHandler(BaseHandler): @defer.inlineCallbacks def get_receipts_for_room(self, room_id, to_key): + """Gets all receipts for a room, upto the given key. + """ result = yield self.store.get_linearized_receipts_for_room( room_id, None, to_key ) diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py index 593032713..56b9fedfd 100644 --- a/synapse/storage/receipts.py +++ b/synapse/storage/receipts.py @@ -35,6 +35,8 @@ class ReceiptsStore(SQLBaseStore): @defer.inlineCallbacks def get_linearized_receipts_for_rooms(self, room_ids, from_key, to_key): + """Get receipts for multiple rooms for sending to clients. + """ room_ids = set(room_ids) if from_key: @@ -54,6 +56,8 @@ class ReceiptsStore(SQLBaseStore): @defer.inlineCallbacks def get_linearized_receipts_for_room(self, room_id, from_key, to_key): + """Get receipts for a single room for sending to clients. + """ def f(txn): if from_key: sql = ( @@ -107,6 +111,8 @@ class ReceiptsStore(SQLBaseStore): @cached @defer.inlineCallbacks def get_graph_receipts_for_room(self, room_id): + """Get receipts for sending to remote servers. + """ rows = yield self._simple_select_list( table="receipts_graph", keyvalues={"room_id": room_id}, @@ -181,6 +187,11 @@ class ReceiptsStore(SQLBaseStore): @defer.inlineCallbacks def insert_receipt(self, room_id, receipt_type, user_id, event_ids, data): + """Insert a receipt, either from local client or remote server. + + Automatically does conversion between linearized and graph + representations. 
+ """ if not event_ids: return From d5cc7945985e4cbf17c7dbf4c7c45071b87d030e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 13 Jul 2015 16:48:06 +0100 Subject: [PATCH 064/266] Implement presets at room creation --- synapse/api/constants.py | 5 +++ synapse/handlers/room.py | 82 +++++++++++++++++++++++++++++----------- 2 files changed, 65 insertions(+), 22 deletions(-) diff --git a/synapse/api/constants.py b/synapse/api/constants.py index 3e15e8a9d..885b9c359 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -87,3 +87,8 @@ class RejectedReason(object): AUTH_ERROR = "auth_error" REPLACED = "replaced" NOT_ANCESTOR = "not_ancestor" + + +class RoomCreationPreset(object): + PrivateChat = "private_chat" + PublicChat = "public_chat" diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 891707df4..c65439543 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -19,7 +19,9 @@ from twisted.internet import defer from ._base import BaseHandler from synapse.types import UserID, RoomAlias, RoomID -from synapse.api.constants import EventTypes, Membership, JoinRules +from synapse.api.constants import ( + EventTypes, Membership, JoinRules, RoomCreationPreset, +) from synapse.api.errors import StoreError, SynapseError from synapse.util import stringutils, unwrapFirstError from synapse.util.async import run_on_reactor @@ -33,6 +35,19 @@ logger = logging.getLogger(__name__) class RoomCreationHandler(BaseHandler): + PRESETS_DICT = { + RoomCreationPreset.PrivateChat: { + "join_rules": JoinRules.INVITE, + "history_visibility": "invited", + "everyone_ops": False, + }, + RoomCreationPreset.PublicChat: { + "join_rules": JoinRules.PUBLIC, + "history_visibility": "shared", + "everyone_ops": False, + }, + } + @defer.inlineCallbacks def create_room(self, user_id, room_id, config): """ Creates a new room. 
@@ -121,9 +136,18 @@ class RoomCreationHandler(BaseHandler): servers=[self.hs.hostname], ) + preset_config = config.get( + "preset", + RoomCreationPreset.PublicChat + if is_public + else RoomCreationPreset.PrivateChat + ) + user = UserID.from_string(user_id) creation_events = self._create_events_for_new_room( - user, room_id, is_public=is_public + user, room_id, + preset_config=preset_config, + invite_list=invite_list, ) msg_handler = self.hs.get_handlers().message_handler @@ -170,7 +194,10 @@ class RoomCreationHandler(BaseHandler): defer.returnValue(result) - def _create_events_for_new_room(self, creator, room_id, is_public=False): + def _create_events_for_new_room(self, creator, room_id, preset_config, + invite_list): + config = RoomCreationHandler.PRESETS_DICT[preset_config] + creator_id = creator.to_string() event_keys = { @@ -203,37 +230,48 @@ class RoomCreationHandler(BaseHandler): }, ) + power_level_content = { + "users": { + creator.to_string(): 100, + }, + "users_default": 0, + "events": { + EventTypes.Name: 100, + EventTypes.PowerLevels: 100, + EventTypes.RoomHistoryVisibility: 100, + }, + "events_default": 0, + "state_default": 50, + "ban": 50, + "kick": 50, + "redact": 50, + "invite": 0, + } + + if config["everyone_ops"]: + for invitee in invite_list: + power_level_content["users"][invitee] = 100 + power_levels_event = create( etype=EventTypes.PowerLevels, - content={ - "users": { - creator.to_string(): 100, - }, - "users_default": 0, - "events": { - EventTypes.Name: 100, - EventTypes.PowerLevels: 100, - EventTypes.RoomHistoryVisibility: 100, - }, - "events_default": 0, - "state_default": 50, - "ban": 50, - "kick": 50, - "redact": 50, - "invite": 0, - }, + content=power_level_content, ) - join_rule = JoinRules.PUBLIC if is_public else JoinRules.INVITE join_rules_event = create( etype=EventTypes.JoinRules, - content={"join_rule": join_rule}, + content={"join_rule": config["join_rules"]}, + ) + + history_event = create( + etype=EventTypes.RoomHistoryVisibility, + content={"history_visibility": config["history_visibility"]} ) return [ creation_event, join_event, power_levels_event, + history_event, join_rules_event, ] From 4624d6035e28c4ee05e38234f2aa1671b4ac701a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 14 Jul 2015 10:19:07 +0100 Subject: [PATCH 065/266] Docs --- synapse/handlers/receipts.py | 11 ++++++++--- synapse/storage/receipts.py | 31 ++++++++++++++++++++++++++++--- 2 files changed, 36 insertions(+), 6 deletions(-) diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index 1925a4803..5b3df6932 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -149,7 +149,8 @@ class ReceiptsHandler(BaseHandler): """Gets all receipts for a room, upto the given key. 
""" result = yield self.store.get_linearized_receipts_for_room( - room_id, None, to_key + room_id, + to_key=to_key, ) if not result: @@ -176,7 +177,9 @@ class ReceiptEventSource(object): rooms = yield self.store.get_rooms_for_user(user.to_string()) rooms = [room.room_id for room in rooms] events = yield self.store.get_linearized_receipts_for_rooms( - rooms, from_key, to_key + rooms, + from_key=from_key, + to_key=to_key, ) defer.returnValue((events, to_key)) @@ -196,7 +199,9 @@ class ReceiptEventSource(object): rooms = yield self.store.get_rooms_for_user(user.to_string()) rooms = [room.room_id for room in rooms] events = yield self.store.get_linearized_receipts_for_rooms( - rooms, from_key, to_key + rooms, + from_key=from_key, + to_key=to_key, ) defer.returnValue((events, to_key)) diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py index 56b9fedfd..d515a0a15 100644 --- a/synapse/storage/receipts.py +++ b/synapse/storage/receipts.py @@ -34,8 +34,17 @@ class ReceiptsStore(SQLBaseStore): self._receipts_stream_cache = _RoomStreamChangeCache() @defer.inlineCallbacks - def get_linearized_receipts_for_rooms(self, room_ids, from_key, to_key): + def get_linearized_receipts_for_rooms(self, room_ids, to_key, from_key=None): """Get receipts for multiple rooms for sending to clients. + + Args: + room_ids (list): List of room_ids. + to_key (int): Max stream id to fetch receipts upto. + from_key (int): Min stream id to fetch receipts from. None fetches + from the start. + + Returns: + list: A list of receipts. """ room_ids = set(room_ids) @@ -46,7 +55,9 @@ class ReceiptsStore(SQLBaseStore): results = yield defer.gatherResults( [ - self.get_linearized_receipts_for_room(room_id, from_key, to_key) + self.get_linearized_receipts_for_room( + room_id, to_key, from_key=from_key + ) for room_id in room_ids ], consumeErrors=True, @@ -55,8 +66,17 @@ class ReceiptsStore(SQLBaseStore): defer.returnValue([ev for res in results for ev in res]) @defer.inlineCallbacks - def get_linearized_receipts_for_room(self, room_id, from_key, to_key): + def get_linearized_receipts_for_room(self, room_id, to_key, from_key=None): """Get receipts for a single room for sending to clients. + + Args: + room_ids (str): The room id. + to_key (int): Max stream id to fetch receipts upto. + from_key (int): Min stream id to fetch receipts from. None fetches + from the start. + + Returns: + list: A list of receipts. """ def f(txn): if from_key: @@ -288,6 +308,9 @@ class _RoomStreamChangeCache(object): @defer.inlineCallbacks def get_rooms_changed(self, store, room_ids, key): + """Returns subset of room ids that have had new receipts since the + given key. If the key is too old it will just return the given list. + """ if key > (yield self._get_earliest_key(store)): keys = self._cache.keys() i = keys.bisect_right(key) @@ -302,6 +325,8 @@ class _RoomStreamChangeCache(object): @defer.inlineCallbacks def room_has_changed(self, store, room_id, key): + """Informs the cache that the room has been changed at the given key. 
+        """
         if key > (yield self._get_earliest_key(store)):
             old_key = self._room_to_key.get(room_id, None)
             if old_key:

From b49a30a972470914531c89cd481252e414b22d0e Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Tue, 14 Jul 2015 10:20:31 +0100
Subject: [PATCH 066/266] Capitalize constants

---
 synapse/api/constants.py | 4 ++--
 synapse/handlers/room.py | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/synapse/api/constants.py b/synapse/api/constants.py
index 885b9c359..7156ee4e7 100644
--- a/synapse/api/constants.py
+++ b/synapse/api/constants.py
@@ -90,5 +90,5 @@ class RejectedReason(object):
 
 
 class RoomCreationPreset(object):
-    PrivateChat = "private_chat"
-    PublicChat = "public_chat"
+    PRIVATE_CHAT = "private_chat"
+    PUBLIC_CHAT = "public_chat"
diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py
index c65439543..11f531dc7 100644
--- a/synapse/handlers/room.py
+++ b/synapse/handlers/room.py
@@ -36,12 +36,12 @@ logger = logging.getLogger(__name__)
 
 class RoomCreationHandler(BaseHandler):
 
     PRESETS_DICT = {
-        RoomCreationPreset.PrivateChat: {
+        RoomCreationPreset.PRIVATE_CHAT: {
             "join_rules": JoinRules.INVITE,
             "history_visibility": "invited",
             "everyone_ops": False,
         },
-        RoomCreationPreset.PublicChat: {
+        RoomCreationPreset.PUBLIC_CHAT: {
             "join_rules": JoinRules.PUBLIC,
             "history_visibility": "shared",
             "everyone_ops": False,
@@ -138,9 +138,9 @@ class RoomCreationHandler(BaseHandler):
 
         preset_config = config.get(
             "preset",
-            RoomCreationPreset.PublicChat
+            RoomCreationPreset.PUBLIC_CHAT
             if is_public
-            else RoomCreationPreset.PrivateChat
+            else RoomCreationPreset.PRIVATE_CHAT
         )
 
         user = UserID.from_string(user_id)

From 62b4b72fe4ab084593dffe9a7fe28d51b882ccd7 Mon Sep 17 00:00:00 2001
From: David Baker
Date: Tue, 14 Jul 2015 10:33:25 +0100
Subject: [PATCH 067/266] Close, but no cigar.

---
 synapse/config/captcha.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/synapse/config/captcha.py b/synapse/config/captcha.py
index cf72dc434..15a132b4e 100644
--- a/synapse/config/captcha.py
+++ b/synapse/config/captcha.py
@@ -29,10 +29,10 @@ class CaptchaConfig(Config):
         ## Captcha ##
 
         # This Home Server's ReCAPTCHA public key.
-        recaptcha_private_key: "YOUR_PUBLIC_KEY"
+        recaptcha_private_key: "YOUR_PRIVATE_KEY"
 
         # This Home Server's ReCAPTCHA private key.
-        recaptcha_public_key: "YOUR_PRIVATE_KEY"
+        recaptcha_public_key: "YOUR_PUBLIC_KEY"
 
         # Enables ReCaptcha checks when registering, preventing signup
         # unless a captcha is answered.
Requires a valid ReCaptcha

From 002a44ac1a59f101e3df18dee5ef7c3ad87440ed Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Tue, 14 Jul 2015 10:37:42 +0100
Subject: [PATCH 068/266] s/everyone_ops/original_invitees_have_ops/

---
 synapse/handlers/room.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py
index 11f531dc7..c081efee3 100644
--- a/synapse/handlers/room.py
+++ b/synapse/handlers/room.py
@@ -39,12 +39,12 @@ class RoomCreationHandler(BaseHandler):
         RoomCreationPreset.PRIVATE_CHAT: {
             "join_rules": JoinRules.INVITE,
             "history_visibility": "invited",
-            "everyone_ops": False,
+            "original_invitees_have_ops": False,
         },
         RoomCreationPreset.PUBLIC_CHAT: {
             "join_rules": JoinRules.PUBLIC,
             "history_visibility": "shared",
-            "everyone_ops": False,
+            "original_invitees_have_ops": False,
         },
     }
 
@@ -248,7 +248,7 @@ class RoomCreationHandler(BaseHandler):
             "invite": 0,
         }
 
-        if config["everyone_ops"]:
+        if config["original_invitees_have_ops"]:
             for invitee in invite_list:
                 power_level_content["users"][invitee] = 100
 

From 8cedf3ce959ed1bd4d9c52a974bb097476f409f0 Mon Sep 17 00:00:00 2001
From: Matthew Hodgson
Date: Tue, 14 Jul 2015 23:53:05 +0100
Subject: [PATCH 069/266] bump up image quality a bit more as it looks crap

---
 synapse/rest/media/v1/thumbnailer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/rest/media/v1/thumbnailer.py b/synapse/rest/media/v1/thumbnailer.py
index 28404f2b7..1e965c363 100644
--- a/synapse/rest/media/v1/thumbnailer.py
+++ b/synapse/rest/media/v1/thumbnailer.py
@@ -82,7 +82,7 @@ class Thumbnailer(object):
 
     def save_image(self, output_image, output_type, output_path):
         output_bytes_io = BytesIO()
-        output_image.save(output_bytes_io, self.FORMATS[output_type], quality=70)
+        output_image.save(output_bytes_io, self.FORMATS[output_type], quality=80)
         output_bytes = output_bytes_io.getvalue()
         with open(output_path, "wb") as output_file:
             output_file.write(output_bytes)

From 4da05fa0ae32425ce2755dcd479bb4c97f43b30e Mon Sep 17 00:00:00 2001
From: David Baker
Date: Wed, 15 Jul 2015 19:28:03 +0100
Subject: [PATCH 070/266] Add back in support for remembering parameters
 submitted to a user-interactive auth call.

---
 synapse/handlers/auth.py                 |  6 ++++--
 synapse/rest/client/v2_alpha/register.py | 11 +++++++++--
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py
index 63071653a..1ecf7fef1 100644
--- a/synapse/handlers/auth.py
+++ b/synapse/handlers/auth.py
@@ -85,8 +85,10 @@ class AuthHandler(BaseHandler):
             # email auth link on there). It's probably too open to abuse
             # because it lets unauthenticated clients store arbitrary objects
             # on a home server.
-            # sess['clientdict'] = clientdict
-            # self._save_session(sess)
+            # Revisit: Assuming the REST APIs do sensible validation, the data
+            # isn't arbitrary.
+ sess['clientdict'] = clientdict + self._save_session(sess) pass elif 'clientdict' in sess: clientdict = sess['clientdict'] diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index 72dfb876c..fa44572b7 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -57,10 +57,17 @@ class RegisterRestServlet(RestServlet): yield run_on_reactor() body = parse_request_allow_empty(request) - if 'password' not in body: - raise SynapseError(400, "", Codes.MISSING_PARAM) + # we do basic sanity checks here because the auth layerwill store these in sessions + if 'password' in body: + print "%r" % (body['password']) + if (not isinstance(body['password'], str) and + not isinstance(body['password'], unicode)) or len(body['password']) > 512: + raise SynapseError(400, "Invalid password") if 'username' in body: + if (not isinstance(body['username'], str) and + not isinstance(body['username'], unicode)) or len(body['username']) > 512: + raise SynapseError(400, "Invalid username") desired_username = body['username'] yield self.registration_handler.check_username(desired_username) From 09489499e7e4e61cd3fcf735584355ba1f780a5c Mon Sep 17 00:00:00 2001 From: David Baker Date: Wed, 15 Jul 2015 19:39:18 +0100 Subject: [PATCH 071/266] pep8 + debug line --- synapse/rest/client/v2_alpha/register.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index fa44572b7..0c737d73b 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -57,16 +57,18 @@ class RegisterRestServlet(RestServlet): yield run_on_reactor() body = parse_request_allow_empty(request) - # we do basic sanity checks here because the auth layerwill store these in sessions + # we do basic sanity checks here because the auth + # layer will store these in sessions if 'password' in body: - print "%r" % (body['password']) - if (not isinstance(body['password'], str) and - not isinstance(body['password'], unicode)) or len(body['password']) > 512: + if ((not isinstance(body['password'], str) and + not isinstance(body['password'], unicode)) or + len(body['password']) > 512): raise SynapseError(400, "Invalid password") if 'username' in body: - if (not isinstance(body['username'], str) and - not isinstance(body['username'], unicode)) or len(body['username']) > 512: + if ((not isinstance(body['username'], str) and + not isinstance(body['username'], unicode)) or + len(body['username']) > 512): raise SynapseError(400, "Invalid username") desired_username = body['username'] yield self.registration_handler.check_username(desired_username) From c456d17daf1457bb341bd3c64c616d500c6c2517 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 16 Jul 2015 15:25:29 +0100 Subject: [PATCH 072/266] Implement specifying custom initial state for /createRoom --- synapse/handlers/room.py | 100 +++++++++++++++++++++++---------------- 1 file changed, 60 insertions(+), 40 deletions(-) diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index c081efee3..7511d294f 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -27,6 +27,7 @@ from synapse.util import stringutils, unwrapFirstError from synapse.util.async import run_on_reactor from synapse.events.utils import serialize_event +from collections import OrderedDict import logging import string @@ -143,11 +144,18 @@ class RoomCreationHandler(BaseHandler): else 
RoomCreationPreset.PRIVATE_CHAT ) + raw_initial_state = config.get("initial_state", []) + + initial_state = OrderedDict() + for val in raw_initial_state: + initial_state[(val["type"], val.get("state_key", ""))] = val["content"] + user = UserID.from_string(user_id) creation_events = self._create_events_for_new_room( user, room_id, preset_config=preset_config, invite_list=invite_list, + initial_state=initial_state, ) msg_handler = self.hs.get_handlers().message_handler @@ -195,7 +203,7 @@ class RoomCreationHandler(BaseHandler): defer.returnValue(result) def _create_events_for_new_room(self, creator, room_id, preset_config, - invite_list): + invite_list, initial_state): config = RoomCreationHandler.PRESETS_DICT[preset_config] creator_id = creator.to_string() @@ -230,50 +238,62 @@ class RoomCreationHandler(BaseHandler): }, ) - power_level_content = { - "users": { - creator.to_string(): 100, - }, - "users_default": 0, - "events": { - EventTypes.Name: 100, - EventTypes.PowerLevels: 100, - EventTypes.RoomHistoryVisibility: 100, - }, - "events_default": 0, - "state_default": 50, - "ban": 50, - "kick": 50, - "redact": 50, - "invite": 0, - } + returned_events = [creation_event, join_event] - if config["original_invitees_have_ops"]: - for invitee in invite_list: - power_level_content["users"][invitee] = 100 + if (EventTypes.PowerLevels, '') not in initial_state: + power_level_content = { + "users": { + creator.to_string(): 100, + }, + "users_default": 0, + "events": { + EventTypes.Name: 100, + EventTypes.PowerLevels: 100, + EventTypes.RoomHistoryVisibility: 100, + }, + "events_default": 0, + "state_default": 50, + "ban": 50, + "kick": 50, + "redact": 50, + "invite": 0, + } - power_levels_event = create( - etype=EventTypes.PowerLevels, - content=power_level_content, - ) + if config["original_invitees_have_ops"]: + for invitee in invite_list: + power_level_content["users"][invitee] = 100 - join_rules_event = create( - etype=EventTypes.JoinRules, - content={"join_rule": config["join_rules"]}, - ) + power_levels_event = create( + etype=EventTypes.PowerLevels, + content=power_level_content, + ) - history_event = create( - etype=EventTypes.RoomHistoryVisibility, - content={"history_visibility": config["history_visibility"]} - ) + returned_events.append(power_levels_event) - return [ - creation_event, - join_event, - power_levels_event, - history_event, - join_rules_event, - ] + if (EventTypes.JoinRules, '') not in initial_state: + join_rules_event = create( + etype=EventTypes.JoinRules, + content={"join_rule": config["join_rules"]}, + ) + + returned_events.append(join_rules_event) + + if (EventTypes.RoomHistoryVisibility, '') not in initial_state: + history_event = create( + etype=EventTypes.RoomHistoryVisibility, + content={"history_visibility": config["history_visibility"]} + ) + + returned_events.append(history_event) + + for (etype, state_key), content in initial_state.items(): + returned_events.append(create( + etype=etype, + state_key=state_key, + content=content, + )) + + return returned_events class RoomMemberHandler(BaseHandler): From 4cab2cfa34ceb384847d8bd5dbe15147b27dc370 Mon Sep 17 00:00:00 2001 From: Matrix Date: Sat, 18 Jul 2015 19:07:12 +0100 Subject: [PATCH 073/266] Don't do any database hits in receipt handling if from_key == to_key --- synapse/handlers/receipts.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index 5b3df6932..86c911c4b 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -174,6 
+174,9 @@ class ReceiptEventSource(object): from_key = int(from_key) to_key = yield self.get_current_key() + if from_key == to_key: + defer.returnValue(([], to_key)) + rooms = yield self.store.get_rooms_for_user(user.to_string()) rooms = [room.room_id for room in rooms] events = yield self.store.get_linearized_receipts_for_rooms( From 4f973eb6577ba125da6faa7f307b357afe1b7f84 Mon Sep 17 00:00:00 2001 From: Matrix Date: Sat, 18 Jul 2015 19:07:33 +0100 Subject: [PATCH 074/266] Up default cache size for _RoomStreamChangeCache --- synapse/storage/receipts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py index d515a0a15..7a6af98d9 100644 --- a/synapse/storage/receipts.py +++ b/synapse/storage/receipts.py @@ -300,7 +300,7 @@ class _RoomStreamChangeCache(object): may have changed since that key. If the key is too old then the cache will simply return all rooms. """ - def __init__(self, size_of_cache=1000): + def __init__(self, size_of_cache=10000): self._size_of_cache = size_of_cache self._room_to_key = {} self._cache = sorteddict() From b6ee0585bd0329e1841196b8e8a893630e1850d6 Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Mon, 20 Jul 2015 13:55:19 +0100 Subject: [PATCH 075/266] Parse the ID given to /invite|ban|kick to make sure it looks like a user ID. --- synapse/rest/client/v1/room.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 0346afb1b..639795df2 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -412,6 +412,8 @@ class RoomMembershipRestServlet(ClientV1RestServlet): if "user_id" not in content: raise SynapseError(400, "Missing user_id key.") state_key = content["user_id"] + # make sure it looks like a user ID; it'll throw if it's invalid. + UserID.from_string(state_key); if membership_action == "kick": membership_action = "leave" From ddef5ea1267e3ec2df95b4811f1f59755a35639f Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Mon, 20 Jul 2015 14:02:36 +0100 Subject: [PATCH 076/266] Remove semicolon. --- synapse/rest/client/v1/room.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 639795df2..b4a70cba9 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -413,7 +413,7 @@ class RoomMembershipRestServlet(ClientV1RestServlet): raise SynapseError(400, "Missing user_id key.") state_key = content["user_id"] # make sure it looks like a user ID; it'll throw if it's invalid. 
-        UserID.from_string(state_key);
+        UserID.from_string(state_key)
 
         if membership_action == "kick":
             membership_action = "leave"

From 53d1174aa9f984b2c2b012e9acd1bf52512998f9 Mon Sep 17 00:00:00 2001
From: Daniel Wagner-Hall
Date: Mon, 20 Jul 2015 06:32:12 -0700
Subject: [PATCH 077/266] Improve naming

---
 synapse/notifier.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/synapse/notifier.py b/synapse/notifier.py
index 85ae34313..dbd8efe9f 100644
--- a/synapse/notifier.py
+++ b/synapse/notifier.py
@@ -352,10 +352,10 @@ class Notifier(object):
             after_id = getattr(after_token, keyname)
             if before_id == after_id:
                 continue
-            stuff, new_key = yield source.get_new_events_for_user(
+            new_events, new_key = yield source.get_new_events_for_user(
                 user, getattr(from_token, keyname), limit,
             )
-            events.extend(stuff)
+            events.extend(new_events)
             end_token = end_token.copy_and_replace(keyname, new_key)
 
         if events:

From 3b5823c74d5bffc68068284145cc78a33476ac84 Mon Sep 17 00:00:00 2001
From: Mark Haines
Date: Tue, 14 Jul 2015 13:08:33 +0100
Subject: [PATCH 078/266] s/take/claim/ for end to end key APIs

---
 synapse/rest/client/v2_alpha/keys.py | 10 +++++-----
 synapse/storage/end_to_end_keys.py   |  6 +++---
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/synapse/rest/client/v2_alpha/keys.py b/synapse/rest/client/v2_alpha/keys.py
index f03126775..9a0c84228 100644
--- a/synapse/rest/client/v2_alpha/keys.py
+++ b/synapse/rest/client/v2_alpha/keys.py
@@ -207,9 +207,9 @@ class KeyQueryServlet(RestServlet):
 
 class OneTimeKeyServlet(RestServlet):
     """
-    GET /keys/take/<user_id>/<device_id>/<algorithm> HTTP/1.1
+    GET /keys/claim/<user_id>/<device_id>/<algorithm> HTTP/1.1
 
-    POST /keys/take HTTP/1.1
+    POST /keys/claim HTTP/1.1
     {
       "one_time_keys": {
        "<user_id>": {
          "<device_id>": "<algorithm>"
@@ -226,7 +226,7 @@ class OneTimeKeyServlet(RestServlet):
    """
 
     PATTERN = client_v2_pattern(
-        "/keys/take(?:/?|(?:/"
+        "/keys/claim(?:/?|(?:/"
         "(?P<user_id>[^/]*)/(?P<device_id>[^/]*)/(?P<algorithm>[^/]*)"
         ")?)"
     )
@@ -240,7 +240,7 @@ class OneTimeKeyServlet(RestServlet):
     @defer.inlineCallbacks
     def on_GET(self, request, user_id, device_id, algorithm):
         yield self.auth.get_user_by_req(request)
-        results = yield self.store.take_e2e_one_time_keys(
+        results = yield self.store.claim_e2e_one_time_keys(
             [(user_id, device_id, algorithm)]
         )
         defer.returnValue(self.json_result(request, results))
@@ -256,7 +256,7 @@ class OneTimeKeyServlet(RestServlet):
         for user_id, device_keys in body.get("one_time_keys", {}).items():
             for device_id, algorithm in device_keys.items():
                 query.append((user_id, device_id, algorithm))
-        results = yield self.store.take_e2e_one_time_keys(query)
+        results = yield self.store.claim_e2e_one_time_keys(query)
         defer.returnValue(self.json_result(request, results))
 
     def json_result(self, request, results):
diff --git a/synapse/storage/end_to_end_keys.py b/synapse/storage/end_to_end_keys.py
index 99dc864e4..325740d7d 100644
--- a/synapse/storage/end_to_end_keys.py
+++ b/synapse/storage/end_to_end_keys.py
@@ -95,9 +95,9 @@ class EndToEndKeyStore(SQLBaseStore):
             "count_e2e_one_time_keys", _count_e2e_one_time_keys
         )
 
-    def take_e2e_one_time_keys(self, query_list):
+    def claim_e2e_one_time_keys(self, query_list):
         """Take a list of one time keys out of the database"""
-        def _take_e2e_one_time_keys(txn):
+        def _claim_e2e_one_time_keys(txn):
             sql = (
                 "SELECT key_id, key_json FROM e2e_one_time_keys_json"
                 " WHERE user_id = ? AND device_id = ? AND algorithm = ?"
@@ -121,5 +121,5 @@ class EndToEndKeyStore(SQLBaseStore):
                 txn.execute(sql, (user_id, device_id, algorithm, key_id))
             return result
         return self.runInteraction(
-            "take_e2e_one_time_keys", _take_e2e_one_time_keys
+            "claim_e2e_one_time_keys", _claim_e2e_one_time_keys
         )

From cf7a40b08a381ee5715f915effd63dfe241a8d61 Mon Sep 17 00:00:00 2001
From: David Baker
Date: Tue, 21 Jul 2015 16:08:00 -0700
Subject: [PATCH 079/266] I think this was what was intended...

---
 synapse/storage/end_to_end_keys.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/synapse/storage/end_to_end_keys.py b/synapse/storage/end_to_end_keys.py
index 325740d7d..69287c43d 100644
--- a/synapse/storage/end_to_end_keys.py
+++ b/synapse/storage/end_to_end_keys.py
@@ -52,6 +52,7 @@ class EndToEndKeyStore(SQLBaseStore):
                 )
                 for row in rows:
                     user_result[row["device_id"]] = row["key_json"]
+                result[user_id] = user_result
             return result
         return self.runInteraction("get_e2e_device_keys", _get_e2e_device_keys)
 

From 20c0324e9c0ebe569d30bd6541bc8b5c9d3c7ae2 Mon Sep 17 00:00:00 2001
From: David Baker
Date: Tue, 21 Jul 2015 16:21:37 -0700
Subject: [PATCH 080/266] Doesn't seem to make any difference: guess it does
 work with the object reference
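Why the extra assignment made no difference: dict.setdefault returns the
object that is actually stored in the dict, not a copy, so mutating the
returned mapping already mutates the outer dict in place. A minimal,
self-contained sketch of that Python behaviour (assuming user_result in the
real code was obtained via result.setdefault, which this hunk does not show):

    # setdefault returns the stored dict itself, so writes through
    # user_result are already visible via result.
    result = {}
    user_result = result.setdefault("@alice:example.com", {})
    user_result["DEVICEID"] = '{"keys": "base64+bytes"}'
    assert result["@alice:example.com"] is user_result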
--- synapse/rest/client/v2_alpha/keys.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/rest/client/v2_alpha/keys.py b/synapse/rest/client/v2_alpha/keys.py index 9a0c84228..5f3a6207b 100644 --- a/synapse/rest/client/v2_alpha/keys.py +++ b/synapse/rest/client/v2_alpha/keys.py @@ -180,7 +180,7 @@ class KeyQueryServlet(RestServlet): else: for device_id in device_ids: query.append((user_id, device_id)) - results = yield self.store.get_e2e_device_keys([(user_id, device_id)]) + results = yield self.store.get_e2e_device_keys(query) defer.returnValue(self.json_result(request, results)) @defer.inlineCallbacks From 4e2e67fd506899e0c64b5fdfcac91e660de4260b Mon Sep 17 00:00:00 2001 From: Matrix Date: Wed, 22 Jul 2015 16:13:46 +0100 Subject: [PATCH 082/266] Disable receipts for now --- synapse/handlers/receipts.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index 86c911c4b..415dd339f 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -171,6 +171,7 @@ class ReceiptEventSource(object): @defer.inlineCallbacks def get_new_events_for_user(self, user, from_key, limit): + defer.returnValue(([], from_key)) from_key = int(from_key) to_key = yield self.get_current_key() @@ -193,6 +194,7 @@ class ReceiptEventSource(object): @defer.inlineCallbacks def get_pagination_rows(self, user, config, key): to_key = int(config.from_key) + defer.returnValue(([], to_key)) if config.to_key: from_key = int(config.to_key) From 103e1c2431e92959c8b265335714c1a2c5d5dd70 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 23 Jul 2015 11:12:49 +0100 Subject: [PATCH 083/266] Pick larger than desired thumbnail for 'crop' --- synapse/rest/media/v1/thumbnail_resource.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index 4a9b6d8ee..61f88e486 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -162,11 +162,12 @@ class ThumbnailResource(BaseMediaResource): t_method = info["thumbnail_method"] if t_method == "scale" or t_method == "crop": aspect_quality = abs(d_w * t_h - d_h * t_w) + min_quality = 0 if d_w <= t_w and d_h <= t_h else 1 size_quality = abs((d_w - t_w) * (d_h - t_h)) type_quality = desired_type != info["thumbnail_type"] length_quality = info["thumbnail_length"] info_list.append(( - aspect_quality, size_quality, type_quality, + aspect_quality, min_quality, size_quality, type_quality, length_quality, info )) if info_list: From ff7c2e41de2056ab959a2d560c89d397425c61be Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 23 Jul 2015 14:12:49 +0100 Subject: [PATCH 084/266] Always return a thumbnail of the requested size. Before, we returned a thumbnail that was at least as big (if possible) as the requested size. Now, if we don't have a thumbnail of the given size we generate (and persist) one of that size. 
--- synapse/rest/media/v1/base_resource.py | 83 +++++++++++++++++++++ synapse/rest/media/v1/thumbnail_resource.py | 81 +++++++++++++++++++- 2 files changed, 162 insertions(+), 2 deletions(-) diff --git a/synapse/rest/media/v1/base_resource.py b/synapse/rest/media/v1/base_resource.py index c43ae0314..00668b386 100644 --- a/synapse/rest/media/v1/base_resource.py +++ b/synapse/rest/media/v1/base_resource.py @@ -225,6 +225,89 @@ class BaseMediaResource(Resource): else: return () + @defer.inlineCallbacks + def _generate_local_exact_thumbnail(self, media_id, t_width, t_height, + t_method, t_type): + input_path = self.filepaths.local_media_filepath(media_id) + + def thumbnail(): + thumbnailer = Thumbnailer(input_path) + m_width = thumbnailer.width + m_height = thumbnailer.height + + if m_width * m_height >= self.max_image_pixels: + logger.info( + "Image too large to thumbnail %r x %r > %r", + m_width, m_height, self.max_image_pixels + ) + return + + t_path = self.filepaths.local_media_thumbnail( + media_id, t_width, t_height, t_type, t_method + ) + self._makedirs(t_path) + + if t_method == "crop": + t_len = thumbnailer.crop(t_path, t_width, t_height, t_type) + elif t_method == "scale": + t_len = thumbnailer.scale(t_path, t_width, t_height, t_type) + else: + t_len = None + + return t_len, t_path + + res = yield threads.deferToThread(thumbnail) + + if res: + t_len, t_path = res + yield self.store.store_local_thumbnail( + media_id, t_width, t_height, t_type, t_method, t_len + ) + + defer.returnValue(t_path) + + @defer.inlineCallbacks + def _generate_remote_exact_thumbnail(self, server_name, file_id, media_id, + t_width, t_height, t_method, t_type): + input_path = self.filepaths.remote_media_filepath(server_name, file_id) + + def thumbnail(): + thumbnailer = Thumbnailer(input_path) + m_width = thumbnailer.width + m_height = thumbnailer.height + + if m_width * m_height >= self.max_image_pixels: + logger.info( + "Image too large to thumbnail %r x %r > %r", + m_width, m_height, self.max_image_pixels + ) + return + + t_path = self.filepaths.remote_media_thumbnail( + media_id, t_width, t_height, t_type, t_method + ) + self._makedirs(t_path) + + if t_method == "crop": + t_len = thumbnailer.crop(t_path, t_width, t_height, t_type) + elif t_method == "scale": + t_len = thumbnailer.scale(t_path, t_width, t_height, t_type) + else: + t_len = None + + return t_path, t_len + + res = yield threads.deferToThread(thumbnail) + + if res: + t_path, t_len = res + yield self.store.store_remote_media_thumbnail( + server_name, media_id, file_id, + t_width, t_height, t_type, t_method, t_len + ) + + defer.returnValue(t_path) + @defer.inlineCallbacks def _generate_local_thumbnails(self, media_id, media_info): media_type = media_info["media_type"] diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index 61f88e486..58621f45d 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -43,11 +43,11 @@ class ThumbnailResource(BaseMediaResource): m_type = parse_string(request, "type", "image/png") if server_name == self.server_name: - yield self._respond_local_thumbnail( + yield self._select_or_generate_local_thumbnail( request, media_id, width, height, method, m_type ) else: - yield self._respond_remote_thumbnail( + yield self._select_or_generate_remote_thumbnail( request, server_name, media_id, width, height, method, m_type ) @@ -82,6 +82,83 @@ class ThumbnailResource(BaseMediaResource): request, media_info, width, height, 
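Condensed, the new control flow is: serve a stored thumbnail only on an exact
(width, height, method, type) match, and otherwise thumbnail the source media
at exactly the requested size, persisting the result for next time. A
simplified sketch of that selection logic (helper and field names are
illustrative, not the resource's actual API):

    def select_or_generate(thumbnail_infos, desired, generate_exact):
        """Return the path of a thumbnail matching `desired` exactly.

        thumbnail_infos: stored specs, e.g.
            {"width": 96, "height": 96, "method": "crop",
             "type": "image/png", "path": "/media/..."}
        desired: a (width, height, method, type) tuple.
        generate_exact: callback that thumbnails the original media at
            exactly the desired spec, persists it, and returns its path.
        """
        for info in thumbnail_infos:
            spec = (info["width"], info["height"], info["method"], info["type"])
            if spec == desired:
                return info["path"]          # exact match already on disk
        return generate_exact(*desired)      # otherwise make one of that size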
method, m_type, ) + @defer.inlineCallbacks + def _select_or_generate_local_thumbnail(self, request, media_id, desired_width, + desired_height, desired_method, + desired_type): + media_info = yield self.store.get_local_media(media_id) + + if not media_info: + self._respond_404(request) + return + + thumbnail_infos = yield self.store.get_local_media_thumbnails(media_id) + for info in thumbnail_infos: + t_w = info["thumbnail_width"] == desired_width + t_h = info["thumbnail_height"] == desired_height + t_method = info["thumbnail_method"] == desired_method + t_type = info["thumbnail_type"] == desired_type + + if t_w and t_h and t_method and t_type: + file_path = self.filepaths.local_media_thumbnail( + media_id, desired_width, desired_height, desired_type, desired_method, + ) + yield self._respond_with_file(request, desired_type, file_path) + return + + logger.debug("We don't have a local thumbnail of that size. Generating") + + # Okay, so we generate one. + file_path = yield self._generate_local_exact_thumbnail( + media_id, desired_width, desired_height, desired_method, desired_type + ) + + if file_path: + yield self._respond_with_file(request, desired_type, file_path) + else: + yield self._respond_default_thumbnail( + request, media_info, desired_width, desired_height, + desired_method, desired_type, + ) + + @defer.inlineCallbacks + def _select_or_generate_remote_thumbnail(self, request, server_name, media_id, + desired_width, desired_height, + desired_method, desired_type): + media_info = yield self._get_remote_media(server_name, media_id) + + thumbnail_infos = yield self.store.get_remote_media_thumbnails( + server_name, media_id, + ) + + for info in thumbnail_infos: + t_w = info["thumbnail_width"] == desired_width + t_h = info["thumbnail_height"] == desired_height + t_method = info["thumbnail_method"] == desired_method + t_type = info["thumbnail_type"] == desired_type + + if t_w and t_h and t_method and t_type: + file_path = self.filepaths.remote_media_thumbnail( + media_id, desired_width, desired_height, desired_type, desired_method, + ) + yield self._respond_with_file(request, desired_type, file_path) + + logger.debug("We don't have a local thumbnail of that size. Generating") + + # Okay, so we generate one. 
+ path = yield self._generate_remote_exact_thumbnail( + server_name, media_id, desired_width, desired_height, + desired_method, desired_type + ) + + if path: + yield self._respond_with_file(request, t_type, file_path) + else: + yield self._respond_default_thumbnail( + request, media_info, desired_width, desired_height, + desired_method, desired_type, + ) + @defer.inlineCallbacks def _respond_remote_thumbnail(self, request, server_name, media_id, width, height, method, m_type): From 33d83f36158a98111c3dceb605a40d962a9e5812 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 23 Jul 2015 14:24:21 +0100 Subject: [PATCH 085/266] Fix remote thumbnailing --- synapse/rest/media/v1/base_resource.py | 2 +- synapse/rest/media/v1/thumbnail_resource.py | 16 ++++++++++------ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/synapse/rest/media/v1/base_resource.py b/synapse/rest/media/v1/base_resource.py index 00668b386..74c0cd093 100644 --- a/synapse/rest/media/v1/base_resource.py +++ b/synapse/rest/media/v1/base_resource.py @@ -284,7 +284,7 @@ class BaseMediaResource(Resource): return t_path = self.filepaths.remote_media_thumbnail( - media_id, t_width, t_height, t_type, t_method + server_name, file_id, t_width, t_height, t_type, t_method ) self._makedirs(t_path) diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index 58621f45d..9387258a7 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -131,6 +131,8 @@ class ThumbnailResource(BaseMediaResource): server_name, media_id, ) + file_id = media_info["filesystem_id"] + for info in thumbnail_infos: t_w = info["thumbnail_width"] == desired_width t_h = info["thumbnail_height"] == desired_height @@ -139,20 +141,22 @@ class ThumbnailResource(BaseMediaResource): if t_w and t_h and t_method and t_type: file_path = self.filepaths.remote_media_thumbnail( - media_id, desired_width, desired_height, desired_type, desired_method, + server_name, file_id, desired_width, desired_height, + desired_type, desired_method, ) yield self._respond_with_file(request, desired_type, file_path) + return logger.debug("We don't have a local thumbnail of that size. Generating") # Okay, so we generate one. 
- path = yield self._generate_remote_exact_thumbnail( - server_name, media_id, desired_width, desired_height, - desired_method, desired_type + file_path = yield self._generate_remote_exact_thumbnail( + server_name, file_id, media_id, desired_width, + desired_height, desired_method, desired_type ) - if path: - yield self._respond_with_file(request, t_type, file_path) + if file_path: + yield self._respond_with_file(request, desired_type, file_path) else: yield self._respond_default_thumbnail( request, media_info, desired_width, desired_height, From 2b4f47db9c13840c4b9dbbffb28d7860fe007d68 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 23 Jul 2015 14:52:29 +0100 Subject: [PATCH 086/266] Generate local thumbnails on a thread --- synapse/rest/media/v1/base_resource.py | 77 ++++++++++++++------------ 1 file changed, 43 insertions(+), 34 deletions(-) diff --git a/synapse/rest/media/v1/base_resource.py b/synapse/rest/media/v1/base_resource.py index c43ae0314..84e1961a2 100644 --- a/synapse/rest/media/v1/base_resource.py +++ b/synapse/rest/media/v1/base_resource.py @@ -244,43 +244,52 @@ class BaseMediaResource(Resource): ) return - scales = set() - crops = set() - for r_width, r_height, r_method, r_type in requirements: - if r_method == "scale": - t_width, t_height = thumbnailer.aspect(r_width, r_height) - scales.add(( - min(m_width, t_width), min(m_height, t_height), r_type, + local_thumbnails = [] + + def generate_thumbnails(): + scales = set() + crops = set() + for r_width, r_height, r_method, r_type in requirements: + if r_method == "scale": + t_width, t_height = thumbnailer.aspect(r_width, r_height) + scales.add(( + min(m_width, t_width), min(m_height, t_height), r_type, + )) + elif r_method == "crop": + crops.add((r_width, r_height, r_type)) + + for t_width, t_height, t_type in scales: + t_method = "scale" + t_path = self.filepaths.local_media_thumbnail( + media_id, t_width, t_height, t_type, t_method + ) + self._makedirs(t_path) + t_len = thumbnailer.scale(t_path, t_width, t_height, t_type) + + local_thumbnails.append(( + media_id, t_width, t_height, t_type, t_method, t_len )) - elif r_method == "crop": - crops.add((r_width, r_height, r_type)) - for t_width, t_height, t_type in scales: - t_method = "scale" - t_path = self.filepaths.local_media_thumbnail( - media_id, t_width, t_height, t_type, t_method - ) - self._makedirs(t_path) - t_len = thumbnailer.scale(t_path, t_width, t_height, t_type) - yield self.store.store_local_thumbnail( - media_id, t_width, t_height, t_type, t_method, t_len - ) + for t_width, t_height, t_type in crops: + if (t_width, t_height, t_type) in scales: + # If the aspect ratio of the cropped thumbnail matches a purely + # scaled one then there is no point in calculating a separate + # thumbnail. + continue + t_method = "crop" + t_path = self.filepaths.local_media_thumbnail( + media_id, t_width, t_height, t_type, t_method + ) + self._makedirs(t_path) + t_len = thumbnailer.crop(t_path, t_width, t_height, t_type) + local_thumbnails.append(( + media_id, t_width, t_height, t_type, t_method, t_len + )) - for t_width, t_height, t_type in crops: - if (t_width, t_height, t_type) in scales: - # If the aspect ratio of the cropped thumbnail matches a purely - # scaled one then there is no point in calculating a separate - # thumbnail. 
- continue - t_method = "crop" - t_path = self.filepaths.local_media_thumbnail( - media_id, t_width, t_height, t_type, t_method - ) - self._makedirs(t_path) - t_len = thumbnailer.crop(t_path, t_width, t_height, t_type) - yield self.store.store_local_thumbnail( - media_id, t_width, t_height, t_type, t_method, t_len - ) + yield threads.deferToThread(generate_thumbnails) + + for l in local_thumbnails: + yield self.store.store_local_thumbnail(*l) defer.returnValue({ "width": m_width, From 459085184ce80c67584bee4e5d3bc43add99bb0b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 23 Jul 2015 15:59:32 +0100 Subject: [PATCH 087/266] Factor out thumbnail() --- synapse/rest/media/v1/base_resource.py | 96 +++++++++++--------------- 1 file changed, 40 insertions(+), 56 deletions(-) diff --git a/synapse/rest/media/v1/base_resource.py b/synapse/rest/media/v1/base_resource.py index 74c0cd093..093ba847d 100644 --- a/synapse/rest/media/v1/base_resource.py +++ b/synapse/rest/media/v1/base_resource.py @@ -225,41 +225,44 @@ class BaseMediaResource(Resource): else: return () + def _generate_thumbnail(self, input_path, t_path, t_width, t_height, + t_method, t_type): + thumbnailer = Thumbnailer(input_path) + m_width = thumbnailer.width + m_height = thumbnailer.height + + if m_width * m_height >= self.max_image_pixels: + logger.info( + "Image too large to thumbnail %r x %r > %r", + m_width, m_height, self.max_image_pixels + ) + return + + if t_method == "crop": + t_len = thumbnailer.crop(t_path, t_width, t_height, t_type) + elif t_method == "scale": + t_len = thumbnailer.scale(t_path, t_width, t_height, t_type) + else: + t_len = None + + return t_len + @defer.inlineCallbacks def _generate_local_exact_thumbnail(self, media_id, t_width, t_height, t_method, t_type): input_path = self.filepaths.local_media_filepath(media_id) - def thumbnail(): - thumbnailer = Thumbnailer(input_path) - m_width = thumbnailer.width - m_height = thumbnailer.height + t_path = self.filepaths.local_media_thumbnail( + media_id, t_width, t_height, t_type, t_method + ) + self._makedirs(t_path) - if m_width * m_height >= self.max_image_pixels: - logger.info( - "Image too large to thumbnail %r x %r > %r", - m_width, m_height, self.max_image_pixels - ) - return + t_len = yield threads.deferToThread( + self._generate_thumbnail, + input_path, t_path, t_width, t_height, t_method, t_type + ) - t_path = self.filepaths.local_media_thumbnail( - media_id, t_width, t_height, t_type, t_method - ) - self._makedirs(t_path) - - if t_method == "crop": - t_len = thumbnailer.crop(t_path, t_width, t_height, t_type) - elif t_method == "scale": - t_len = thumbnailer.scale(t_path, t_width, t_height, t_type) - else: - t_len = None - - return t_len, t_path - - res = yield threads.deferToThread(thumbnail) - - if res: - t_len, t_path = res + if t_len: yield self.store.store_local_thumbnail( media_id, t_width, t_height, t_type, t_method, t_len ) @@ -271,36 +274,17 @@ class BaseMediaResource(Resource): t_width, t_height, t_method, t_type): input_path = self.filepaths.remote_media_filepath(server_name, file_id) - def thumbnail(): - thumbnailer = Thumbnailer(input_path) - m_width = thumbnailer.width - m_height = thumbnailer.height + t_path = self.filepaths.remote_media_thumbnail( + server_name, file_id, t_width, t_height, t_type, t_method + ) + self._makedirs(t_path) - if m_width * m_height >= self.max_image_pixels: - logger.info( - "Image too large to thumbnail %r x %r > %r", - m_width, m_height, self.max_image_pixels - ) - return + t_len = yield 
threads.deferToThread( + self._generate_thumbnail, + input_path, t_path, t_width, t_height, t_method, t_type + ) - t_path = self.filepaths.remote_media_thumbnail( - server_name, file_id, t_width, t_height, t_type, t_method - ) - self._makedirs(t_path) - - if t_method == "crop": - t_len = thumbnailer.crop(t_path, t_width, t_height, t_type) - elif t_method == "scale": - t_len = thumbnailer.scale(t_path, t_width, t_height, t_type) - else: - t_len = None - - return t_path, t_len - - res = yield threads.deferToThread(thumbnail) - - if res: - t_path, t_len = res + if t_len: yield self.store.store_remote_media_thumbnail( server_name, media_id, file_id, t_width, t_height, t_type, t_method, t_len From 62c010283d543db0956066b42eb735b57c000a82 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Thu, 23 Jul 2015 16:03:38 +0100 Subject: [PATCH 088/266] Add federation support for end-to-end key requests --- synapse/federation/federation_client.py | 34 ++++++++ synapse/federation/federation_server.py | 37 +++++++++ synapse/federation/transport/client.py | 70 +++++++++++++++++ synapse/federation/transport/server.py | 20 +++++ synapse/rest/client/v2_alpha/keys.py | 100 +++++++++++++++++------- 5 files changed, 231 insertions(+), 30 deletions(-) diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 7736d14fb..21a86a4c6 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -134,6 +134,40 @@ class FederationClient(FederationBase): destination, query_type, args, retry_on_dns_fail=retry_on_dns_fail ) + @log_function + def query_client_keys(self, destination, content, retry_on_dns_fail=True): + """Query device keys for a device hosted on a remote server. + + Args: + destination (str): Domain name of the remote homeserver + content (dict): The query content. + + Returns: + a Deferred which will eventually yield a JSON object from the + response + """ + sent_queries_counter.inc("client_device_keys") + return self.transport_layer.query_client_keys( + destination, content, retry_on_dns_fail=retry_on_dns_fail + ) + + @log_function + def claim_client_keys(self, destination, content, retry_on_dns_fail=True): + """Claims one-time keys for a device hosted on a remote server. + + Args: + destination (str): Domain name of the remote homeserver + content (dict): The query content. 
+ + Returns: + a Deferred which will eventually yield a JSON object from the + response + """ + sent_queries_counter.inc("client_one_time_keys") + return self.transport_layer.claim_client_keys( + destination, content, retry_on_dns_fail=retry_on_dns_fail + ) + @defer.inlineCallbacks @log_function def backfill(self, dest, context, limit, extremities): diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index cd79e23f4..c32908ac2 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -27,6 +27,7 @@ from synapse.api.errors import FederationError, SynapseError from synapse.crypto.event_signing import compute_event_signature +import simplejson as json import logging @@ -312,6 +313,42 @@ class FederationServer(FederationBase): (200, send_content) ) + @defer.inlineCallbacks + @log_function + def on_query_client_keys(self, origin, content): + query = [] + for user_id, device_ids in content.get("device_keys", {}).items(): + if not device_ids: + query.append((user_id, None)) + else: + for device_id in device_ids: + query.append((user_id, device_id)) + results = yield self.store.get_e2e_device_keys(query) + json_result = {} + for user_id, device_keys in results.items(): + for device_id, json_bytes in device_keys.items(): + json_result.setdefault(user_id, {})[device_id] = json.loads( + json_bytes + ) + defer.returnValue({"device_keys": json_result}) + + @defer.inlineCallbacks + @log_function + def on_claim_client_keys(self, origin, content): + query = [] + for user_id, device_keys in content.get("one_time_keys", {}).items(): + for device_id, algorithm in device_keys.items(): + query.append((user_id, device_id, algorithm)) + results = yield self.store.claim_e2e_one_time_keys(query) + json_result = {} + for user_id, device_keys in results.items(): + for device_id, keys in device_keys.items(): + for key_id, json_bytes in keys.items(): + json_result.setdefault(user_id, {})[device_id] = { + key_id: json.loads(json_bytes) + } + defer.returnValue({"one_time_keys": json_result}) + @defer.inlineCallbacks @log_function def on_get_missing_events(self, origin, room_id, earliest_events, diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py index 610a4c316..df5083dd2 100644 --- a/synapse/federation/transport/client.py +++ b/synapse/federation/transport/client.py @@ -222,6 +222,76 @@ class TransportLayerClient(object): defer.returnValue(content) + @defer.inlineCallbacks + @log_function + def query_client_keys(self, destination, query_content): + """Query the device keys for a list of user ids hosted on a remote + server. + + Request: + { + "device_keys": { + "": [""] + } } + + Response: + { + "device_keys": { + "": { + "": {...} + } } } + + Args: + destination(str): The server to query. + query_content(dict): The user ids to query. + Returns: + A dict containg the device keys. + """ + path = PREFIX + "/client_keys/query" + + content = yield self.client.post_json( + destination=destination, + path=path, + data=query_content, + ) + defer.returnValue(content) + + @defer.inlineCallbacks + @log_function + def claim_client_keys(self, destination, query_content): + """Claim one-time keys for a list of devices hosted on a remote server. + + Request: + { + "one_time_keys": { + "": { + "": "" + } } } + + Response: + { + "device_keys": { + "": { + "": { + ":": "" + } } } } + + Args: + destination(str): The server to query. + query_content(dict): The user ids to query. 
+
+        Returns:
+            a Deferred which will eventually yield a JSON object from the
+            response
+        """
+        sent_queries_counter.inc("client_one_time_keys")
+        return self.transport_layer.claim_client_keys(
+            destination, content, retry_on_dns_fail=retry_on_dns_fail
+        )
+
     @defer.inlineCallbacks
     @log_function
     def backfill(self, dest, context, limit, extremities):
diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py
index cd79e23f4..c32908ac2 100644
--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@@ -27,6 +27,7 @@ from synapse.api.errors import FederationError, SynapseError
 
 from synapse.crypto.event_signing import compute_event_signature
 
+import simplejson as json
 import logging
 
@@ -312,6 +313,42 @@ class FederationServer(FederationBase):
             (200, send_content)
         )
 
+    @defer.inlineCallbacks
+    @log_function
+    def on_query_client_keys(self, origin, content):
+        query = []
+        for user_id, device_ids in content.get("device_keys", {}).items():
+            if not device_ids:
+                query.append((user_id, None))
+            else:
+                for device_id in device_ids:
+                    query.append((user_id, device_id))
+        results = yield self.store.get_e2e_device_keys(query)
+        json_result = {}
+        for user_id, device_keys in results.items():
+            for device_id, json_bytes in device_keys.items():
+                json_result.setdefault(user_id, {})[device_id] = json.loads(
+                    json_bytes
+                )
+        defer.returnValue({"device_keys": json_result})
+
+    @defer.inlineCallbacks
+    @log_function
+    def on_claim_client_keys(self, origin, content):
+        query = []
+        for user_id, device_keys in content.get("one_time_keys", {}).items():
+            for device_id, algorithm in device_keys.items():
+                query.append((user_id, device_id, algorithm))
+        results = yield self.store.claim_e2e_one_time_keys(query)
+        json_result = {}
+        for user_id, device_keys in results.items():
+            for device_id, keys in device_keys.items():
+                for key_id, json_bytes in keys.items():
+                    json_result.setdefault(user_id, {})[device_id] = {
+                        key_id: json.loads(json_bytes)
+                    }
+        defer.returnValue({"one_time_keys": json_result})
+
     @defer.inlineCallbacks
     @log_function
     def on_get_missing_events(self, origin, room_id, earliest_events,
diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py
index 610a4c316..df5083dd2 100644
--- a/synapse/federation/transport/client.py
+++ b/synapse/federation/transport/client.py
@@ -222,6 +222,76 @@ class TransportLayerClient(object):
 
         defer.returnValue(content)
 
+    @defer.inlineCallbacks
+    @log_function
+    def query_client_keys(self, destination, query_content):
+        """Query the device keys for a list of user ids hosted on a remote
+        server.
+
+        Request:
+            {
+              "device_keys": {
+                "<user_id>": ["<device_id>"]
+            } }
+
+        Response:
+            {
+              "device_keys": {
+                "<user_id>": {
+                    "<device_id>": {...}
+            } } }
+
+        Args:
+            destination(str): The server to query.
+            query_content(dict): The user ids to query.
+        Returns:
+            A dict containing the device keys.
+        """
+        path = PREFIX + "/client_keys/query"
+
+        content = yield self.client.post_json(
+            destination=destination,
+            path=path,
+            data=query_content,
+        )
+        defer.returnValue(content)
+
+    @defer.inlineCallbacks
+    @log_function
+    def claim_client_keys(self, destination, query_content):
+        """Claim one-time keys for a list of devices hosted on a remote server.
+
+        Request:
+            {
+              "one_time_keys": {
+                "<user_id>": {
+                    "<device_id>": "<algorithm>"
+            } } }
+
+        Response:
+            {
+              "device_keys": {
+                "<user_id>": {
+                    "<device_id>": {
+                        "<algorithm>:<key_id>": "<key_base64>"
+            } } } }
+
+        Args:
+            destination(str): The server to query.
+            query_content(dict): The user ids to query.
- results = yield self.store.get_e2e_device_keys([(user_id, device_id)]) - defer.returnValue(self.json_result(request, results)) + user_id = user_id if user_id else auth_user_id + device_ids = [device_id] if device_id else [] + result = yield self.handle_request( + {"device_keys": {user_id: device_ids}} + ) + defer.returnValue(result) + + @defer.inlineCallbacks + def handle_request(self, body): + local_query = [] + remote_queries = {} + for user_id, device_ids in body.get("device_keys", {}).items(): + user = UserID.from_string(user_id) + if self.is_mine(user): + if not device_ids: + local_query.append((user_id, None)) + else: + for device_id in device_ids: + local_query.append((user_id, device_id)) + else: + remote_queries.set_default(user.domain, {})[user_id] = list( + device_ids + ) + results = yield self.store.get_e2e_device_keys(local_query) - def json_result(self, request, results): json_result = {} for user_id, device_keys in results.items(): for device_id, json_bytes in device_keys.items(): json_result.setdefault(user_id, {})[device_id] = json.loads( json_bytes ) - return (200, {"device_keys": json_result}) + + for destination, device_keys in remote_queries.items(): + remote_result = yield self.federation.query_client_keys( + destination, {"device_keys": device_keys} + ) + for user_id, keys in remote_result.items(): + if user_id in device_keys: + json_result[user_id] = keys + defer.returnValue((200, {"device_keys": json_result})) class OneTimeKeyServlet(RestServlet): @@ -236,14 +255,16 @@ class OneTimeKeyServlet(RestServlet): self.store = hs.get_datastore() self.auth = hs.get_auth() self.clock = hs.get_clock() + self.federation = hs.get_replication_layer() + self.is_mine = hs.is_mine @defer.inlineCallbacks def on_GET(self, request, user_id, device_id, algorithm): yield self.auth.get_user_by_req(request) - results = yield self.store.claim_e2e_one_time_keys( - [(user_id, device_id, algorithm)] + result = yield self.handle_request( + {"one_time_keys": {user_id: {device_id: algorithm}}} ) - defer.returnValue(self.json_result(request, results)) + defer.returnValue(result) @defer.inlineCallbacks def on_POST(self, request, user_id, device_id, algorithm): @@ -252,14 +273,24 @@ class OneTimeKeyServlet(RestServlet): body = json.loads(request.content.read()) except: raise SynapseError(400, "Invalid key JSON") - query = [] - for user_id, device_keys in body.get("one_time_keys", {}).items(): - for device_id, algorithm in device_keys.items(): - query.append((user_id, device_id, algorithm)) - results = yield self.store.claim_e2e_one_time_keys(query) - defer.returnValue(self.json_result(request, results)) + result = yield self.handle_request(body) + defer.returnValue(result) + + @defer.inlineCallbacks + def handle_request(self, body): + local_query = [] + remote_queries = {} + for user_id, device_keys in body.get("one_time_keys", {}).items(): + user = UserID.from_string(user_id) + if self.is_mine(user): + for device_id, algorithm in device_keys.items(): + local_query.append((user_id, device_id, algorithm)) + else: + remote_queries.set_default(user.domain, {})[user_id] = ( + device_keys + ) + results = yield self.store.claim_e2e_one_time_keys(local_query) - def json_result(self, request, results): json_result = {} for user_id, device_keys in results.items(): for device_id, keys in device_keys.items(): @@ -267,7 +298,16 @@ class OneTimeKeyServlet(RestServlet): json_result.setdefault(user_id, {})[device_id] = { key_id: json.loads(json_bytes) } - return (200, {"one_time_keys": json_result}) + + for 
destination, device_keys in remote_queries.items(): + remote_result = yield self.federation.query_client_keys( + destination, {"one_time_keys": device_keys} + ) + for user_id, keys in remote_result.items(): + if user_id in device_keys: + json_result[user_id] = keys + + defer.returnValue((200, {"one_time_keys": json_result})) def register_servlets(hs, http_server): From 2da3b1e60bf7e9ae1d6714abcff0a0c224cadf28 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Fri, 24 Jul 2015 18:26:46 +0100 Subject: [PATCH 089/266] Get the end-to-end key federation working --- synapse/federation/federation_client.py | 12 ++++-------- synapse/federation/transport/client.py | 4 ++-- synapse/federation/transport/server.py | 12 ++++++------ synapse/rest/client/v2_alpha/keys.py | 10 +++++----- 4 files changed, 17 insertions(+), 21 deletions(-) diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 21a86a4c6..44e4d0755 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -135,7 +135,7 @@ class FederationClient(FederationBase): ) @log_function - def query_client_keys(self, destination, content, retry_on_dns_fail=True): + def query_client_keys(self, destination, content): """Query device keys for a device hosted on a remote server. Args: @@ -147,12 +147,10 @@ class FederationClient(FederationBase): response """ sent_queries_counter.inc("client_device_keys") - return self.transport_layer.query_client_keys( - destination, content, retry_on_dns_fail=retry_on_dns_fail - ) + return self.transport_layer.query_client_keys(destination, content) @log_function - def claim_client_keys(self, destination, content, retry_on_dns_fail=True): + def claim_client_keys(self, destination, content): """Claims one-time keys for a device hosted on a remote server. Args: @@ -164,9 +162,7 @@ class FederationClient(FederationBase): response """ sent_queries_counter.inc("client_one_time_keys") - return self.transport_layer.claim_client_keys( - destination, content, retry_on_dns_fail=retry_on_dns_fail - ) + return self.transport_layer.claim_client_keys(destination, content) @defer.inlineCallbacks @log_function diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py index df5083dd2..ced703364 100644 --- a/synapse/federation/transport/client.py +++ b/synapse/federation/transport/client.py @@ -247,7 +247,7 @@ class TransportLayerClient(object): Returns: A dict containg the device keys. """ - path = PREFIX + "/client_keys/query" + path = PREFIX + "/user/keys/query" content = yield self.client.post_json( destination=destination, @@ -283,7 +283,7 @@ class TransportLayerClient(object): A dict containg the one-time keys. 
""" - path = PREFIX + "/client_keys/claim" + path = PREFIX + "/user/keys/claim" content = yield self.client.post_json( destination=destination, diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index fb59383ec..36f250e1a 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -326,20 +326,20 @@ class FederationInviteServlet(BaseFederationServlet): class FederationClientKeysQueryServlet(BaseFederationServlet): - PATH = "/client_keys/query" + PATH = "/user/keys/query" @defer.inlineCallbacks - def on_POST(self, origin, content): - response = yield self.handler.on_client_key_query(origin, content) + def on_POST(self, origin, content, query): + response = yield self.handler.on_query_client_keys(origin, content) defer.returnValue((200, response)) class FederationClientKeysClaimServlet(BaseFederationServlet): - PATH = "/client_keys/claim" + PATH = "/user/keys/claim" @defer.inlineCallbacks - def on_POST(self, origin, content): - response = yield self.handler.on_client_key_claim(origin, content) + def on_POST(self, origin, content, query): + response = yield self.handler.on_claim_client_keys(origin, content) defer.returnValue((200, response)) diff --git a/synapse/rest/client/v2_alpha/keys.py b/synapse/rest/client/v2_alpha/keys.py index 739a08ada..718928eed 100644 --- a/synapse/rest/client/v2_alpha/keys.py +++ b/synapse/rest/client/v2_alpha/keys.py @@ -202,7 +202,7 @@ class KeyQueryServlet(RestServlet): for device_id in device_ids: local_query.append((user_id, device_id)) else: - remote_queries.set_default(user.domain, {})[user_id] = list( + remote_queries.setdefault(user.domain, {})[user_id] = list( device_ids ) results = yield self.store.get_e2e_device_keys(local_query) @@ -218,7 +218,7 @@ class KeyQueryServlet(RestServlet): remote_result = yield self.federation.query_client_keys( destination, {"device_keys": device_keys} ) - for user_id, keys in remote_result.items(): + for user_id, keys in remote_result["device_keys"].items(): if user_id in device_keys: json_result[user_id] = keys defer.returnValue((200, {"device_keys": json_result})) @@ -286,7 +286,7 @@ class OneTimeKeyServlet(RestServlet): for device_id, algorithm in device_keys.items(): local_query.append((user_id, device_id, algorithm)) else: - remote_queries.set_default(user.domain, {})[user_id] = ( + remote_queries.setdefault(user.domain, {})[user_id] = ( device_keys ) results = yield self.store.claim_e2e_one_time_keys(local_query) @@ -300,10 +300,10 @@ class OneTimeKeyServlet(RestServlet): } for destination, device_keys in remote_queries.items(): - remote_result = yield self.federation.query_client_keys( + remote_result = yield self.federation.claim_client_keys( destination, {"one_time_keys": device_keys} ) - for user_id, keys in remote_result.items(): + for user_id, keys in remote_result["one_time_keys"].items(): if user_id in device_keys: json_result[user_id] = keys From 39e21ea51cf94f436f1f845f24c1b04f11b92c6f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 27 Jul 2015 13:57:29 +0100 Subject: [PATCH 090/266] Add support for using keyword arguments with cached functions --- synapse/storage/_base.py | 40 ++++++++++++++++++++++++++++++------ synapse/storage/keys.py | 5 ++--- synapse/storage/push_rule.py | 8 +++----- synapse/storage/receipts.py | 5 ++--- synapse/storage/room.py | 5 ++--- synapse/storage/state.py | 5 ++--- 6 files changed, 45 insertions(+), 23 deletions(-) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 
8f812f0fd..f1265541b 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -27,6 +27,7 @@ from twisted.internet import defer from collections import namedtuple, OrderedDict import functools +import inspect import sys import time import threading @@ -141,13 +142,28 @@ class CacheDescriptor(object): which can be used to insert values into the cache specifically, without calling the calculation function. """ - def __init__(self, orig, max_entries=1000, num_args=1, lru=False): + def __init__(self, orig, max_entries=1000, num_args=1, lru=False, + inlineCallbacks=False): self.orig = orig + if inlineCallbacks: + self.function_to_call = defer.inlineCallbacks(orig) + else: + self.function_to_call = orig + self.max_entries = max_entries self.num_args = num_args self.lru = lru + self.arg_names = inspect.getargspec(orig).args[1:num_args+1] + + if len(self.arg_names) < self.num_args: + raise Exception( + "Not enough explicit positional arguments to key off of for %r." + " (@cached cannot key off of *args or **kwars)" + % (orig.__name__,) + ) + def __get__(self, obj, objtype=None): cache = Cache( name=self.orig.__name__, @@ -158,11 +174,13 @@ class CacheDescriptor(object): @functools.wraps(self.orig) @defer.inlineCallbacks - def wrapped(*keyargs): + def wrapped(*args, **kwargs): + arg_dict = inspect.getcallargs(self.orig, obj, *args, **kwargs) + keyargs = [arg_dict[arg_nm] for arg_nm in self.arg_names] try: - cached_result = cache.get(*keyargs[:self.num_args]) + cached_result = cache.get(*keyargs) if DEBUG_CACHES: - actual_result = yield self.orig(obj, *keyargs) + actual_result = yield self.function_to_call(obj, *args, **kwargs) if actual_result != cached_result: logger.error( "Stale cache entry %s%r: cached: %r, actual %r", @@ -177,9 +195,9 @@ class CacheDescriptor(object): # while the SELECT is executing (SYN-369) sequence = cache.sequence - ret = yield self.orig(obj, *keyargs) + ret = yield self.function_to_call(obj, *args, **kwargs) - cache.update(sequence, *keyargs[:self.num_args] + (ret,)) + cache.update(sequence, *(keyargs + [ret])) defer.returnValue(ret) @@ -201,6 +219,16 @@ def cached(max_entries=1000, num_args=1, lru=False): ) +def cachedInlineCallbacks(max_entries=1000, num_args=1, lru=False): + return lambda orig: CacheDescriptor( + orig, + max_entries=max_entries, + num_args=num_args, + lru=lru, + inlineCallbacks=True, + ) + + class LoggingTransaction(object): """An object that almost-transparently proxies for the 'txn' object passed to the constructor. Adds logging and metrics to the .execute() diff --git a/synapse/storage/keys.py b/synapse/storage/keys.py index 940a5f7e0..e3f98f0cd 100644 --- a/synapse/storage/keys.py +++ b/synapse/storage/keys.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from _base import SQLBaseStore, cached +from _base import SQLBaseStore, cachedInlineCallbacks from twisted.internet import defer @@ -71,8 +71,7 @@ class KeyStore(SQLBaseStore): desc="store_server_certificate", ) - @cached() - @defer.inlineCallbacks + @cachedInlineCallbacks() def get_all_server_verify_keys(self, server_name): rows = yield self._simple_select_list( table="server_signature_keys", diff --git a/synapse/storage/push_rule.py b/synapse/storage/push_rule.py index 4cac118d1..a220f3632 100644 --- a/synapse/storage/push_rule.py +++ b/synapse/storage/push_rule.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from ._base import SQLBaseStore, cached +from ._base import SQLBaseStore, cachedInlineCallbacks from twisted.internet import defer import logging @@ -23,8 +23,7 @@ logger = logging.getLogger(__name__) class PushRuleStore(SQLBaseStore): - @cached() - @defer.inlineCallbacks + @cachedInlineCallbacks() def get_push_rules_for_user(self, user_name): rows = yield self._simple_select_list( table=PushRuleTable.table_name, @@ -41,8 +40,7 @@ class PushRuleStore(SQLBaseStore): defer.returnValue(rows) - @cached() - @defer.inlineCallbacks + @cachedInlineCallbacks() def get_push_rules_enabled_for_user(self, user_name): results = yield self._simple_select_list( table=PushRuleEnableTable.table_name, diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py index 7a6af98d9..b79d6683c 100644 --- a/synapse/storage/receipts.py +++ b/synapse/storage/receipts.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ._base import SQLBaseStore, cached +from ._base import SQLBaseStore, cachedInlineCallbacks from twisted.internet import defer @@ -128,8 +128,7 @@ class ReceiptsStore(SQLBaseStore): def get_max_receipt_stream_id(self): return self._receipts_id_gen.get_max_token(self) - @cached - @defer.inlineCallbacks + @cachedInlineCallbacks() def get_graph_receipts_for_room(self, room_id): """Get receipts for sending to remote servers. """ diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 4612a8aa8..dd5bc2c8f 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -17,7 +17,7 @@ from twisted.internet import defer from synapse.api.errors import StoreError -from ._base import SQLBaseStore, cached +from ._base import SQLBaseStore, cachedInlineCallbacks import collections import logging @@ -186,8 +186,7 @@ class RoomStore(SQLBaseStore): } ) - @cached() - @defer.inlineCallbacks + @cachedInlineCallbacks() def get_room_name_and_aliases(self, room_id): def f(txn): sql = ( diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 47bec6549..55c6d5289 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ._base import SQLBaseStore, cached +from ._base import SQLBaseStore, cached, cachedInlineCallbacks from twisted.internet import defer @@ -189,8 +189,7 @@ class StateStore(SQLBaseStore): events = yield self._get_events(event_ids, get_prev_content=False) defer.returnValue(events) - @cached(num_args=3) - @defer.inlineCallbacks + @cachedInlineCallbacks(num_args=3) def get_current_state_for_key(self, room_id, event_type, state_key): def f(txn): sql = ( From a4d62ba36afc54d4e60f1371fe9b31e8b8e6834c Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Tue, 28 Jul 2015 17:34:12 +0100 Subject: [PATCH 091/266] Fix v2_alpha registration. Add unit tests. V2 Registration forced everyone (including ASes) to create a password for a user, when ASes should be able to omit passwords. Also unbreak AS registration in general which checked too early if the given username was claimed by an AS; it was checked before knowing if the AS was the one doing the registration! Add unit tests for AS reg, user reg and disabled_registration flag. 
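
A quick client-side sketch, not part of the patch, of the three flows the rewritten servlet forks on. The endpoint path, field names and the HMAC-SHA1 construction are taken from the diff below; the homeserver URL, the tokens and the secret are placeholders, and the third-party ``requests`` library is assumed (Python 2, matching the codebase)::

    # Illustrative only -- not part of this patch series.
    import hashlib
    import hmac
    import json

    import requests

    BASE = "http://localhost:8008/_matrix/client/v2_alpha/register"

    # 1. Application service registration: AS token in the query string,
    #    no password required.
    requests.post(
        BASE,
        params={"access_token": "as_token_placeholder"},
        data=json.dumps({"username": "kermit"}),
    )

    # 2. Shared secret registration: HMAC-SHA1 of the desired localpart,
    #    keyed with registration_shared_secret from the server config
    #    (mirrors the servlet's want_mac computation).
    secret = "registration_shared_secret_placeholder"
    mac = hmac.new(secret, "kermit", hashlib.sha1).hexdigest()
    requests.post(
        BASE,
        data=json.dumps({"username": "kermit", "password": "monkey", "mac": mac}),
    )

    # 3. Normal user registration: goes through the interactive-auth flows
    #    and is refused outright when disable_registration is set.
    resp = requests.post(
        BASE,
        data=json.dumps({"username": "kermit", "password": "monkey"}),
    )
    print(resp.status_code, resp.json())
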
--- synapse/handlers/register.py | 3 +- synapse/rest/client/v2_alpha/register.py | 124 ++++++++++-------- tests/rest/client/v2_alpha/test_register.py | 132 ++++++++++++++++++++ 3 files changed, 204 insertions(+), 55 deletions(-) create mode 100644 tests/rest/client/v2_alpha/test_register.py diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index a1288b425..f81d75017 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -73,7 +73,8 @@ class RegistrationHandler(BaseHandler): localpart : The local part of the user ID to register. If None, one will be randomly generated. password (str) : The password to assign to this user so they can - login again. + login again. This can be None which means they cannot login again + via a password (e.g. the user is an application service user). Returns: A tuple of (user_id, access_token). Raises: diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index 0c737d73b..e1c42dd51 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -19,7 +19,7 @@ from synapse.api.constants import LoginType from synapse.api.errors import SynapseError, Codes from synapse.http.servlet import RestServlet -from ._base import client_v2_pattern, parse_request_allow_empty +from ._base import client_v2_pattern, parse_json_dict_from_request import logging import hmac @@ -55,30 +55,52 @@ class RegisterRestServlet(RestServlet): @defer.inlineCallbacks def on_POST(self, request): yield run_on_reactor() + body = parse_json_dict_from_request(request) - body = parse_request_allow_empty(request) - # we do basic sanity checks here because the auth - # layer will store these in sessions + # we do basic sanity checks here because the auth layer will store these + # in sessions. Pull out the username/password provided to us. + desired_password = None if 'password' in body: - if ((not isinstance(body['password'], str) and - not isinstance(body['password'], unicode)) or + if (not isinstance(body['password'], basestring) or len(body['password']) > 512): raise SynapseError(400, "Invalid password") + desired_password = body["password"] + desired_username = None if 'username' in body: - if ((not isinstance(body['username'], str) and - not isinstance(body['username'], unicode)) or + if (not isinstance(body['username'], basestring) or len(body['username']) > 512): raise SynapseError(400, "Invalid username") desired_username = body['username'] - yield self.registration_handler.check_username(desired_username) - is_using_shared_secret = False - is_application_server = False - - service = None + appservice = None if 'access_token' in request.args: - service = yield self.auth.get_appservice_by_req(request) + appservice = yield self.auth.get_appservice_by_req(request) + + # fork off as soon as possible for ASes and shared secret auth which + # have completely different registration flows to normal users + + # == Application Service Registration == + if appservice: + result = yield self._do_appservice_registration(desired_username) + defer.returnValue((200, result)) # we throw for non 200 responses + return + + # == Shared Secret Registration == (e.g. create new user scripts) + if 'mac' in body: + # FIXME: Should we really be determining if this is shared secret + # auth based purely on the 'mac' key? 
+ result = yield self._do_shared_secret_registration( + desired_username, desired_password, body["mac"] + ) + defer.returnValue((200, result)) # we throw for non 200 responses + return + + # == Normal User Registration == (everyone else) + if self.hs.config.disable_registration: + raise SynapseError(403, "Registration has been disabled") + + yield self.registration_handler.check_username(desired_username) if self.hs.config.enable_registration_captcha: flows = [ @@ -91,39 +113,20 @@ class RegisterRestServlet(RestServlet): [LoginType.EMAIL_IDENTITY] ] - result = None - if service: - is_application_server = True - params = body - elif 'mac' in body: - # Check registration-specific shared secret auth - if 'username' not in body: - raise SynapseError(400, "", Codes.MISSING_PARAM) - self._check_shared_secret_auth( - body['username'], body['mac'] - ) - is_using_shared_secret = True - params = body - else: - authed, result, params = yield self.auth_handler.check_auth( - flows, body, self.hs.get_ip_from_request(request) - ) - - if not authed: - defer.returnValue((401, result)) - - can_register = ( - not self.hs.config.disable_registration - or is_application_server - or is_using_shared_secret + authed, result, params = yield self.auth_handler.check_auth( + flows, body, self.hs.get_ip_from_request(request) ) - if not can_register: - raise SynapseError(403, "Registration has been disabled") + if not authed: + defer.returnValue((401, result)) + return + + # NB: This may be from the auth handler and NOT from the POST if 'password' not in params: - raise SynapseError(400, "", Codes.MISSING_PARAM) - desired_username = params['username'] if 'username' in params else None - new_password = params['password'] + raise SynapseError(400, "Missing password.", Codes.MISSING_PARAM) + + desired_username = params.get("username", None) + new_password = params.get("password", None) (user_id, token) = yield self.registration_handler.register( localpart=desired_username, @@ -156,18 +159,21 @@ class RegisterRestServlet(RestServlet): else: logger.info("bind_email not specified: not binding email") - result = { - "user_id": user_id, - "access_token": token, - "home_server": self.hs.hostname, - } - + result = self._create_registration_details(user_id, token) defer.returnValue((200, result)) def on_OPTIONS(self, _): return 200, {} - def _check_shared_secret_auth(self, username, mac): + @defer.inlineCallbacks + def _do_appservice_registration(self, username): + (user_id, token) = yield self.registration_handler.register( + localpart=username + ) + defer.returnValue(self._create_registration_details(user_id, token)) + + @defer.inlineCallbacks + def _do_shared_secret_registration(self, username, password, mac): if not self.hs.config.registration_shared_secret: raise SynapseError(400, "Shared secret registration is not enabled") @@ -183,13 +189,23 @@ class RegisterRestServlet(RestServlet): digestmod=sha1, ).hexdigest() - if compare_digest(want_mac, got_mac): - return True - else: + if not compare_digest(want_mac, got_mac): raise SynapseError( 403, "HMAC incorrect", ) + (user_id, token) = yield self.registration_handler.register( + localpart=username, password=password + ) + defer.returnValue(self._create_registration_details(user_id, token)) + + def _create_registration_details(self, user_id, token): + return { + "user_id": user_id, + "access_token": token, + "home_server": self.hs.hostname, + } + def register_servlets(hs, http_server): RegisterRestServlet(hs).register(http_server) diff --git 
a/tests/rest/client/v2_alpha/test_register.py b/tests/rest/client/v2_alpha/test_register.py new file mode 100644 index 000000000..3edc2ec2e --- /dev/null +++ b/tests/rest/client/v2_alpha/test_register.py @@ -0,0 +1,132 @@ +from synapse.rest.client.v2_alpha.register import RegisterRestServlet +from synapse.api.errors import SynapseError +from twisted.internet import defer +from mock import Mock, MagicMock +from tests import unittest +import json + + +class RegisterRestServletTestCase(unittest.TestCase): + + def setUp(self): + # do the dance to hook up request data to self.request_data + self.request_data = "" + self.request = Mock( + content=Mock(read=Mock(side_effect=lambda: self.request_data)), + ) + self.request.args = {} + + self.appservice = None + self.auth = Mock(get_appservice_by_req=Mock( + side_effect=lambda x: defer.succeed(self.appservice)) + ) + + self.auth_result = (False, None, None) + self.auth_handler = Mock( + check_auth=Mock(side_effect=lambda x,y,z: self.auth_result) + ) + self.registration_handler = Mock() + self.identity_handler = Mock() + self.login_handler = Mock() + + # do the dance to hook it up to the hs global + self.handlers = Mock( + auth_handler=self.auth_handler, + registration_handler=self.registration_handler, + identity_handler=self.identity_handler, + login_handler=self.login_handler + ) + self.hs = Mock() + self.hs.hostname = "superbig~testing~thing.com" + self.hs.get_auth = Mock(return_value=self.auth) + self.hs.get_handlers = Mock(return_value=self.handlers) + self.hs.config.disable_registration = False + + # init the thing we're testing + self.servlet = RegisterRestServlet(self.hs) + + @defer.inlineCallbacks + def test_POST_appservice_registration_valid(self): + user_id = "@kermit:muppet" + token = "kermits_access_token" + self.request.args = { + "access_token": "i_am_an_app_service" + } + self.request_data = json.dumps({ + "username": "kermit" + }) + self.appservice = { + "id": "1234" + } + self.registration_handler.register = Mock(return_value=(user_id, token)) + result = yield self.servlet.on_POST(self.request) + self.assertEquals(result, (200, { + "user_id": user_id, + "access_token": token, + "home_server": self.hs.hostname + })) + + @defer.inlineCallbacks + def test_POST_appservice_registration_invalid(self): + self.request.args = { + "access_token": "i_am_an_app_service" + } + self.request_data = json.dumps({ + "username": "kermit" + }) + self.appservice = None # no application service exists + result = yield self.servlet.on_POST(self.request) + self.assertEquals(result, (401, None)) + + def test_POST_bad_password(self): + self.request_data = json.dumps({ + "username": "kermit", + "password": 666 + }) + d = self.servlet.on_POST(self.request) + return self.assertFailure(d, SynapseError) + + def test_POST_bad_username(self): + self.request_data = json.dumps({ + "username": 777, + "password": "monkey" + }) + d = self.servlet.on_POST(self.request) + return self.assertFailure(d, SynapseError) + + @defer.inlineCallbacks + def test_POST_user_valid(self): + user_id = "@kermit:muppet" + token = "kermits_access_token" + self.request_data = json.dumps({ + "username": "kermit", + "password": "monkey" + }) + self.registration_handler.check_username = Mock(return_value=True) + self.auth_result = (True, None, { + "username": "kermit", + "password": "monkey" + }) + self.registration_handler.register = Mock(return_value=(user_id, token)) + + result = yield self.servlet.on_POST(self.request) + self.assertEquals(result, (200, { + "user_id": user_id, + 
"access_token": token, + "home_server": self.hs.hostname + })) + + def test_POST_disabled_registration(self): + self.hs.config.disable_registration = True + self.request_data = json.dumps({ + "username": "kermit", + "password": "monkey" + }) + self.registration_handler.check_username = Mock(return_value=True) + self.auth_result = (True, None, { + "username": "kermit", + "password": "monkey" + }) + self.registration_handler.register = Mock(return_value=("@user:id", "t")) + d = self.servlet.on_POST(self.request) + return self.assertFailure(d, SynapseError) \ No newline at end of file From c772dffc9f024ce3135039c9f8032f09cc730841 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Wed, 29 Jul 2015 09:39:55 +0100 Subject: [PATCH 092/266] improve OS X instructions and remove all the leading $'s to make it easier to c+p commands --- README.rst | 92 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 47 insertions(+), 45 deletions(-) diff --git a/README.rst b/README.rst index 21e347968..5ff53f2df 100644 --- a/README.rst +++ b/README.rst @@ -101,25 +101,26 @@ header files for python C extensions. Installing prerequisites on Ubuntu or Debian:: - $ sudo apt-get install build-essential python2.7-dev libffi-dev \ - python-pip python-setuptools sqlite3 \ - libssl-dev python-virtualenv libjpeg-dev + sudo apt-get install build-essential python2.7-dev libffi-dev \ + python-pip python-setuptools sqlite3 \ + libssl-dev python-virtualenv libjpeg-dev Installing prerequisites on ArchLinux:: - $ sudo pacman -S base-devel python2 python-pip \ - python-setuptools python-virtualenv sqlite3 + sudo pacman -S base-devel python2 python-pip \ + python-setuptools python-virtualenv sqlite3 Installing prerequisites on Mac OS X:: - $ xcode-select --install - $ sudo pip install virtualenv + xcode-select --install + sudo easy_install pip + sudo pip install virtualenv To install the synapse homeserver run:: - $ virtualenv -p python2.7 ~/.synapse - $ source ~/.synapse/bin/activate - $ pip install --process-dependency-links https://github.com/matrix-org/synapse/tarball/master + virtualenv -p python2.7 ~/.synapse + source ~/.synapse/bin/activate + pip install --process-dependency-links https://github.com/matrix-org/synapse/tarball/master This installs synapse, along with the libraries it uses, into a virtual environment under ``~/.synapse``. Feel free to pick a different directory @@ -132,8 +133,8 @@ above in Docker at https://registry.hub.docker.com/u/silviof/docker-matrix/. To set up your homeserver, run (in your virtualenv, as before):: - $ cd ~/.synapse - $ python -m synapse.app.homeserver \ + cd ~/.synapse + python -m synapse.app.homeserver \ --server-name machine.my.domain.name \ --config-path homeserver.yaml \ --generate-config @@ -192,9 +193,9 @@ Running Synapse To actually run your new homeserver, pick a working directory for Synapse to run (e.g. 
``~/.synapse``), and:: - $ cd ~/.synapse - $ source ./bin/activate - $ synctl start + cd ~/.synapse + source ./bin/activate + synctl start Platform Specific Instructions ============================== @@ -212,12 +213,12 @@ defaults to python 3, but synapse currently assumes python 2.7 by default: pip may be outdated (6.0.7-1 and needs to be upgraded to 6.0.8-1 ):: - $ sudo pip2.7 install --upgrade pip + sudo pip2.7 install --upgrade pip You also may need to explicitly specify python 2.7 again during the install request:: - $ pip2.7 install --process-dependency-links \ + pip2.7 install --process-dependency-links \ https://github.com/matrix-org/synapse/tarball/master If you encounter an error with lib bcrypt causing an Wrong ELF Class: @@ -225,13 +226,13 @@ ELFCLASS32 (x64 Systems), you may need to reinstall py-bcrypt to correctly compile it under the right architecture. (This should not be needed if installing under virtualenv):: - $ sudo pip2.7 uninstall py-bcrypt - $ sudo pip2.7 install py-bcrypt + sudo pip2.7 uninstall py-bcrypt + sudo pip2.7 install py-bcrypt During setup of Synapse you need to call python2.7 directly again:: - $ cd ~/.synapse - $ python2.7 -m synapse.app.homeserver \ + cd ~/.synapse + python2.7 -m synapse.app.homeserver \ --server-name machine.my.domain.name \ --config-path homeserver.yaml \ --generate-config @@ -279,22 +280,22 @@ Synapse requires pip 1.7 or later, so if your OS provides too old a version and you get errors about ``error: no such option: --process-dependency-links`` you may need to manually upgrade it:: - $ sudo pip install --upgrade pip + sudo pip install --upgrade pip If pip crashes mid-installation for reason (e.g. lost terminal), pip may refuse to run until you remove the temporary installation directory it created. To reset the installation:: - $ rm -rf /tmp/pip_install_matrix + rm -rf /tmp/pip_install_matrix pip seems to leak *lots* of memory during installation. For instance, a Linux host with 512MB of RAM may run out of memory whilst installing Twisted. If this happens, you will have to individually install the dependencies which are failing, e.g.:: - $ pip install twisted + pip install twisted -On OSX, if you encounter clang: error: unknown argument: '-mno-fused-madd' you +On OS X, if you encounter clang: error: unknown argument: '-mno-fused-madd' you will need to export CFLAGS=-Qunused-arguments. Troubleshooting Running @@ -310,10 +311,11 @@ correctly, causing all tests to fail with errors about missing "sodium.h". To fix try re-installing from PyPI or directly from (https://github.com/pyca/pynacl):: - $ # Install from PyPI - $ pip install --user --upgrade --force pynacl - $ # Install from github - $ pip install --user https://github.com/pyca/pynacl/tarball/master + # Install from PyPI + pip install --user --upgrade --force pynacl + + # Install from github + pip install --user https://github.com/pyca/pynacl/tarball/master ArchLinux ~~~~~~~~~ @@ -321,7 +323,7 @@ ArchLinux If running `$ synctl start` fails with 'returned non-zero exit status 1', you will need to explicitly call Python2.7 - either running as:: - $ python2.7 -m synapse.app.homeserver --daemonize -c homeserver.yaml + python2.7 -m synapse.app.homeserver --daemonize -c homeserver.yaml ...or by editing synctl with the correct python executable. 
@@ -331,16 +333,16 @@ Synapse Development To check out a synapse for development, clone the git repo into a working directory of your choice:: - $ git clone https://github.com/matrix-org/synapse.git - $ cd synapse + git clone https://github.com/matrix-org/synapse.git + cd synapse Synapse has a number of external dependencies, that are easiest to install using pip and a virtualenv:: - $ virtualenv env - $ source env/bin/activate - $ python synapse/python_dependencies.py | xargs -n1 pip install - $ pip install setuptools_trial mock + virtualenv env + source env/bin/activate + python synapse/python_dependencies.py | xargs -n1 pip install + pip install setuptools_trial mock This will run a process of downloading and installing all the needed dependencies into a virtual env. @@ -348,7 +350,7 @@ dependencies into a virtual env. Once this is done, you may wish to run Synapse's unit tests, to check that everything is installed as it should be:: - $ python setup.py test + python setup.py test This should end with a 'PASSED' result:: @@ -389,11 +391,11 @@ IDs: For the first form, simply pass the required hostname (of the machine) as the --server-name parameter:: - $ python -m synapse.app.homeserver \ + python -m synapse.app.homeserver \ --server-name machine.my.domain.name \ --config-path homeserver.yaml \ --generate-config - $ python -m synapse.app.homeserver --config-path homeserver.yaml + python -m synapse.app.homeserver --config-path homeserver.yaml Alternatively, you can run ``synctl start`` to guide you through the process. @@ -410,11 +412,11 @@ record would then look something like:: At this point, you should then run the homeserver with the hostname of this SRV record, as that is the name other machines will expect it to have:: - $ python -m synapse.app.homeserver \ + python -m synapse.app.homeserver \ --server-name YOURDOMAIN \ --config-path homeserver.yaml \ --generate-config - $ python -m synapse.app.homeserver --config-path homeserver.yaml + python -m synapse.app.homeserver --config-path homeserver.yaml You may additionally want to pass one or more "-v" options, in order to @@ -428,7 +430,7 @@ private federation (``localhost:8080``, ``localhost:8081`` and ``localhost:8082``) which you can then access through the webclient running at http://localhost:8080. Simply run:: - $ demo/start.sh + demo/start.sh This is mainly useful just for development purposes. @@ -502,10 +504,10 @@ Building Internal API Documentation Before building internal API documentation install sphinx and sphinxcontrib-napoleon:: - $ pip install sphinx - $ pip install sphinxcontrib-napoleon + pip install sphinx + pip install sphinxcontrib-napoleon Building internal API documentation:: - $ python setup.py build_sphinx + python setup.py build_sphinx From 11b0a3407485e98082bf06d771e5ae2f68106ca7 Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Wed, 29 Jul 2015 10:00:54 +0100 Subject: [PATCH 093/266] Use the same reg paths as register v1 for ASes. Namely this means using registration_handler.appservice_register. 
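
For context, a rough reconstruction (hedged: the helper names ``get_app_service_by_token`` and ``is_interested_in_user`` come from the v1 code path and are not shown in this series) of why the AS token must reach the handler: only the handler can check the requested localpart against the namespaces the calling AS registered::

    # Hedged sketch -- helper names are assumptions, not code from this
    # patch series.
    from twisted.internet import defer

    from synapse.api.errors import AuthError, SynapseError
    from synapse.types import UserID


    @defer.inlineCallbacks
    def appservice_register(self, user_localpart, as_token):
        user_id = UserID(user_localpart, self.hs.hostname).to_string()
        service = yield self.store.get_app_service_by_token(as_token)
        if not service:
            # The token does not belong to any registered application service.
            raise AuthError(403, "Invalid application service token.")
        if not service.is_interested_in_user(user_id):
            # The username falls outside this AS's registered namespaces.
            raise SynapseError(400, "Invalid user localpart for this AS.")
        # ...then continue down the same registration path v1 uses.
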
--- synapse/rest/client/v2_alpha/register.py | 10 ++++++---- tests/rest/client/v2_alpha/test_register.py | 4 +++- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index e1c42dd51..cf54e1dac 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -82,7 +82,9 @@ class RegisterRestServlet(RestServlet): # == Application Service Registration == if appservice: - result = yield self._do_appservice_registration(desired_username) + result = yield self._do_appservice_registration( + desired_username, request.args["access_token"][0] + ) defer.returnValue((200, result)) # we throw for non 200 responses return @@ -166,9 +168,9 @@ class RegisterRestServlet(RestServlet): return 200, {} @defer.inlineCallbacks - def _do_appservice_registration(self, username): - (user_id, token) = yield self.registration_handler.register( - localpart=username + def _do_appservice_registration(self, username, as_token): + (user_id, token) = yield self.registration_handler.appservice_register( + username, as_token ) defer.returnValue(self._create_registration_details(user_id, token)) diff --git a/tests/rest/client/v2_alpha/test_register.py b/tests/rest/client/v2_alpha/test_register.py index 3edc2ec2e..66fd25964 100644 --- a/tests/rest/client/v2_alpha/test_register.py +++ b/tests/rest/client/v2_alpha/test_register.py @@ -58,7 +58,9 @@ class RegisterRestServletTestCase(unittest.TestCase): self.appservice = { "id": "1234" } - self.registration_handler.register = Mock(return_value=(user_id, token)) + self.registration_handler.appservice_register = Mock( + return_value=(user_id, token) + ) result = yield self.servlet.on_POST(self.request) self.assertEquals(result, (200, { "user_id": user_id, From 28d07a02e42c90732b708de870af2f9b21ba13f0 Mon Sep 17 00:00:00 2001 From: David Baker Date: Mon, 3 Aug 2015 15:31:21 +0100 Subject: [PATCH 094/266] Add vector.im as trusted ID server --- synapse/handlers/identity.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py index 6200e1077..c1095708a 100644 --- a/synapse/handlers/identity.py +++ b/synapse/handlers/identity.py @@ -44,7 +44,7 @@ class IdentityHandler(BaseHandler): http_client = SimpleHttpClient(self.hs) # XXX: make this configurable! 
# trustedIdServers = ['matrix.org', 'localhost:8090'] - trustedIdServers = ['matrix.org'] + trustedIdServers = ['matrix.org', 'vector.im'] if 'id_server' in creds: id_server = creds['id_server'] From 7148aaf5d0f75c463c93ac69885d05160fee4d4a Mon Sep 17 00:00:00 2001 From: David Baker Date: Mon, 3 Aug 2015 17:03:27 +0100 Subject: [PATCH 095/266] Don't try & check the username if we don't have one (which we won't if it's been saved in the auth layer) --- synapse/rest/client/v2_alpha/register.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index cf54e1dac..b5926f9ca 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -102,7 +102,8 @@ class RegisterRestServlet(RestServlet): if self.hs.config.disable_registration: raise SynapseError(403, "Registration has been disabled") - yield self.registration_handler.check_username(desired_username) + if desired_username is not None: + yield self.registration_handler.check_username(desired_username) if self.hs.config.enable_registration_captcha: flows = [ From 4d6cb8814e134eba644afeed7bd49df0c7951342 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 4 Aug 2015 09:32:23 +0100 Subject: [PATCH 096/266] Speed up event filtering (for ACL) logic --- synapse/handlers/federation.py | 6 +- synapse/handlers/message.py | 6 +- synapse/handlers/sync.py | 6 +- synapse/storage/_base.py | 10 ++- synapse/storage/state.py | 123 ++++++++++++++++++++++----------- 5 files changed, 105 insertions(+), 46 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index f7155fd8d..22f534e49 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -230,7 +230,11 @@ class FederationHandler(BaseHandler): @defer.inlineCallbacks def _filter_events_for_server(self, server_name, room_id, events): states = yield self.store.get_state_for_events( - room_id, [e.event_id for e in events], + room_id, frozenset(e.event_id for e in events), + types=( + (EventTypes.RoomHistoryVisibility, ""), + (EventTypes.Member, None), + ) ) events_and_states = zip(events, states) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 9d6d4f097..765b14d99 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -138,7 +138,11 @@ class MessageHandler(BaseHandler): @defer.inlineCallbacks def _filter_events_for_client(self, user_id, room_id, events): states = yield self.store.get_state_for_events( - room_id, [e.event_id for e in events], + room_id, frozenset(e.event_id for e in events), + types=( + (EventTypes.RoomHistoryVisibility, ""), + (EventTypes.Member, user_id), + ) ) events_and_states = zip(events, states) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 6cff6230c..8f58774b3 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -295,7 +295,11 @@ class SyncHandler(BaseHandler): @defer.inlineCallbacks def _filter_events_for_client(self, user_id, room_id, events): states = yield self.store.get_state_for_events( - room_id, [e.event_id for e in events], + room_id, frozenset(e.event_id for e in events), + types=( + (EventTypes.RoomHistoryVisibility, ""), + (EventTypes.Member, user_id), + ) ) events_and_states = zip(events, states) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 8f812f0fd..7b76ee3b7 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -71,6 +71,11 @@ class 
Cache(object): self.thread = None caches_by_name[name] = self.cache + class Sentinel(object): + __slots__ = [] + + self.sentinel = Sentinel() + def check_thread(self): expected_thread = self.thread if expected_thread is None: @@ -85,9 +90,10 @@ class Cache(object): if len(keyargs) != self.keylen: raise ValueError("Expected a key to have %d items", self.keylen) - if keyargs in self.cache: + val = self.cache.get(keyargs, self.sentinel) + if val is not self.sentinel: cache_counter.inc_hits(self.name) - return self.cache[keyargs] + return val cache_counter.inc_misses(self.name) raise KeyError() diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 47bec6549..7e9bd232c 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -17,6 +17,7 @@ from ._base import SQLBaseStore, cached from twisted.internet import defer +from synapse.util import unwrapFirstError from synapse.util.stringutils import random_string import logging @@ -206,62 +207,102 @@ class StateStore(SQLBaseStore): events = yield self._get_events(event_ids, get_prev_content=False) defer.returnValue(events) - @defer.inlineCallbacks - def get_state_for_events(self, room_id, event_ids): + @cached(num_args=3, lru=True) + def _get_state_groups_from_group(self, room_id, group, types): def f(txn): - groups = set() - event_to_group = {} - for event_id in event_ids: - # TODO: Remove this loop. - group = self._simple_select_one_onecol_txn( - txn, - table="event_to_state_groups", - keyvalues={"event_id": event_id}, - retcol="state_group", - allow_none=True, - ) - if group: - event_to_group[event_id] = group - groups.add(group) + sql = ( + "SELECT event_id FROM state_groups_state WHERE" + " room_id = ? AND state_group = ? AND (%s)" + ) % (" OR ".join(["(type = ? AND state_key = ?)"] * len(types)),) - group_to_state_ids = {} - for group in groups: - state_ids = self._simple_select_onecol_txn( - txn, - table="state_groups_state", - keyvalues={"state_group": group}, - retcol="event_id", - ) + args = [room_id, group] + args.extend([i for typ in types for i in typ]) + txn.execute(sql, args) - group_to_state_ids[group] = state_ids + return group, [ + r[0] + for r in txn.fetchall() + ] - return event_to_group, group_to_state_ids - - res = yield self.runInteraction( - "annotate_events_with_state_groups", + return self.runInteraction( + "_get_state_groups_from_group", f, ) - event_to_group, group_to_state_ids = res + @cached(num_args=3, lru=True, max_entries=100000) + def _get_state_for_event_id(self, room_id, event_id, types): + def f(txn): + type_and_state_sql = " OR ".join([ + "(type = ? AND state_key = ?)" + if typ[1] is not None + else "type = ?" + for typ in types + ]) - state_list = yield defer.gatherResults( - [ - self._fetch_events_for_group(group, vals) - for group, vals in group_to_state_ids.items() - ], - consumeErrors=True, + sql = ( + "SELECT sg.event_id FROM state_groups_state as sg" + " INNER JOIN event_to_state_groups as e" + " ON e.state_group = sg.state_group" + " WHERE e.event_id = ? 
AND (%s)" + ) % (type_and_state_sql,) + + args = [event_id] + for typ, state_key in types: + args.extend( + [typ, state_key] if state_key is not None else [typ] + ) + txn.execute(sql, args) + + return event_id, [ + r[0] + for r in txn.fetchall() + ] + + return self.runInteraction( + "_get_state_for_event_id", + f, ) - state_dict = { - group: { + @defer.inlineCallbacks + def get_state_for_events(self, room_id, event_ids, types): + set_types = frozenset(types) + res = yield defer.gatherResults( + [ + self._get_state_for_event_id( + room_id, event_id, set_types, + ) + for event_id in event_ids + ], + consumeErrors=True, + ).addErrback(unwrapFirstError) + + event_to_state_ids = dict(res) + + event_dict = yield self._get_events( + [ + item + for lst in event_to_state_ids.values() + for item in lst + ], + get_prev_content=False + ).addCallback( + lambda evs: {ev.event_id: ev for ev in evs} + ) + + event_to_state = { + event_id: { (ev.type, ev.state_key): ev - for ev in state + for ev in [ + event_dict[state_id] + for state_id in state_ids + if state_id in event_dict + ] } - for group, state in state_list + for event_id, state_ids in event_to_state_ids.items() } defer.returnValue([ - state_dict.get(event_to_group.get(event, None), None) + event_to_state[event] for event in event_ids ]) From 413a4c289b0a7bc9655a6f8543ccf1375e2a9e34 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 4 Aug 2015 11:08:07 +0100 Subject: [PATCH 097/266] Add comment --- synapse/storage/state.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 7e9bd232c..91a5ae86a 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -265,6 +265,21 @@ class StateStore(SQLBaseStore): @defer.inlineCallbacks def get_state_for_events(self, room_id, event_ids, types): + """Given a list of event_ids and type tuples, return a list of state + dicts for each event. The state dicts will only have the type/state_keys + that are in the `types` list. + + Args: + room_id (str) + event_ids (list) + types (list): List of (type, state_key) tuples which are used to + filter the state fetched. `state_key` may be None, which matches + any `state_key` + + Returns: + deferred: A list of dicts corresponding to the event_ids given. + The dicts are mappings from (type, state_key) -> state_events + """ set_types = frozenset(types) res = yield defer.gatherResults( [ From c77048e12f032842cebbb0f1a0639bb62db88418 Mon Sep 17 00:00:00 2001 From: David Baker Date: Tue, 4 Aug 2015 14:37:09 +0100 Subject: [PATCH 098/266] Add endpoint that proxies ID server request token and errors if the given email is in use on this Home Server. 
--- synapse/api/errors.py | 1 + synapse/handlers/identity.py | 25 +++++++++++++++++ synapse/rest/client/v2_alpha/register.py | 27 ++++++++++++++++++- .../schema/delta/22/user_threepids_unique.sql | 19 +++++++++++++ 4 files changed, 71 insertions(+), 1 deletion(-) create mode 100644 synapse/storage/schema/delta/22/user_threepids_unique.sql diff --git a/synapse/api/errors.py b/synapse/api/errors.py index 0b3320e62..c3b4d971a 100644 --- a/synapse/api/errors.py +++ b/synapse/api/errors.py @@ -40,6 +40,7 @@ class Codes(object): TOO_LARGE = "M_TOO_LARGE" EXCLUSIVE = "M_EXCLUSIVE" THREEPID_AUTH_FAILED = "M_THREEPID_AUTH_FAILED" + THREEPID_IN_USE = "THREEPID_IN_USE" class CodeMessageException(RuntimeError): diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py index c1095708a..2a99921d5 100644 --- a/synapse/handlers/identity.py +++ b/synapse/handlers/identity.py @@ -117,3 +117,28 @@ class IdentityHandler(BaseHandler): except CodeMessageException as e: data = json.loads(e.msg) defer.returnValue(data) + + @defer.inlineCallbacks + def requestEmailToken(self, id_server, email, client_secret, send_attempt, **kwargs): + yield run_on_reactor() + http_client = SimpleHttpClient(self.hs) + + params = { + 'email': email, + 'client_secret': client_secret, + 'send_attempt': send_attempt, + } + params.update(kwargs) + + try: + data = yield http_client.post_urlencoded_get_json( + "https://%s%s" % ( + id_server, + "/_matrix/identity/api/v1/validate/email/requestToken" + ), + params + ) + defer.returnValue(data) + except CodeMessageException as e: + logger.info("Proxied requestToken failed: %r", e) + raise e diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index b5926f9ca..7b97a73df 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -41,7 +41,7 @@ logger = logging.getLogger(__name__) class RegisterRestServlet(RestServlet): - PATTERN = client_v2_pattern("/register") + PATTERN = client_v2_pattern("/register*") def __init__(self, hs): super(RegisterRestServlet, self).__init__() @@ -55,6 +55,11 @@ class RegisterRestServlet(RestServlet): @defer.inlineCallbacks def on_POST(self, request): yield run_on_reactor() + + if '/register/email/requestToken' in request.path: + ret = yield self.onEmailTokenRequest(request) + defer.returnValue(ret) + body = parse_json_dict_from_request(request) # we do basic sanity checks here because the auth layer will store these @@ -209,6 +214,26 @@ class RegisterRestServlet(RestServlet): "home_server": self.hs.hostname, } + @defer.inlineCallbacks + def onEmailTokenRequest(self, request): + body = parse_json_dict_from_request(request) + + required = ['id_server', 'client_secret', 'email', 'send_attempt'] + absent = [] + for k in required: + if k not in body: + absent.append(k) + + existingUid = self.hs.get_datastore().get_user_id_by_threepid('email', body['email']) + if existingUid is not None: + raise SynapseError(400, "Email is already in use", Codes.THREEPID_IN_USE) + + if len(absent) > 0: + raise SynapseError(400, "Missing params: %r" % absent, Codes.MISSING_PARAM) + + ret = yield self.identity_handler.requestEmailToken(**body) + defer.returnValue((200, ret)) + def register_servlets(hs, http_server): RegisterRestServlet(hs).register(http_server) diff --git a/synapse/storage/schema/delta/22/user_threepids_unique.sql b/synapse/storage/schema/delta/22/user_threepids_unique.sql new file mode 100644 index 000000000..87edfa454 --- /dev/null +++ 
b/synapse/storage/schema/delta/22/user_threepids_unique.sql @@ -0,0 +1,19 @@ +CREATE TABLE IF NOT EXISTS user_threepids2 ( + user_id TEXT NOT NULL, + medium TEXT NOT NULL, + address TEXT NOT NULL, + validated_at BIGINT NOT NULL, + added_at BIGINT NOT NULL, + CONSTRAINT medium_address UNIQUE (medium, address) +); + +INSERT INTO user_threepids2 + SELECT * FROM user_threepids WHERE added_at IN ( + SELECT max(added_at) FROM user_threepids GROUP BY medium, address + ) +; + +DROP TABLE user_threepids; +ALTER TABLE user_threepids2 RENAME TO user_threepids; + +CREATE INDEX user_threepids_user_id ON user_threepids(user_id); From e1241285420556b8afb16aacb3eeb8f29e9859f0 Mon Sep 17 00:00:00 2001 From: David Baker Date: Tue, 4 Aug 2015 14:50:31 +0100 Subject: [PATCH 099/266] Bump schema version --- synapse/storage/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index 71d5d9250..99467dde0 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -54,7 +54,7 @@ logger = logging.getLogger(__name__) # Remember to update this number every time a change is made to database # schema files, so the users will be informed on server restarts. -SCHEMA_VERSION = 21 +SCHEMA_VERSION = 22 dir_path = os.path.abspath(os.path.dirname(__file__)) From 07ad03d5df0e3e797a23c5994308d77bf60bada3 Mon Sep 17 00:00:00 2001 From: David Baker Date: Tue, 4 Aug 2015 15:18:40 +0100 Subject: [PATCH 100/266] Fix tests --- tests/rest/client/v2_alpha/test_register.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/rest/client/v2_alpha/test_register.py b/tests/rest/client/v2_alpha/test_register.py index 66fd25964..f9a2b2248 100644 --- a/tests/rest/client/v2_alpha/test_register.py +++ b/tests/rest/client/v2_alpha/test_register.py @@ -13,6 +13,7 @@ class RegisterRestServletTestCase(unittest.TestCase): self.request_data = "" self.request = Mock( content=Mock(read=Mock(side_effect=lambda: self.request_data)), + path='/_matrix/api/v2_alpha/register' ) self.request.args = {} @@ -131,4 +132,4 @@ class RegisterRestServletTestCase(unittest.TestCase): }) self.registration_handler.register = Mock(return_value=("@user:id", "t")) d = self.servlet.on_POST(self.request) - return self.assertFailure(d, SynapseError) \ No newline at end of file + return self.assertFailure(d, SynapseError) From 883aabe4236f466b404bae7982dcb3b375dc53e1 Mon Sep 17 00:00:00 2001 From: David Baker Date: Tue, 4 Aug 2015 15:20:35 +0100 Subject: [PATCH 101/266] splt long line --- synapse/rest/client/v2_alpha/register.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index 7b97a73df..93f922370 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -224,7 +224,9 @@ class RegisterRestServlet(RestServlet): if k not in body: absent.append(k) - existingUid = self.hs.get_datastore().get_user_id_by_threepid('email', body['email']) + existingUid = self.hs.get_datastore().get_user_id_by_threepid( + 'email', body['email'] + ) if existingUid is not None: raise SynapseError(400, "Email is already in use", Codes.THREEPID_IN_USE) From e7768e77f56709897215c13ee3fd187550d20fd2 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 4 Aug 2015 15:56:56 +0100 Subject: [PATCH 102/266] Add basic dictionary cache --- synapse/storage/util/caches.py | 94 ++++++++++++++++++++++++++++++ tests/util/test_dict_cache.py | 101 
+++++++++++++++++++++++++++++++++ 2 files changed, 195 insertions(+) create mode 100644 synapse/storage/util/caches.py create mode 100644 tests/util/test_dict_cache.py diff --git a/synapse/storage/util/caches.py b/synapse/storage/util/caches.py new file mode 100644 index 000000000..0877cc79f --- /dev/null +++ b/synapse/storage/util/caches.py @@ -0,0 +1,94 @@ +# -*- coding: utf-8 -*- +# Copyright 2015 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from synapse.util.lrucache import LruCache +from collections import namedtuple +import threading + + +DictionaryEntry = namedtuple("DictionaryEntry", ("full", "value")) + + +class DictionaryCache(object): + + def __init__(self, name, max_entries=1000): + self.cache = LruCache(max_size=max_entries) + + self.name = name + self.sequence = 0 + self.thread = None + # caches_by_name[name] = self.cache + + class Sentinel(object): + __slots__ = [] + + self.sentinel = Sentinel() + + def check_thread(self): + expected_thread = self.thread + if expected_thread is None: + self.thread = threading.current_thread() + else: + if expected_thread is not threading.current_thread(): + raise ValueError( + "Cache objects can only be accessed from the main thread" + ) + + def get(self, key, dict_keys=None): + entry = self.cache.get(key, self.sentinel) + if entry is not self.sentinel: + # cache_counter.inc_hits(self.name) + + if dict_keys is None: + return DictionaryEntry(entry.full, dict(entry.value)) + else: + return DictionaryEntry(entry.full, { + k: entry.value[k] + for k in dict_keys + if k in entry.value + }) + + # cache_counter.inc_misses(self.name) + return DictionaryEntry(False, {}) + + def invalidate(self, key): + self.check_thread() + + # Increment the sequence number so that any SELECT statements that + # raced with the INSERT don't update the cache (SYN-369) + self.sequence += 1 + self.cache.pop(key, None) + + def invalidate_all(self): + self.check_thread() + self.sequence += 1 + self.cache.clear() + + def update(self, sequence, key, value, full=False): + self.check_thread() + if self.sequence == sequence: + # Only update the cache if the caches sequence number matches the + # number that the cache had before the SELECT was started (SYN-369) + if full: + self._insert(key, value) + else: + self._update_or_insert(key, value) + + def _update_or_insert(self, key, value): + entry = self.cache.setdefault(key, DictionaryEntry(False, {})) + entry.value.update(value) + + def _insert(self, key, value): + self.cache[key] = DictionaryEntry(True, value) diff --git a/tests/util/test_dict_cache.py b/tests/util/test_dict_cache.py new file mode 100644 index 000000000..8cb9be658 --- /dev/null +++ b/tests/util/test_dict_cache.py @@ -0,0 +1,101 @@ +# -*- coding: utf-8 -*- +# Copyright 2015 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from twisted.internet import defer +from tests import unittest + +from synapse.storage.util.caches import DictionaryCache + + +class DictCacheTestCase(unittest.TestCase): + + def setUp(self): + self.cache = DictionaryCache("foobar") + + def test_simple_cache_hit_full(self): + key = "test_simple_cache_hit_full" + + v = self.cache.get(key) + self.assertEqual((False, {}), v) + + seq = self.cache.sequence + test_value = {"test": "test_simple_cache_hit_full"} + self.cache.update(seq, key, test_value, full=True) + + c = self.cache.get(key) + self.assertEqual(test_value, c.value) + + def test_simple_cache_hit_partial(self): + key = "test_simple_cache_hit_partial" + + seq = self.cache.sequence + test_value = { + "test": "test_simple_cache_hit_partial" + } + self.cache.update(seq, key, test_value, full=True) + + c = self.cache.get(key, ["test"]) + self.assertEqual(test_value, c.value) + + def test_simple_cache_miss_partial(self): + key = "test_simple_cache_miss_partial" + + seq = self.cache.sequence + test_value = { + "test": "test_simple_cache_miss_partial" + } + self.cache.update(seq, key, test_value, full=True) + + c = self.cache.get(key, ["test2"]) + self.assertEqual({}, c.value) + + def test_simple_cache_hit_miss_partial(self): + key = "test_simple_cache_hit_miss_partial" + + seq = self.cache.sequence + test_value = { + "test": "test_simple_cache_hit_miss_partial", + "test2": "test_simple_cache_hit_miss_partial2", + "test3": "test_simple_cache_hit_miss_partial3", + } + self.cache.update(seq, key, test_value, full=True) + + c = self.cache.get(key, ["test2"]) + self.assertEqual({"test2": "test_simple_cache_hit_miss_partial2"}, c.value) + + def test_multi_insert(self): + key = "test_simple_cache_hit_miss_partial" + + seq = self.cache.sequence + test_value_1 = { + "test": "test_simple_cache_hit_miss_partial", + } + self.cache.update(seq, key, test_value_1, full=False) + + seq = self.cache.sequence + test_value_2 = { + "test2": "test_simple_cache_hit_miss_partial2", + } + self.cache.update(seq, key, test_value_2, full=False) + + c = self.cache.get(key) + self.assertEqual( + { + "test": "test_simple_cache_hit_miss_partial", + "test2": "test_simple_cache_hit_miss_partial2", + }, + c.value + ) From c67ba143fa1c5d9de0e3e998b9ec74c1f3342742 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 4 Aug 2015 15:58:28 +0100 Subject: [PATCH 103/266] Move DictionaryCache --- synapse/{storage/util/caches.py => util/dictionary_cache.py} | 0 tests/util/test_dict_cache.py | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename synapse/{storage/util/caches.py => util/dictionary_cache.py} (100%) diff --git a/synapse/storage/util/caches.py b/synapse/util/dictionary_cache.py similarity index 100% rename from synapse/storage/util/caches.py rename to synapse/util/dictionary_cache.py diff --git a/tests/util/test_dict_cache.py b/tests/util/test_dict_cache.py index 8cb9be658..79bc1225d 100644 --- a/tests/util/test_dict_cache.py +++ b/tests/util/test_dict_cache.py @@ -17,7 +17,7 @@ from twisted.internet import defer from tests import unittest -from synapse.storage.util.caches import DictionaryCache +from 
synapse.storage.util.caches import DictionaryCache
+from synapse.util.dictionary_cache import DictionaryCache


 class DictCacheTestCase(unittest.TestCase):

From a0dea6eaed2315bff018f86820ed7a866ab0d2ef Mon Sep 17 00:00:00 2001
From: David Baker
Date: Tue, 4 Aug 2015 16:18:17 +0100
Subject: [PATCH 104/266] Remember to yield: not much point testing if a
 deferred is not None

---
 synapse/rest/client/v2_alpha/register.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py
index 93f922370..25dab6f9c 100644
--- a/synapse/rest/client/v2_alpha/register.py
+++ b/synapse/rest/client/v2_alpha/register.py
@@ -224,9 +224,10 @@ class RegisterRestServlet(RestServlet):
             if k not in body:
                 absent.append(k)

-        existingUid = self.hs.get_datastore().get_user_id_by_threepid(
+        existingUid = yield self.hs.get_datastore().get_user_id_by_threepid(
             'email', body['email']
         )
+
         if existingUid is not None:
             raise SynapseError(400, "Email is already in use", Codes.THREEPID_IN_USE)

From 185ac7ee6cde22b9b491ac97013b029071ec9d53 Mon Sep 17 00:00:00 2001
From: David Baker
Date: Tue, 4 Aug 2015 16:29:54 +0100
Subject: [PATCH 105/266] Allow sign in using email address

---
 synapse/rest/client/v1/login.py | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/synapse/rest/client/v1/login.py b/synapse/rest/client/v1/login.py
index 998d4d44c..8ce336482 100644
--- a/synapse/rest/client/v1/login.py
+++ b/synapse/rest/client/v1/login.py
@@ -74,17 +74,24 @@ class LoginRestServlet(ClientV1RestServlet):

     @defer.inlineCallbacks
     def do_password_login(self, login_submission):
-        if not login_submission["user"].startswith('@'):
-            login_submission["user"] = UserID.create(
-                login_submission["user"], self.hs.hostname).to_string()
+        if 'medium' in login_submission and 'address' in login_submission:
+            user_id = yield self.hs.get_datastore().get_user_id_by_threepid(
+                login_submission['medium'], login_submission['address']
+            )
+        else:
+            user_id = login_submission['user']
+
+        if not user_id.startswith('@'):
+            user_id = UserID.create(
+                user_id, self.hs.hostname).to_string()

         handler = self.handlers.login_handler
         token = yield handler.login(
-            user=login_submission["user"],
+            user=user_id,
             password=login_submission["password"])

         result = {
-            "user_id": login_submission["user"],  # may have changed
+            "user_id": user_id,  # may have changed
             "access_token": token,
             "home_server": self.hs.hostname,
         }

From 07507643cb6a2fde1a87d229f8d77525627a0632 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Wed, 5 Aug 2015 15:06:51 +0100
Subject: [PATCH 106/266] Use dictionary cache to do group -> state fetching

---
 synapse/handlers/federation.py   |   2 +-
 synapse/state.py                 |  10 +-
 synapse/storage/_base.py         |  39 ++++---
 synapse/storage/state.py         | 195 ++++++++++++++++++++-----------
 synapse/storage/stream.py        |   3 +-
 synapse/util/dictionary_cache.py |  54 +++++----
 tests/test_state.py              |   2 +-
 7 files changed, 195 insertions(+), 110 deletions(-)

diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index 22f534e49..90649af9e 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -507,7 +507,7 @@ class FederationHandler(BaseHandler):
         event_ids = list(extremities.keys())

         states = yield defer.gatherResults([
-            self.state_handler.resolve_state_groups([e])
+            self.state_handler.resolve_state_groups(room_id, [e])
             for e in event_ids
         ])
         states = dict(zip(event_ids, [s[1] for s in states]))

diff --git a/synapse/state.py b/synapse/state.py
index 80da90a72..b5e5d7bbd 100644
--- a/synapse/state.py +++ b/synapse/state.py @@ -96,7 +96,7 @@ class StateHandler(object): cache.ts = self.clock.time_msec() state = cache.state else: - res = yield self.resolve_state_groups(event_ids) + res = yield self.resolve_state_groups(room_id, event_ids) state = res[1] if event_type: @@ -155,13 +155,13 @@ class StateHandler(object): if event.is_state(): ret = yield self.resolve_state_groups( - [e for e, _ in event.prev_events], + event.room_id, [e for e, _ in event.prev_events], event_type=event.type, state_key=event.state_key, ) else: ret = yield self.resolve_state_groups( - [e for e, _ in event.prev_events], + event.room_id, [e for e, _ in event.prev_events], ) group, curr_state, prev_state = ret @@ -180,7 +180,7 @@ class StateHandler(object): @defer.inlineCallbacks @log_function - def resolve_state_groups(self, event_ids, event_type=None, state_key=""): + def resolve_state_groups(self, room_id, event_ids, event_type=None, state_key=""): """ Given a list of event_ids this method fetches the state at each event, resolves conflicts between them and returns them. @@ -205,7 +205,7 @@ class StateHandler(object): ) state_groups = yield self.store.get_state_groups( - event_ids + room_id, event_ids ) logger.debug( diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 7b76ee3b7..803b9d599 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -18,6 +18,7 @@ from synapse.api.errors import StoreError from synapse.util.logutils import log_function from synapse.util.logcontext import preserve_context_over_fn, LoggingContext from synapse.util.lrucache import LruCache +from synapse.util.dictionary_cache import DictionaryCache import synapse.metrics from util.id_generators import IdGenerator, StreamIdGenerator @@ -87,23 +88,33 @@ class Cache(object): ) def get(self, *keyargs): - if len(keyargs) != self.keylen: - raise ValueError("Expected a key to have %d items", self.keylen) + try: + if len(keyargs) != self.keylen: + raise ValueError("Expected a key to have %d items", self.keylen) - val = self.cache.get(keyargs, self.sentinel) - if val is not self.sentinel: - cache_counter.inc_hits(self.name) - return val + val = self.cache.get(keyargs, self.sentinel) + if val is not self.sentinel: + cache_counter.inc_hits(self.name) + return val - cache_counter.inc_misses(self.name) - raise KeyError() + cache_counter.inc_misses(self.name) + raise KeyError() + except KeyError: + raise + except: + logger.exception("Cache.get failed for %s" % (self.name,)) + raise def update(self, sequence, *args): - self.check_thread() - if self.sequence == sequence: - # Only update the cache if the caches sequence number matches the - # number that the cache had before the SELECT was started (SYN-369) - self.prefill(*args) + try: + self.check_thread() + if self.sequence == sequence: + # Only update the cache if the caches sequence number matches the + # number that the cache had before the SELECT was started (SYN-369) + self.prefill(*args) + except: + logger.exception("Cache.update failed for %s" % (self.name,)) + raise def prefill(self, *args): # because I can't *keyargs, value keyargs = args[:-1] @@ -327,6 +338,8 @@ class SQLBaseStore(object): self._get_event_cache = Cache("*getEvent*", keylen=3, lru=True, max_entries=hs.config.event_cache_size) + self._state_group_cache = DictionaryCache("*stateGroupCache*", 100000) + self._event_fetch_lock = threading.Condition() self._event_fetch_list = [] self._event_fetch_ongoing = 0 diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 
91a5ae86a..a967b3d44 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -45,52 +45,38 @@ class StateStore(SQLBaseStore): """ @defer.inlineCallbacks - def get_state_groups(self, event_ids): + def get_state_groups(self, room_id, event_ids): """ Get the state groups for the given list of event_ids The return value is a dict mapping group names to lists of events. """ - def f(txn): - groups = set() - for event_id in event_ids: - group = self._simple_select_one_onecol_txn( - txn, - table="event_to_state_groups", - keyvalues={"event_id": event_id}, - retcol="state_group", - allow_none=True, - ) - if group: - groups.add(group) - - res = {} - for group in groups: - state_ids = self._simple_select_onecol_txn( - txn, - table="state_groups_state", - keyvalues={"state_group": group}, - retcol="event_id", - ) - - res[group] = state_ids - - return res - - states = yield self.runInteraction( - "get_state_groups", - f, - ) - - state_list = yield defer.gatherResults( + event_and_groups = yield defer.gatherResults( [ - self._fetch_events_for_group(group, vals) - for group, vals in states.items() + self._get_state_group_for_event( + room_id, event_id, + ).addCallback(lambda group, event_id: (event_id, group), event_id) + for event_id in event_ids ], consumeErrors=True, - ) + ).addErrback(unwrapFirstError) - defer.returnValue(dict(state_list)) + groups = set(group for _, group in event_and_groups if group) + + group_to_state = yield defer.gatherResults( + [ + self._get_state_for_group( + group, + ).addCallback(lambda state_dict, group: (group, state_dict), group) + for group in groups + ], + consumeErrors=True, + ).addErrback(unwrapFirstError) + + defer.returnValue({ + group: state_map.values() + for group, state_map in group_to_state + }) @cached(num_args=1) def _fetch_events_for_group(self, key, events): @@ -207,16 +193,25 @@ class StateStore(SQLBaseStore): events = yield self._get_events(event_ids, get_prev_content=False) defer.returnValue(events) - @cached(num_args=3, lru=True) - def _get_state_groups_from_group(self, room_id, group, types): + @cached(num_args=2, lru=True, max_entries=10000) + def _get_state_groups_from_group(self, group, types): def f(txn): + if types is not None: + where_clause = "AND (%s)" % ( + " OR ".join(["(type = ? AND state_key = ?)"] * len(types)), + ) + else: + where_clause = "" + sql = ( "SELECT event_id FROM state_groups_state WHERE" - " room_id = ? AND state_group = ? AND (%s)" - ) % (" OR ".join(["(type = ? AND state_key = ?)"] * len(types)),) + " state_group = ? %s" + ) % (where_clause,) + + args = [group] + if types is not None: + args.extend([i for typ in types for i in typ]) - args = [room_id, group] - args.extend([i for typ in types for i in typ]) txn.execute(sql, args) return group, [ @@ -229,7 +224,7 @@ class StateStore(SQLBaseStore): f, ) - @cached(num_args=3, lru=True, max_entries=100000) + @cached(num_args=3, lru=True, max_entries=20000) def _get_state_for_event_id(self, room_id, event_id, types): def f(txn): type_and_state_sql = " OR ".join([ @@ -280,40 +275,33 @@ class StateStore(SQLBaseStore): deferred: A list of dicts corresponding to the event_ids given. 
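Both rewritten methods in this hunk rely on the same gather-and-tag idiom: each parallel Deferred gets tagged with the key it was computed for, so the unordered gatherResults output can be reassembled into a dict. A minimal standalone sketch of the idiom (gather_to_dict and fetch_one are illustrative names, not Synapse code):

    from twisted.internet import defer

    def tag_with_key(deferred, key):
        # Pair each result with the key it was computed for; the extra
        # positional argument to addCallback pins the key per Deferred.
        return deferred.addCallback(lambda result, k: (k, result), key)

    @defer.inlineCallbacks
    def gather_to_dict(keys, fetch_one):
        # fetch_one(key) -> Deferred. Results may fire in any order; the
        # (key, result) tagging keeps them matched up.
        pairs = yield defer.gatherResults(
            [tag_with_key(fetch_one(k), k) for k in keys],
            consumeErrors=True,
        )
        defer.returnValue(dict(pairs))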
The dicts are mappings from (type, state_key) -> state_events """ - set_types = frozenset(types) - res = yield defer.gatherResults( + event_and_groups = yield defer.gatherResults( [ - self._get_state_for_event_id( - room_id, event_id, set_types, - ) + self._get_state_group_for_event( + room_id, event_id, + ).addCallback(lambda group, event_id: (event_id, group), event_id) for event_id in event_ids ], consumeErrors=True, ).addErrback(unwrapFirstError) - event_to_state_ids = dict(res) + groups = set(group for _, group in event_and_groups) - event_dict = yield self._get_events( + res = yield defer.gatherResults( [ - item - for lst in event_to_state_ids.values() - for item in lst + self._get_state_for_group( + group, types + ).addCallback(lambda state_dict, group: (group, state_dict), group) + for group in groups ], - get_prev_content=False - ).addCallback( - lambda evs: {ev.event_id: ev for ev in evs} - ) + consumeErrors=True, + ).addErrback(unwrapFirstError) + + group_to_state = dict(res) event_to_state = { - event_id: { - (ev.type, ev.state_key): ev - for ev in [ - event_dict[state_id] - for state_id in state_ids - if state_id in event_dict - ] - } - for event_id, state_ids in event_to_state_ids.items() + event_id: group_to_state[group] + for event_id, group in event_and_groups } defer.returnValue([ @@ -321,6 +309,79 @@ class StateStore(SQLBaseStore): for event in event_ids ]) + @cached(num_args=2, lru=True, max_entries=100000) + def _get_state_group_for_event(self, room_id, event_id): + return self._simple_select_one_onecol( + table="event_to_state_groups", + keyvalues={ + "event_id": event_id, + }, + retcol="state_group", + allow_none=True, + desc="_get_state_group_for_event", + ) + + @defer.inlineCallbacks + def _get_state_for_group(self, group, types=None): + is_all, state_dict = self._state_group_cache.get(group) + + type_to_key = {} + missing_types = set() + if types is not None: + for typ, state_key in types: + if state_key is None: + type_to_key[typ] = None + missing_types.add((typ, state_key)) + else: + if type_to_key.get(typ, object()) is not None: + type_to_key.setdefault(typ, set()).add(state_key) + + if (typ, state_key) not in state_dict: + missing_types.add((typ, state_key)) + + if is_all and types is None: + defer.returnValue(state_dict) + + if is_all or (types is not None and not missing_types): + def include(typ, state_key): + sentinel = object() + valid_state_keys = type_to_key.get(typ, sentinel) + if valid_state_keys is sentinel: + return False + if valid_state_keys is None: + return True + if state_key in valid_state_keys: + return True + return False + + defer.returnValue({ + k: v + for k, v in state_dict.items() + if include(k[0], k[1]) + }) + + # Okay, so we have some missing_types, lets fetch them. 
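# (The write-back that follows uses the same SYN-369 guard as Cache.update()
# above: the DictionaryCache's sequence number is read before the database
# SELECT, so update() can refuse the write if an invalidation raced with it.)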
+ cache_seq_num = self._state_group_cache.sequence + _, state_ids = yield self._get_state_groups_from_group( + group, + frozenset(types) if types else None + ) + state_events = yield self._get_events(state_ids, get_prev_content=False) + state_dict = { + (e.type, e.state_key): e + for e in state_events + } + + # Update the cache + self._state_group_cache.update( + cache_seq_num, + key=group, + value=state_dict, + full=(types is None), + ) + + defer.returnValue(state_dict) + def _make_group_id(clock): return str(int(clock.time_msec())) + random_string(5) diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py index af45fc561..9db259d5f 100644 --- a/synapse/storage/stream.py +++ b/synapse/storage/stream.py @@ -300,8 +300,7 @@ class StreamStore(SQLBaseStore): defer.returnValue((events, token)) @defer.inlineCallbacks - def get_recent_events_for_room(self, room_id, limit, end_token, - with_feedback=False, from_token=None): + def get_recent_events_for_room(self, room_id, limit, end_token, from_token=None): # TODO (erikj): Handle compressed feedback end_token = RoomStreamToken.parse_stream_token(end_token) diff --git a/synapse/util/dictionary_cache.py b/synapse/util/dictionary_cache.py index 0877cc79f..38b131677 100644 --- a/synapse/util/dictionary_cache.py +++ b/synapse/util/dictionary_cache.py @@ -16,6 +16,10 @@ from synapse.util.lrucache import LruCache from collections import namedtuple import threading +import logging + + +logger = logging.getLogger(__name__) DictionaryEntry = namedtuple("DictionaryEntry", ("full", "value")) @@ -47,21 +51,25 @@ class DictionaryCache(object): ) def get(self, key, dict_keys=None): - entry = self.cache.get(key, self.sentinel) - if entry is not self.sentinel: - # cache_counter.inc_hits(self.name) + try: + entry = self.cache.get(key, self.sentinel) + if entry is not self.sentinel: + # cache_counter.inc_hits(self.name) - if dict_keys is None: - return DictionaryEntry(entry.full, dict(entry.value)) - else: - return DictionaryEntry(entry.full, { - k: entry.value[k] - for k in dict_keys - if k in entry.value - }) + if dict_keys is None: + return DictionaryEntry(entry.full, dict(entry.value)) + else: + return DictionaryEntry(entry.full, { + k: entry.value[k] + for k in dict_keys + if k in entry.value + }) - # cache_counter.inc_misses(self.name) - return DictionaryEntry(False, {}) + # cache_counter.inc_misses(self.name) + return DictionaryEntry(False, {}) + except: + logger.exception("get failed") + raise def invalidate(self, key): self.check_thread() @@ -77,14 +85,18 @@ class DictionaryCache(object): self.cache.clear() def update(self, sequence, key, value, full=False): - self.check_thread() - if self.sequence == sequence: - # Only update the cache if the caches sequence number matches the - # number that the cache had before the SELECT was started (SYN-369) - if full: - self._insert(key, value) - else: - self._update_or_insert(key, value) + try: + self.check_thread() + if self.sequence == sequence: + # Only update the cache if the caches sequence number matches the + # number that the cache had before the SELECT was started (SYN-369) + if full: + self._insert(key, value) + else: + self._update_or_insert(key, value) + except: + logger.exception("update failed") + raise def _update_or_insert(self, key, value): entry = self.cache.setdefault(key, DictionaryEntry(False, {})) diff --git a/tests/test_state.py b/tests/test_state.py index fea25f702..584535875 100644 --- a/tests/test_state.py +++ b/tests/test_state.py @@ -69,7 +69,7 @@ class StateGroupStore(object): 
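# (The test stub below mirrors the storage-layer change in this patch:
# get_state_groups now takes room_id as its first argument.)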
self._next_group = 1 - def get_state_groups(self, event_ids): + def get_state_groups(self, room_id, event_ids): groups = {} for event_id in event_ids: group = self._event_to_state_group.get(event_id) From a89559d79714b8dd07ec5a73f2629d003fcad92c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 5 Aug 2015 15:39:47 +0100 Subject: [PATCH 107/266] Use LRU cache by default --- synapse/storage/_base.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 8f812f0fd..2a601e37e 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -57,7 +57,7 @@ cache_counter = metrics.register_cache( class Cache(object): - def __init__(self, name, max_entries=1000, keylen=1, lru=False): + def __init__(self, name, max_entries=1000, keylen=1, lru=True): if lru: self.cache = LruCache(max_size=max_entries) self.max_entries = None @@ -141,7 +141,7 @@ class CacheDescriptor(object): which can be used to insert values into the cache specifically, without calling the calculation function. """ - def __init__(self, orig, max_entries=1000, num_args=1, lru=False): + def __init__(self, orig, max_entries=1000, num_args=1, lru=True): self.orig = orig self.max_entries = max_entries @@ -192,7 +192,7 @@ class CacheDescriptor(object): return wrapped -def cached(max_entries=1000, num_args=1, lru=False): +def cached(max_entries=1000, num_args=1, lru=True): return lambda orig: CacheDescriptor( orig, max_entries=max_entries, From 1e62a3d3a9d09fbd856b4abd733f4fb687f746e0 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 5 Aug 2015 15:40:40 +0100 Subject: [PATCH 108/266] Up the cache size for 'get_joined_hosts_for_room' and 'get_users_in_room' --- synapse/storage/roommember.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 4db07f6fb..55dd3f6cf 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -78,7 +78,7 @@ class RoomMemberStore(SQLBaseStore): lambda events: events[0] if events else None ) - @cached() + @cached(max_entries=5000) def get_users_in_room(self, room_id): def f(txn): @@ -154,7 +154,7 @@ class RoomMemberStore(SQLBaseStore): RoomsForUser(**r) for r in self.cursor_to_dict(txn) ] - @cached() + @cached(max_entries=5000) def get_joined_hosts_for_room(self, room_id): return self.runInteraction( "get_joined_hosts_for_room", From 7eea3e356ff58168f3525879a8eb684f0681ee68 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 6 Aug 2015 13:33:34 +0100 Subject: [PATCH 109/266] Make @cached cache deferreds rather than the deferreds' values --- synapse/storage/_base.py | 21 ++++++++------------- synapse/util/async.py | 9 +++++++-- tests/storage/test__base.py | 11 +++++++---- 3 files changed, 22 insertions(+), 19 deletions(-) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index f1265541b..8604d38c3 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -15,6 +15,7 @@ import logging from synapse.api.errors import StoreError +from synapse.util.async import ObservableDeferred from synapse.util.logutils import log_function from synapse.util.logcontext import preserve_context_over_fn, LoggingContext from synapse.util.lrucache import LruCache @@ -173,33 +174,27 @@ class CacheDescriptor(object): ) @functools.wraps(self.orig) - @defer.inlineCallbacks def wrapped(*args, **kwargs): arg_dict = inspect.getcallargs(self.orig, obj, *args, **kwargs) keyargs = [arg_dict[arg_nm] for arg_nm in 
self.arg_names] try: cached_result = cache.get(*keyargs) - if DEBUG_CACHES: - actual_result = yield self.function_to_call(obj, *args, **kwargs) - if actual_result != cached_result: - logger.error( - "Stale cache entry %s%r: cached: %r, actual %r", - self.orig.__name__, keyargs, - cached_result, actual_result, - ) - raise ValueError("Stale cache entry") - defer.returnValue(cached_result) + return cached_result.observe() except KeyError: # Get the sequence number of the cache before reading from the # database so that we can tell if the cache is invalidated # while the SELECT is executing (SYN-369) sequence = cache.sequence - ret = yield self.function_to_call(obj, *args, **kwargs) + ret = defer.maybeDeferred( + self.function_to_call, + obj, *args, **kwargs + ) + ret = ObservableDeferred(ret, consumeErrors=False) cache.update(sequence, *(keyargs + [ret])) - defer.returnValue(ret) + return ret.observe() wrapped.invalidate = cache.invalidate wrapped.invalidate_all = cache.invalidate_all diff --git a/synapse/util/async.py b/synapse/util/async.py index 5a1d545c9..7bf2d38bb 100644 --- a/synapse/util/async.py +++ b/synapse/util/async.py @@ -51,7 +51,7 @@ class ObservableDeferred(object): object.__setattr__(self, "_observers", set()) def callback(r): - self._result = (True, r) + object.__setattr__(self, "_result", (True, r)) while self._observers: try: self._observers.pop().callback(r) @@ -60,7 +60,7 @@ class ObservableDeferred(object): return r def errback(f): - self._result = (False, f) + object.__setattr__(self, "_result", (False, f)) while self._observers: try: self._observers.pop().errback(f) @@ -97,3 +97,8 @@ class ObservableDeferred(object): def __setattr__(self, name, value): setattr(self._deferred, name, value) + + def __repr__(self): + return "" % ( + id(self), self._result, self._deferred, + ) diff --git a/tests/storage/test__base.py b/tests/storage/test__base.py index 8c3d2952b..8fa305d18 100644 --- a/tests/storage/test__base.py +++ b/tests/storage/test__base.py @@ -17,6 +17,8 @@ from tests import unittest from twisted.internet import defer +from synapse.util.async import ObservableDeferred + from synapse.storage._base import Cache, cached @@ -178,19 +180,20 @@ class CacheDecoratorTestCase(unittest.TestCase): self.assertTrue(callcount[0] >= 14, msg="Expected callcount >= 14, got %d" % (callcount[0])) - @defer.inlineCallbacks def test_prefill(self): callcount = [0] + d = defer.succeed(123) + class A(object): @cached() def func(self, key): callcount[0] += 1 - return key + return d a = A() - a.func.prefill("foo", 123) + a.func.prefill("foo", ObservableDeferred(d)) - self.assertEquals((yield a.func("foo")), 123) + self.assertEquals(a.func("foo").result, d.result) self.assertEquals(callcount[0], 0) From 433314cc34295ffefca349b9fc6914d81f2521e0 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 6 Aug 2015 14:01:05 +0100 Subject: [PATCH 110/266] Re-implement DEBUG_CACHES flag --- synapse/storage/_base.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 7b2be6745..8384299a1 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -178,8 +178,23 @@ class CacheDescriptor(object): arg_dict = inspect.getcallargs(self.orig, obj, *args, **kwargs) keyargs = [arg_dict[arg_nm] for arg_nm in self.arg_names] try: - cached_result = cache.get(*keyargs) - return cached_result.observe() + cached_result_d = cache.get(*keyargs) + + if DEBUG_CACHES: + + @defer.inlineCallbacks + def 
check_result(cached_result): + actual_result = yield self.function_to_call(obj, *args, **kwargs) + if actual_result != cached_result: + logger.error( + "Stale cache entry %s%r: cached: %r, actual %r", + self.orig.__name__, keyargs, + cached_result, actual_result, + ) + raise ValueError("Stale cache entry") + cached_result_d.observe().addCallback(check_result) + + return cached_result_d.observe() except KeyError: # Get the sequence number of the cache before reading from the # database so that we can tell if the cache is invalidated From b811c9857491c0569ae367708721fbbaebf3adab Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 6 Aug 2015 14:01:27 +0100 Subject: [PATCH 111/266] Remove failed deferreds from cache --- synapse/storage/_base.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 8384299a1..99c094875 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -205,8 +205,14 @@ class CacheDescriptor(object): self.function_to_call, obj, *args, **kwargs ) - ret = ObservableDeferred(ret, consumeErrors=False) + def onErr(f): + cache.invalidate(*keyargs) + return f + + ret.addErrback(onErr) + + ret = ObservableDeferred(ret, consumeErrors=False) cache.update(sequence, *(keyargs + [ret])) return ret.observe() From 63b1eaf32c57f0e1bae9b6a3768a560165637aee Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 6 Aug 2015 14:02:50 +0100 Subject: [PATCH 112/266] Docs --- synapse/storage/_base.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 99c094875..0872a438f 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -132,6 +132,9 @@ class Cache(object): class CacheDescriptor(object): """ A method decorator that applies a memoizing cache around the function. + This caches deferreds, rather than the results themselves. Deferreds that + fail are removed from the cache. + The function is presumed to take zero or more arguments, which are used in a tuple as the key for the cache. Hits are served directly from the cache; misses use the function body to generate the value. From fe994e728fba5ef43e0436f6472cd94d6ce3c902 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 7 Aug 2015 10:17:38 +0100 Subject: [PATCH 113/266] Store absence of state in cache --- synapse/storage/state.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 48a402355..e924258d1 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -234,7 +234,8 @@ class StateStore(SQLBaseStore): ]) sql = ( - "SELECT sg.event_id FROM state_groups_state as sg" + "SELECT e.event_id, sg.state_group, sg.event_id" + " FROM state_groups_state as sg" " INNER JOIN event_to_state_groups as e" " ON e.state_group = sg.state_group" " WHERE e.event_id = ? AND (%s)" @@ -342,8 +343,9 @@ class StateStore(SQLBaseStore): defer.returnValue(state_dict) if is_all or (types is not None and not missing_types): + sentinel = object() + def include(typ, state_key): - sentinel = object() valid_state_keys = type_to_key.get(typ, sentinel) if valid_state_keys is sentinel: return False @@ -356,20 +358,24 @@ class StateStore(SQLBaseStore): defer.returnValue({ k: v for k, v in state_dict.items() - if include(k[0], k[1]) + if v and include(k[0], k[1]) }) # Okay, so we have some missing_types, lets fetch them. 
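# (Note the negative caching this hunk introduces: each requested
# (type, state_key) with no corresponding event is stored as None, letting a
# later cache hit distinguish "known to be absent" from "never fetched"; the
# None placeholders are filtered back out before returning.)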
cache_seq_num = self._state_group_cache.sequence _, state_ids = yield self._get_state_groups_from_group( group, - frozenset(types) if types else None + frozenset(missing_types) if types else None ) state_events = yield self._get_events(state_ids, get_prev_content=False) state_dict = { + key: None + for key in missing_types + } + state_dict.update({ (e.type, e.state_key): e for e in state_events - } + }) # Update the cache self._state_group_cache.update( @@ -379,7 +385,11 @@ class StateStore(SQLBaseStore): full=(types is None), ) - defer.returnValue(state_dict) + defer.returnValue({ + key: value + for key, value in state_dict.items() + if value + }) def _make_group_id(clock): From b8e386db59eea6a59b8338acfd8ea42632d539be Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 7 Aug 2015 11:52:21 +0100 Subject: [PATCH 114/266] Change Cache to not use *args in its interface --- synapse/storage/__init__.py | 4 +- synapse/storage/_base.py | 85 ++++++++++++----------------- synapse/storage/directory.py | 4 +- synapse/storage/event_federation.py | 4 +- synapse/storage/events.py | 21 +++---- synapse/storage/keys.py | 2 +- synapse/storage/presence.py | 4 +- synapse/storage/push_rule.py | 16 +++--- synapse/storage/registration.py | 2 +- synapse/storage/roommember.py | 6 +- tests/storage/test__base.py | 12 ++-- 11 files changed, 73 insertions(+), 87 deletions(-) diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index 71d5d9250..1a6a8a376 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -99,7 +99,7 @@ class DataStore(RoomMemberStore, RoomStore, key = (user.to_string(), access_token, device_id, ip) try: - last_seen = self.client_ip_last_seen.get(*key) + last_seen = self.client_ip_last_seen.get(key) except KeyError: last_seen = None @@ -107,7 +107,7 @@ class DataStore(RoomMemberStore, RoomStore, if last_seen is not None and (now - last_seen) < LAST_SEEN_GRANULARITY: defer.returnValue(None) - self.client_ip_last_seen.prefill(*key + (now,)) + self.client_ip_last_seen.prefill(key, now) # It's safe not to lock here: a) no unique constraint, # b) LAST_SEEN_GRANULARITY makes concurrent updates incredibly unlikely diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index d4751769e..32089b05e 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -58,6 +58,9 @@ cache_counter = metrics.register_cache( ) +_CacheSentinel = object() + + class Cache(object): def __init__(self, name, max_entries=1000, keylen=1, lru=True): @@ -74,11 +77,6 @@ class Cache(object): self.thread = None caches_by_name[name] = self.cache - class Sentinel(object): - __slots__ = [] - - self.sentinel = Sentinel() - def check_thread(self): expected_thread = self.thread if expected_thread is None: @@ -89,52 +87,38 @@ class Cache(object): "Cache objects can only be accessed from the main thread" ) - def get(self, *keyargs): - try: - if len(keyargs) != self.keylen: - raise ValueError("Expected a key to have %d items", self.keylen) + def get(self, keyargs, default=_CacheSentinel): + val = self.cache.get(keyargs, _CacheSentinel) + if val is not _CacheSentinel: + cache_counter.inc_hits(self.name) + return val - val = self.cache.get(keyargs, self.sentinel) - if val is not self.sentinel: - cache_counter.inc_hits(self.name) - return val + cache_counter.inc_misses(self.name) - cache_counter.inc_misses(self.name) + if default is _CacheSentinel: raise KeyError() - except KeyError: - raise - except: - logger.exception("Cache.get failed for %s" % (self.name,)) - raise + else: + 
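# (A supplied default turns a cache miss into a normal return instead of
# a KeyError, mirroring dict.get.)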
return default - def update(self, sequence, *args): - try: - self.check_thread() - if self.sequence == sequence: - # Only update the cache if the caches sequence number matches the - # number that the cache had before the SELECT was started (SYN-369) - self.prefill(*args) - except: - logger.exception("Cache.update failed for %s" % (self.name,)) - raise - - def prefill(self, *args): # because I can't *keyargs, value - keyargs = args[:-1] - value = args[-1] - - if len(keyargs) != self.keylen: - raise ValueError("Expected a key to have %d items", self.keylen) + def update(self, sequence, keyargs, value): + self.check_thread() + if self.sequence == sequence: + # Only update the cache if the caches sequence number matches the + # number that the cache had before the SELECT was started (SYN-369) + self.prefill(keyargs, value) + def prefill(self, keyargs, value): if self.max_entries is not None: while len(self.cache) >= self.max_entries: self.cache.popitem(last=False) self.cache[keyargs] = value - def invalidate(self, *keyargs): + def invalidate(self, keyargs): self.check_thread() - if len(keyargs) != self.keylen: - raise ValueError("Expected a key to have %d items", self.keylen) + if not isinstance(keyargs, tuple): + raise ValueError("keyargs must be a tuple.") + # Increment the sequence number so that any SELECT statements that # raced with the INSERT don't update the cache (SYN-369) self.sequence += 1 @@ -185,20 +169,21 @@ class CacheDescriptor(object): % (orig.__name__,) ) - def __get__(self, obj, objtype=None): - cache = Cache( + self.cache = Cache( name=self.orig.__name__, max_entries=self.max_entries, keylen=self.num_args, lru=self.lru, ) + def __get__(self, obj, objtype=None): + @functools.wraps(self.orig) def wrapped(*args, **kwargs): arg_dict = inspect.getcallargs(self.orig, obj, *args, **kwargs) - keyargs = [arg_dict[arg_nm] for arg_nm in self.arg_names] + keyargs = tuple(arg_dict[arg_nm] for arg_nm in self.arg_names) try: - cached_result_d = cache.get(*keyargs) + cached_result_d = self.cache.get(keyargs) if DEBUG_CACHES: @@ -219,7 +204,7 @@ class CacheDescriptor(object): # Get the sequence number of the cache before reading from the # database so that we can tell if the cache is invalidated # while the SELECT is executing (SYN-369) - sequence = cache.sequence + sequence = self.cache.sequence ret = defer.maybeDeferred( self.function_to_call, @@ -227,19 +212,19 @@ class CacheDescriptor(object): ) def onErr(f): - cache.invalidate(*keyargs) + self.cache.invalidate(keyargs) return f ret.addErrback(onErr) - ret = ObservableDeferred(ret, consumeErrors=False) - cache.update(sequence, *(keyargs + [ret])) + ret = ObservableDeferred(ret, consumeErrors=True) + self.cache.update(sequence, keyargs, ret) return ret.observe() - wrapped.invalidate = cache.invalidate - wrapped.invalidate_all = cache.invalidate_all - wrapped.prefill = cache.prefill + wrapped.invalidate = self.cache.invalidate + wrapped.invalidate_all = self.cache.invalidate_all + wrapped.prefill = self.cache.prefill obj.__dict__[self.orig.__name__] = wrapped diff --git a/synapse/storage/directory.py b/synapse/storage/directory.py index 2b2bdf861..f3947bbe8 100644 --- a/synapse/storage/directory.py +++ b/synapse/storage/directory.py @@ -104,7 +104,7 @@ class DirectoryStore(SQLBaseStore): }, desc="create_room_alias_association", ) - self.get_aliases_for_room.invalidate(room_id) + self.get_aliases_for_room.invalidate((room_id,)) @defer.inlineCallbacks def delete_room_alias(self, room_alias): @@ -114,7 +114,7 @@ class 
DirectoryStore(SQLBaseStore): room_alias, ) - self.get_aliases_for_room.invalidate(room_id) + self.get_aliases_for_room.invalidate((room_id,)) defer.returnValue(room_id) def _delete_room_alias_txn(self, txn, room_alias): diff --git a/synapse/storage/event_federation.py b/synapse/storage/event_federation.py index 45b86c94e..910b6598a 100644 --- a/synapse/storage/event_federation.py +++ b/synapse/storage/event_federation.py @@ -362,7 +362,7 @@ class EventFederationStore(SQLBaseStore): for room_id in events_by_room: txn.call_after( - self.get_latest_event_ids_in_room.invalidate, room_id + self.get_latest_event_ids_in_room.invalidate, (room_id,) ) def get_backfill_events(self, room_id, event_list, limit): @@ -505,4 +505,4 @@ class EventFederationStore(SQLBaseStore): query = "DELETE FROM event_forward_extremities WHERE room_id = ?" txn.execute(query, (room_id,)) - txn.call_after(self.get_latest_event_ids_in_room.invalidate, room_id) + txn.call_after(self.get_latest_event_ids_in_room.invalidate, (room_id,)) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index ed7ea3880..5b6491802 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -162,8 +162,8 @@ class EventsStore(SQLBaseStore): if current_state: txn.call_after(self.get_current_state_for_key.invalidate_all) txn.call_after(self.get_rooms_for_user.invalidate_all) - txn.call_after(self.get_users_in_room.invalidate, event.room_id) - txn.call_after(self.get_joined_hosts_for_room.invalidate, event.room_id) + txn.call_after(self.get_users_in_room.invalidate, (event.room_id,)) + txn.call_after(self.get_joined_hosts_for_room.invalidate, (event.room_id,)) txn.call_after(self.get_room_name_and_aliases, event.room_id) self._simple_delete_txn( @@ -430,13 +430,13 @@ class EventsStore(SQLBaseStore): if not context.rejected: txn.call_after( self.get_current_state_for_key.invalidate, - event.room_id, event.type, event.state_key - ) + (event.room_id, event.type, event.state_key,) + ) if event.type in [EventTypes.Name, EventTypes.Aliases]: txn.call_after( self.get_room_name_and_aliases.invalidate, - event.room_id + (event.room_id,) ) self._simple_upsert_txn( @@ -567,8 +567,9 @@ class EventsStore(SQLBaseStore): def _invalidate_get_event_cache(self, event_id): for check_redacted in (False, True): for get_prev_content in (False, True): - self._get_event_cache.invalidate(event_id, check_redacted, - get_prev_content) + self._get_event_cache.invalidate( + (event_id, check_redacted, get_prev_content) + ) def _get_event_txn(self, txn, event_id, check_redacted=True, get_prev_content=False, allow_rejected=False): @@ -589,7 +590,7 @@ class EventsStore(SQLBaseStore): for event_id in events: try: ret = self._get_event_cache.get( - event_id, check_redacted, get_prev_content + (event_id, check_redacted, get_prev_content,) ) if allow_rejected or not ret.rejected_reason: @@ -822,7 +823,7 @@ class EventsStore(SQLBaseStore): ev.unsigned["prev_content"] = prev.get_dict()["content"] self._get_event_cache.prefill( - ev.event_id, check_redacted, get_prev_content, ev + (ev.event_id, check_redacted, get_prev_content), ev ) defer.returnValue(ev) @@ -879,7 +880,7 @@ class EventsStore(SQLBaseStore): ev.unsigned["prev_content"] = prev.get_dict()["content"] self._get_event_cache.prefill( - ev.event_id, check_redacted, get_prev_content, ev + (ev.event_id, check_redacted, get_prev_content), ev ) return ev diff --git a/synapse/storage/keys.py b/synapse/storage/keys.py index e3f98f0cd..49b8e37cf 100644 --- a/synapse/storage/keys.py +++ 
b/synapse/storage/keys.py @@ -131,7 +131,7 @@ class KeyStore(SQLBaseStore): desc="store_server_verify_key", ) - self.get_all_server_verify_keys.invalidate(server_name) + self.get_all_server_verify_keys.invalidate((server_name,)) def store_server_keys_json(self, server_name, key_id, from_server, ts_now_ms, ts_expires_ms, key_json_bytes): diff --git a/synapse/storage/presence.py b/synapse/storage/presence.py index fefcf6bce..576cf670c 100644 --- a/synapse/storage/presence.py +++ b/synapse/storage/presence.py @@ -98,7 +98,7 @@ class PresenceStore(SQLBaseStore): updatevalues={"accepted": True}, desc="set_presence_list_accepted", ) - self.get_presence_list_accepted.invalidate(observer_localpart) + self.get_presence_list_accepted.invalidate((observer_localpart,)) defer.returnValue(result) def get_presence_list(self, observer_localpart, accepted=None): @@ -133,4 +133,4 @@ class PresenceStore(SQLBaseStore): "observed_user_id": observed_userid}, desc="del_presence_list", ) - self.get_presence_list_accepted.invalidate(observer_localpart) + self.get_presence_list_accepted.invalidate((observer_localpart,)) diff --git a/synapse/storage/push_rule.py b/synapse/storage/push_rule.py index a220f3632..9b88ca7b3 100644 --- a/synapse/storage/push_rule.py +++ b/synapse/storage/push_rule.py @@ -151,11 +151,11 @@ class PushRuleStore(SQLBaseStore): txn.execute(sql, (user_name, priority_class, new_rule_priority)) txn.call_after( - self.get_push_rules_for_user.invalidate, user_name + self.get_push_rules_for_user.invalidate, (user_name,) ) txn.call_after( - self.get_push_rules_enabled_for_user.invalidate, user_name + self.get_push_rules_enabled_for_user.invalidate, (user_name,) ) self._simple_insert_txn( @@ -187,10 +187,10 @@ class PushRuleStore(SQLBaseStore): new_rule['priority'] = new_prio txn.call_after( - self.get_push_rules_for_user.invalidate, user_name + self.get_push_rules_for_user.invalidate, (user_name,) ) txn.call_after( - self.get_push_rules_enabled_for_user.invalidate, user_name + self.get_push_rules_enabled_for_user.invalidate, (user_name,) ) self._simple_insert_txn( @@ -216,8 +216,8 @@ class PushRuleStore(SQLBaseStore): desc="delete_push_rule", ) - self.get_push_rules_for_user.invalidate(user_name) - self.get_push_rules_enabled_for_user.invalidate(user_name) + self.get_push_rules_for_user.invalidate((user_name,)) + self.get_push_rules_enabled_for_user.invalidate((user_name,)) @defer.inlineCallbacks def set_push_rule_enabled(self, user_name, rule_id, enabled): @@ -238,10 +238,10 @@ class PushRuleStore(SQLBaseStore): {'id': new_id}, ) txn.call_after( - self.get_push_rules_for_user.invalidate, user_name + self.get_push_rules_for_user.invalidate, (user_name,) ) txn.call_after( - self.get_push_rules_enabled_for_user.invalidate, user_name + self.get_push_rules_enabled_for_user.invalidate, (user_name,) ) diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 90e2606be..4eaa088b3 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -131,7 +131,7 @@ class RegistrationStore(SQLBaseStore): user_id ) for r in rows: - self.get_user_by_token.invalidate(r) + self.get_user_by_token.invalidate((r,)) @cached() def get_user_by_token(self, token): diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 55dd3f6cf..9f14f38f2 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -54,9 +54,9 @@ class RoomMemberStore(SQLBaseStore): ) for event in events: - txn.call_after(self.get_rooms_for_user.invalidate, 
event.state_key)
-            txn.call_after(self.get_joined_hosts_for_room.invalidate, event.room_id)
-            txn.call_after(self.get_users_in_room.invalidate, event.room_id)
+            txn.call_after(self.get_rooms_for_user.invalidate, (event.state_key,))
+            txn.call_after(self.get_joined_hosts_for_room.invalidate, (event.room_id,))
+            txn.call_after(self.get_users_in_room.invalidate, (event.room_id,))

     def get_room_member(self, user_id, room_id):
         """Retrieve the current state of a room member.
diff --git a/tests/storage/test__base.py b/tests/storage/test__base.py
index 8fa305d18..abee2f631 100644
--- a/tests/storage/test__base.py
+++ b/tests/storage/test__base.py
@@ -42,12 +42,12 @@ class CacheTestCase(unittest.TestCase):
         self.assertEquals(self.cache.get("foo"), 123)

     def test_invalidate(self):
-        self.cache.prefill("foo", 123)
-        self.cache.invalidate("foo")
+        self.cache.prefill(("foo",), 123)
+        self.cache.invalidate(("foo",))

         failed = False
         try:
-            self.cache.get("foo")
+            self.cache.get(("foo",))
         except KeyError:
             failed = True

@@ -141,7 +141,7 @@ class CacheDecoratorTestCase(unittest.TestCase):

         self.assertEquals(callcount[0], 1)

-        a.func.invalidate("foo")
+        a.func.invalidate(("foo",))

         yield a.func("foo")

@@ -153,7 +153,7 @@ class CacheDecoratorTestCase(unittest.TestCase):
             def func(self, key):
                 return key

-        A().func.invalidate("what")
+        A().func.invalidate(("what",))

     @defer.inlineCallbacks
     def test_max_entries(self):

@@ -193,7 +193,7 @@ class CacheDecoratorTestCase(unittest.TestCase):

         a = A()

-        a.func.prefill("foo", ObservableDeferred(d))
+        a.func.prefill(("foo",), ObservableDeferred(d))

         self.assertEquals(a.func("foo").result, d.result)
         self.assertEquals(callcount[0], 0)

From b3768ec10ad7a96f0d7f9d774c44fe8ade1f80e0 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Fri, 7 Aug 2015 13:41:05 +0100
Subject: [PATCH 115/266] Remove unnecessary cache

---
 synapse/storage/state.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/synapse/storage/state.py b/synapse/storage/state.py
index e924258d1..5588c9e69 100644
--- a/synapse/storage/state.py
+++ b/synapse/storage/state.py
@@ -192,7 +192,6 @@ class StateStore(SQLBaseStore):
         events = yield self._get_events(event_ids, get_prev_content=False)
         defer.returnValue(events)

-    @cached(num_args=2, lru=True, max_entries=10000)
     def _get_state_groups_from_group(self, group, types):
         def f(txn):

From b2c7bd4b098bea86189b2c8323265e23673d257d Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Fri, 7 Aug 2015 14:42:34 +0100
Subject: [PATCH 116/266] Cache get_recent_events_for_room

---
 synapse/storage/stream.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py
index 9db259d5f..b59fe8100 100644
--- a/synapse/storage/stream.py
+++ b/synapse/storage/stream.py
@@ -35,7 +35,7 @@ what sort order was used:

 from twisted.internet import defer

-from ._base import SQLBaseStore
+from ._base import SQLBaseStore, cachedInlineCallbacks

 from synapse.api.constants import EventTypes
 from synapse.types import RoomStreamToken
 from synapse.util.logutils import log_function

@@ -299,7 +299,7 @@ class StreamStore(SQLBaseStore):

         defer.returnValue((events, token))

-    @defer.inlineCallbacks
+    @cachedInlineCallbacks(num_args=4)
     def get_recent_events_for_room(self, room_id, limit, end_token, from_token=None):
         # TODO (erikj): Handle compressed feedback

From efe60d5e8c76cede325aa09a138f5033b90b8d90 Mon Sep 17 00:00:00 2001
From: "Paul \"LeoNerd\" Evans"
Date: Fri, 7 Aug 2015 16:36:42 +0100
Subject: [PATCH 117/266] Only print the pidfile path on startup if requested by a commandline flag

---
 synapse/app/homeserver.py | 3 ++-
 synapse/config/server.py | 7 +++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py
index 49e27c9e1..f04493f92 100755
--- a/synapse/app/homeserver.py
+++ b/synapse/app/homeserver.py
@@ -657,7 +657,8 @@ def run(hs):

     if hs.config.daemonize:

-        print hs.config.pid_file
+        if hs.config.print_pidfile:
+            print hs.config.pid_file

         daemon = Daemonize(
             app="synapse-homeserver",

diff --git a/synapse/config/server.py b/synapse/config/server.py
index f4d4a8710..f9a3b5f15 100644
--- a/synapse/config/server.py
+++ b/synapse/config/server.py
@@ -24,6 +24,7 @@ class ServerConfig(Config):
         self.web_client = config["web_client"]
         self.soft_file_limit = config["soft_file_limit"]
         self.daemonize = config.get("daemonize")
+        self.print_pidfile = config.get("print_pidfile")
         self.use_frozen_dicts = config.get("use_frozen_dicts", True)

         self.listeners = config.get("listeners", [])

@@ -208,12 +209,18 @@ class ServerConfig(Config):
             self.manhole = args.manhole
         if args.daemonize is not None:
             self.daemonize = args.daemonize
+        if args.print_pidfile is not None:
+            self.print_pidfile = args.print_pidfile

     def add_arguments(self, parser):
         server_group = parser.add_argument_group("server")
         server_group.add_argument("-D", "--daemonize", action='store_true',
                                   default=None,
                                   help="Daemonize the home server")
+        server_group.add_argument("--print-pidfile", action='store_true',
+                                  default=None,
+                                  help="Print the path to the pidfile just"
+                                  " before daemonizing")
         server_group.add_argument("--manhole", metavar="PORT", dest="manhole",
                                   type=int,
                                   help="Turn on the twisted telnet manhole"

From e3c8e2c13c7c99a10d52a984c0bffa5715868c9a Mon Sep 17 00:00:00 2001
From: "Paul \"LeoNerd\" Evans"
Date: Fri, 7 Aug 2015 16:42:27 +0100
Subject: [PATCH 118/266] Add a --generate-keys option

---
 synapse/config/_base.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/synapse/config/_base.py b/synapse/config/_base.py
index d483c67c6..c408db2b4 100644
--- a/synapse/config/_base.py
+++ b/synapse/config/_base.py
@@ -138,6 +138,11 @@ class Config(object):
             action="store_true",
             help="Generate a config file for the server name"
         )
+        config_parser.add_argument(
+            "--generate-keys",
+            action="store_true",
+            help="Generate any missing key files then exit"
+        )
         config_parser.add_argument(
             "-H", "--server-name",
             help="The server name to generate a config file for"
@@ -230,4 +235,8 @@ class Config(object):

         obj.invoke_all("read_arguments", args)

+        if config_args.generate_keys:
+            obj.invoke_all("generate_files", config)
+            sys.exit(0)
+
         return obj

From 0db40d3e939506e22b927214bb0d3eaad40730ab Mon Sep 17 00:00:00 2001
From: "Paul \"LeoNerd\" Evans"
Date: Fri, 7 Aug 2015 17:22:11 +0100
Subject: [PATCH 119/266] Don't complain about extra .pyc files we find while hunting for database schemas

---
 synapse/storage/__init__.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py
index 71d5d9250..973e78e04 100644
--- a/synapse/storage/__init__.py
+++ b/synapse/storage/__init__.py
@@ -354,6 +354,11 @@ def _upgrade_existing_database(cur, current_version, applied_delta_files,
                 )
                 logger.debug("Running script %s", relative_path)
                 module.run_upgrade(cur, database_engine)
+            elif ext == ".pyc":
+                # Sometimes .pyc files turn up anyway even though we've
+                # disabled their generation; e.g. from distribution package
+                # installers. Silently skip it
+                pass
             elif ext == ".sql":
                 # A plain old .sql file, just read and execute it
                 logger.debug("Applying schema %s", relative_path)

From ffdb8c382860ce2e351614a91c2ce07a91c61455 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Fri, 7 Aug 2015 18:13:48 +0100
Subject: [PATCH 120/266] Don't be too enthusiastic with defer.gatherResults

---
 synapse/handlers/message.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index 765b14d99..11c736f72 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -405,10 +405,14 @@ class MessageHandler(BaseHandler):
             except:
                 logger.exception("Failed to get snapshot")

-        yield defer.gatherResults(
-            [handle_room(e) for e in room_list],
-            consumeErrors=True
-        ).addErrback(unwrapFirstError)
+        # Only do N rooms at once
+        n = 5
+        d_list = [handle_room(e) for e in room_list]
+        for ds in [d_list[i:i + n] for i in range(0, len(d_list), n)]:
+            yield defer.gatherResults(
+                ds,
+                consumeErrors=True
+            ).addErrback(unwrapFirstError)

         ret = {
             "rooms": rooms_ret,

From 02118901347ab5067fbb47169b36a8424e671bfb Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Fri, 7 Aug 2015 18:14:49 +0100
Subject: [PATCH 121/266] Implement a CacheListDescriptor

---
 synapse/storage/_base.py | 106 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 106 insertions(+)

diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py
index 32089b05e..556aa3b52 100644
--- a/synapse/storage/_base.py
+++ b/synapse/storage/_base.py
@@ -16,6 +16,7 @@ import logging

 from synapse.api.errors import StoreError
 from synapse.util.async import ObservableDeferred
+from synapse.util import unwrapFirstError
 from synapse.util.logutils import log_function
 from synapse.util.logcontext import preserve_context_over_fn, LoggingContext
 from synapse.util.lrucache import LruCache

@@ -231,6 +232,101 @@ class CacheDescriptor(object):
         return wrapped


+class CacheListDescriptor(object):
+    def __init__(self, orig, cache, list_name, num_args=1, inlineCallbacks=False):
+        self.orig = orig
+
+        if inlineCallbacks:
+            self.function_to_call = defer.inlineCallbacks(orig)
+        else:
+            self.function_to_call = orig
+
+        self.num_args = num_args
+        self.list_name = list_name
+
+        self.arg_names = inspect.getargspec(orig).args[1:num_args+1]
+        self.list_pos = self.arg_names.index(self.list_name)
+
+        self.cache = cache
+
+        self.sentinel = object()
+
+        if len(self.arg_names) < self.num_args:
+            raise Exception(
+                "Not enough explicit positional arguments to key off of for %r."
+                " (@cached cannot key off of *args or **kwargs)"
+                % (orig.__name__,)
+            )
+
+        if self.list_name not in self.arg_names:
+            raise Exception(
+                "Couldn't see arguments %r for %r."
+ % (self.list_name, cache.name,) + ) + + def __get__(self, obj, objtype=None): + + @functools.wraps(self.orig) + def wrapped(*args, **kwargs): + arg_dict = inspect.getcallargs(self.orig, obj, *args, **kwargs) + keyargs = [arg_dict[arg_nm] for arg_nm in self.arg_names] + list_args = arg_dict[self.list_name] + + cached = {} + missing = [] + for arg in list_args: + key = list(keyargs) + key[self.list_pos] = arg + + try: + res = self.cache.get(tuple(key)).observe() + res.addCallback(lambda r, arg: (arg, r), arg) + cached[arg] = res + except KeyError: + missing.append(arg) + + if missing: + sequence = self.cache.sequence + args_to_call = dict(arg_dict) + args_to_call[self.list_name] = missing + + ret_d = defer.maybeDeferred( + self.function_to_call, + **args_to_call + ) + + ret_d = ObservableDeferred(ret_d) + + for arg in missing: + observer = ret_d.observe() + observer.addCallback(lambda r, arg: r[arg], arg) + + observer = ObservableDeferred(observer) + + key = list(keyargs) + key[self.list_pos] = arg + self.cache.update(sequence, tuple(key), observer) + + def invalidate(f, key): + self.cache.invalidate(key) + return f + observer.addErrback(invalidate, tuple(key)) + + res = observer.observe() + res.addCallback(lambda r, arg: (arg, r), arg) + + cached[arg] = res + + return defer.gatherResults( + cached.values(), + consumeErrors=True, + ).addErrback(unwrapFirstError).addCallback(lambda res: dict(res)) + + obj.__dict__[self.orig.__name__] = wrapped + + return wrapped + + def cached(max_entries=1000, num_args=1, lru=True): return lambda orig: CacheDescriptor( orig, @@ -250,6 +346,16 @@ def cachedInlineCallbacks(max_entries=1000, num_args=1, lru=False): ) +def cachedList(cache, list_name, num_args=1, inlineCallbacks=False): + return lambda orig: CacheListDescriptor( + orig, + cache=cache, + list_name=list_name, + num_args=num_args, + inlineCallbacks=inlineCallbacks, + ) + + class LoggingTransaction(object): """An object that almost-transparently proxies for the 'txn' object passed to the constructor. Adds logging and metrics to the .execute() From 9eb5b23d3aeb0ddfb91cba0f36155dd8dcfbe8a9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 7 Aug 2015 18:15:30 +0100 Subject: [PATCH 122/266] Batch up various DB requests for event -> state --- synapse/storage/state.py | 219 +++++++++++++++++++++++++-------------- 1 file changed, 142 insertions(+), 77 deletions(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 5588c9e69..a04731ae1 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -13,11 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ._base import SQLBaseStore, cached, cachedInlineCallbacks +from ._base import SQLBaseStore, cached, cachedInlineCallbacks, cachedList from twisted.internet import defer -from synapse.util import unwrapFirstError from synapse.util.stringutils import random_string import logging @@ -50,32 +49,20 @@ class StateStore(SQLBaseStore): The return value is a dict mapping group names to lists of events. 
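The batched flow in this patch is exactly what the CacheListDescriptor from the previous patch enables: probe the per-key cache, issue one fetch for all the misses, then file each result back under its own key. A rough standalone sketch of that flow, assuming a Cache-like object with the get/sequence/update interface shown earlier (batched_lookup and fetch_many are illustrative names):

    from twisted.internet import defer

    @defer.inlineCallbacks
    def batched_lookup(cache, keys, fetch_many):
        # Serve what we can from the per-key cache and collect the misses.
        results, missing = {}, []
        for key in keys:
            try:
                results[key] = cache.get(key)
            except KeyError:
                missing.append(key)

        if missing:
            # Read the sequence number before fetching so that a racing
            # invalidation vetoes the write-back (the SYN-369 guard).
            sequence = cache.sequence
            fetched = yield fetch_many(missing)  # one round-trip, returns a dict
            for key in missing:
                cache.update(sequence, key, fetched.get(key))
                results[key] = fetched.get(key)

        defer.returnValue(results)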
""" + if not event_ids: + defer.returnValue({}) - event_and_groups = yield defer.gatherResults( - [ - self._get_state_group_for_event( - room_id, event_id, - ).addCallback(lambda group, event_id: (event_id, group), event_id) - for event_id in event_ids - ], - consumeErrors=True, - ).addErrback(unwrapFirstError) + event_to_groups = yield self._get_state_group_for_events( + room_id, event_ids, + ) - groups = set(group for _, group in event_and_groups if group) + groups = set(event_to_groups.values()) - group_to_state = yield defer.gatherResults( - [ - self._get_state_for_group( - group, - ).addCallback(lambda state_dict, group: (group, state_dict), group) - for group in groups - ], - consumeErrors=True, - ).addErrback(unwrapFirstError) + group_to_state = yield self._get_state_for_groups(groups) defer.returnValue({ group: state_map.values() - for group, state_map in group_to_state + for group, state_map in group_to_state.items() }) @cached(num_args=1) @@ -212,17 +199,48 @@ class StateStore(SQLBaseStore): txn.execute(sql, args) - return group, [ - r[0] - for r in txn.fetchall() - ] + return [r[0] for r in txn.fetchall()] return self.runInteraction( "_get_state_groups_from_group", f, ) - @cached(num_args=3, lru=True, max_entries=20000) + def _get_state_groups_from_groups(self, groups_and_types): + def f(txn): + results = {} + for group, types in groups_and_types: + if types is not None: + where_clause = "AND (%s)" % ( + " OR ".join(["(type = ? AND state_key = ?)"] * len(types)), + ) + else: + where_clause = "" + + sql = ( + "SELECT event_id FROM state_groups_state WHERE" + " state_group = ? %s" + ) % (where_clause,) + + args = [group] + if types is not None: + args.extend([i for typ in types for i in typ]) + + txn.execute(sql, args) + + results[group] = [ + r[0] + for r in txn.fetchall() + ] + + return results + + return self.runInteraction( + "_get_state_groups_from_groups", + f, + ) + + @cached(num_args=3, lru=True, max_entries=10000) def _get_state_for_event_id(self, room_id, event_id, types): def f(txn): type_and_state_sql = " OR ".join([ @@ -274,33 +292,19 @@ class StateStore(SQLBaseStore): deferred: A list of dicts corresponding to the event_ids given. 
The dicts are mappings from (type, state_key) -> state_events """ - event_and_groups = yield defer.gatherResults( - [ - self._get_state_group_for_event( - room_id, event_id, - ).addCallback(lambda group, event_id: (event_id, group), event_id) - for event_id in event_ids - ], - consumeErrors=True, - ).addErrback(unwrapFirstError) + event_to_groups = yield self._get_state_group_for_events( + room_id, event_ids, + ) - groups = set(group for _, group in event_and_groups) + groups = set(event_to_groups.values()) - res = yield defer.gatherResults( - [ - self._get_state_for_group( - group, types - ).addCallback(lambda state_dict, group: (group, state_dict), group) - for group in groups - ], - consumeErrors=True, - ).addErrback(unwrapFirstError) - - group_to_state = dict(res) + group_to_state = yield self._get_state_for_groups( + groups, types + ) event_to_state = { event_id: group_to_state[group] - for event_id, group in event_and_groups + for event_id, group in event_to_groups.items() } defer.returnValue([ @@ -320,8 +324,29 @@ class StateStore(SQLBaseStore): desc="_get_state_group_for_event", ) - @defer.inlineCallbacks - def _get_state_for_group(self, group, types=None): + @cachedList(cache=_get_state_group_for_event.cache, list_name="event_ids", num_args=2) + def _get_state_group_for_events(self, room_id, event_ids): + def f(txn): + results = {} + for event_id in event_ids: + results[event_id] = self._simple_select_one_onecol_txn( + txn, + table="event_to_state_groups", + keyvalues={ + "event_id": event_id, + }, + retcol="state_group", + allow_none=True, + ) + + return results + + return self.runInteraction( + "_get_state_group_for_events", + f, + ) + + def _get_state_for_group_from_cache(self, group, types=None): is_all, state_dict = self._state_group_cache.get(group) type_to_key = {} @@ -339,7 +364,7 @@ class StateStore(SQLBaseStore): missing_types.add((typ, state_key)) if is_all and types is None: - defer.returnValue(state_dict) + return state_dict, missing_types if is_all or (types is not None and not missing_types): sentinel = object() @@ -354,41 +379,81 @@ class StateStore(SQLBaseStore): return True return False - defer.returnValue({ + return { k: v for k, v in state_dict.items() if v and include(k[0], k[1]) - }) + }, missing_types + + return {}, missing_types + + @defer.inlineCallbacks + def _get_state_for_groups(self, groups, types=None): + results = {} + missing_groups_and_types = [] + for group in groups: + state_dict, missing_types = self._get_state_for_group_from_cache( + group, types + ) + + if types is not None and not missing_types: + results[group] = { + key: value + for key, value in state_dict.items() + if value + } + else: + missing_groups_and_types.append(( + group, + missing_types if types else None + )) + + if not missing_groups_and_types: + defer.returnValue(results) # Okay, so we have some missing_types, lets fetch them. 
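# (Everything below now runs once per batch rather than once per group: a
# single _get_state_groups_from_groups call for the missing groups, a single
# _get_events call for all their state events, and then a per-group
# write-back into the DictionaryCache under the sequence number captured
# next.)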
cache_seq_num = self._state_group_cache.sequence - _, state_ids = yield self._get_state_groups_from_group( - group, - frozenset(missing_types) if types else None + + group_state_dict = yield self._get_state_groups_from_groups( + missing_groups_and_types ) - state_events = yield self._get_events(state_ids, get_prev_content=False) - state_dict = { - key: None - for key in missing_types - } - state_dict.update({ - (e.type, e.state_key): e + + state_events = yield self._get_events( + [e_id for l in group_state_dict.values() for e_id in l], + get_prev_content=False + ) + + state_events = { + e.event_id: e for e in state_events - }) + } - # Update the cache - self._state_group_cache.update( - cache_seq_num, - key=group, - value=state_dict, - full=(types is None), - ) + for group, state_ids in group_state_dict.items(): + state_dict = { + key: None + for key in missing_types + } + evs = [state_events[e_id] for e_id in state_ids] + state_dict.update({ + (e.type, e.state_key): e + for e in evs + }) - defer.returnValue({ - key: value - for key, value in state_dict.items() - if value - }) + # Update the cache + self._state_group_cache.update( + cache_seq_num, + key=group, + value=state_dict, + full=(types is None), + ) + + results[group] = { + key: value + for key, value in state_dict.items() + if value + } + + defer.returnValue(results) def _make_group_id(clock): From 20addfa358e4f72b9f0c25d48c1e3ecfc08a68b2 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 7 Aug 2015 11:52:21 +0100 Subject: [PATCH 123/266] Change Cache to not use *args in its interface --- synapse/storage/__init__.py | 4 +- synapse/storage/_base.py | 61 +++++++++++++++-------------- synapse/storage/directory.py | 4 +- synapse/storage/event_federation.py | 4 +- synapse/storage/events.py | 21 +++++----- synapse/storage/keys.py | 2 +- synapse/storage/presence.py | 4 +- synapse/storage/push_rule.py | 16 ++++---- synapse/storage/registration.py | 2 +- synapse/storage/roommember.py | 6 +-- tests/storage/test__base.py | 12 +++--- 11 files changed, 69 insertions(+), 67 deletions(-) diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index 71d5d9250..1a6a8a376 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -99,7 +99,7 @@ class DataStore(RoomMemberStore, RoomStore, key = (user.to_string(), access_token, device_id, ip) try: - last_seen = self.client_ip_last_seen.get(*key) + last_seen = self.client_ip_last_seen.get(key) except KeyError: last_seen = None @@ -107,7 +107,7 @@ class DataStore(RoomMemberStore, RoomStore, if last_seen is not None and (now - last_seen) < LAST_SEEN_GRANULARITY: defer.returnValue(None) - self.client_ip_last_seen.prefill(*key + (now,)) + self.client_ip_last_seen.prefill(key, now) # It's safe not to lock here: a) no unique constraint, # b) LAST_SEEN_GRANULARITY makes concurrent updates incredibly unlikely diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 0872a438f..e07cf3b58 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -57,6 +57,9 @@ cache_counter = metrics.register_cache( ) +_CacheSentinel = object() + + class Cache(object): def __init__(self, name, max_entries=1000, keylen=1, lru=True): @@ -83,41 +86,38 @@ class Cache(object): "Cache objects can only be accessed from the main thread" ) - def get(self, *keyargs): - if len(keyargs) != self.keylen: - raise ValueError("Expected a key to have %d items", self.keylen) - - if keyargs in self.cache: + def get(self, keyargs, default=_CacheSentinel): + val = 
self.cache.get(keyargs, _CacheSentinel) + if val is not _CacheSentinel: cache_counter.inc_hits(self.name) - return self.cache[keyargs] + return val cache_counter.inc_misses(self.name) - raise KeyError() - def update(self, sequence, *args): + if default is _CacheSentinel: + raise KeyError() + else: + return default + + def update(self, sequence, keyargs, value): self.check_thread() if self.sequence == sequence: # Only update the cache if the caches sequence number matches the # number that the cache had before the SELECT was started (SYN-369) - self.prefill(*args) - - def prefill(self, *args): # because I can't *keyargs, value - keyargs = args[:-1] - value = args[-1] - - if len(keyargs) != self.keylen: - raise ValueError("Expected a key to have %d items", self.keylen) + self.prefill(keyargs, value) + def prefill(self, keyargs, value): if self.max_entries is not None: while len(self.cache) >= self.max_entries: self.cache.popitem(last=False) self.cache[keyargs] = value - def invalidate(self, *keyargs): + def invalidate(self, keyargs): self.check_thread() - if len(keyargs) != self.keylen: - raise ValueError("Expected a key to have %d items", self.keylen) + if not isinstance(keyargs, tuple): + raise ValueError("keyargs must be a tuple.") + # Increment the sequence number so that any SELECT statements that # raced with the INSERT don't update the cache (SYN-369) self.sequence += 1 @@ -168,20 +168,21 @@ class CacheDescriptor(object): % (orig.__name__,) ) - def __get__(self, obj, objtype=None): - cache = Cache( + self.cache = Cache( name=self.orig.__name__, max_entries=self.max_entries, keylen=self.num_args, lru=self.lru, ) + def __get__(self, obj, objtype=None): + @functools.wraps(self.orig) def wrapped(*args, **kwargs): arg_dict = inspect.getcallargs(self.orig, obj, *args, **kwargs) - keyargs = [arg_dict[arg_nm] for arg_nm in self.arg_names] + keyargs = tuple(arg_dict[arg_nm] for arg_nm in self.arg_names) try: - cached_result_d = cache.get(*keyargs) + cached_result_d = self.cache.get(keyargs) if DEBUG_CACHES: @@ -202,7 +203,7 @@ class CacheDescriptor(object): # Get the sequence number of the cache before reading from the # database so that we can tell if the cache is invalidated # while the SELECT is executing (SYN-369) - sequence = cache.sequence + sequence = self.cache.sequence ret = defer.maybeDeferred( self.function_to_call, @@ -210,19 +211,19 @@ class CacheDescriptor(object): ) def onErr(f): - cache.invalidate(*keyargs) + self.cache.invalidate(keyargs) return f ret.addErrback(onErr) - ret = ObservableDeferred(ret, consumeErrors=False) - cache.update(sequence, *(keyargs + [ret])) + ret = ObservableDeferred(ret, consumeErrors=True) + self.cache.update(sequence, keyargs, ret) return ret.observe() - wrapped.invalidate = cache.invalidate - wrapped.invalidate_all = cache.invalidate_all - wrapped.prefill = cache.prefill + wrapped.invalidate = self.cache.invalidate + wrapped.invalidate_all = self.cache.invalidate_all + wrapped.prefill = self.cache.prefill obj.__dict__[self.orig.__name__] = wrapped diff --git a/synapse/storage/directory.py b/synapse/storage/directory.py index 2b2bdf861..f3947bbe8 100644 --- a/synapse/storage/directory.py +++ b/synapse/storage/directory.py @@ -104,7 +104,7 @@ class DirectoryStore(SQLBaseStore): }, desc="create_room_alias_association", ) - self.get_aliases_for_room.invalidate(room_id) + self.get_aliases_for_room.invalidate((room_id,)) @defer.inlineCallbacks def delete_room_alias(self, room_alias): @@ -114,7 +114,7 @@ class DirectoryStore(SQLBaseStore): room_alias, ) - 
self.get_aliases_for_room.invalidate(room_id) + self.get_aliases_for_room.invalidate((room_id,)) defer.returnValue(room_id) def _delete_room_alias_txn(self, txn, room_alias): diff --git a/synapse/storage/event_federation.py b/synapse/storage/event_federation.py index 45b86c94e..910b6598a 100644 --- a/synapse/storage/event_federation.py +++ b/synapse/storage/event_federation.py @@ -362,7 +362,7 @@ class EventFederationStore(SQLBaseStore): for room_id in events_by_room: txn.call_after( - self.get_latest_event_ids_in_room.invalidate, room_id + self.get_latest_event_ids_in_room.invalidate, (room_id,) ) def get_backfill_events(self, room_id, event_list, limit): @@ -505,4 +505,4 @@ class EventFederationStore(SQLBaseStore): query = "DELETE FROM event_forward_extremities WHERE room_id = ?" txn.execute(query, (room_id,)) - txn.call_after(self.get_latest_event_ids_in_room.invalidate, room_id) + txn.call_after(self.get_latest_event_ids_in_room.invalidate, (room_id,)) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index ed7ea3880..5b6491802 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -162,8 +162,8 @@ class EventsStore(SQLBaseStore): if current_state: txn.call_after(self.get_current_state_for_key.invalidate_all) txn.call_after(self.get_rooms_for_user.invalidate_all) - txn.call_after(self.get_users_in_room.invalidate, event.room_id) - txn.call_after(self.get_joined_hosts_for_room.invalidate, event.room_id) + txn.call_after(self.get_users_in_room.invalidate, (event.room_id,)) + txn.call_after(self.get_joined_hosts_for_room.invalidate, (event.room_id,)) txn.call_after(self.get_room_name_and_aliases, event.room_id) self._simple_delete_txn( @@ -430,13 +430,13 @@ class EventsStore(SQLBaseStore): if not context.rejected: txn.call_after( self.get_current_state_for_key.invalidate, - event.room_id, event.type, event.state_key - ) + (event.room_id, event.type, event.state_key,) + ) if event.type in [EventTypes.Name, EventTypes.Aliases]: txn.call_after( self.get_room_name_and_aliases.invalidate, - event.room_id + (event.room_id,) ) self._simple_upsert_txn( @@ -567,8 +567,9 @@ class EventsStore(SQLBaseStore): def _invalidate_get_event_cache(self, event_id): for check_redacted in (False, True): for get_prev_content in (False, True): - self._get_event_cache.invalidate(event_id, check_redacted, - get_prev_content) + self._get_event_cache.invalidate( + (event_id, check_redacted, get_prev_content) + ) def _get_event_txn(self, txn, event_id, check_redacted=True, get_prev_content=False, allow_rejected=False): @@ -589,7 +590,7 @@ class EventsStore(SQLBaseStore): for event_id in events: try: ret = self._get_event_cache.get( - event_id, check_redacted, get_prev_content + (event_id, check_redacted, get_prev_content,) ) if allow_rejected or not ret.rejected_reason: @@ -822,7 +823,7 @@ class EventsStore(SQLBaseStore): ev.unsigned["prev_content"] = prev.get_dict()["content"] self._get_event_cache.prefill( - ev.event_id, check_redacted, get_prev_content, ev + (ev.event_id, check_redacted, get_prev_content), ev ) defer.returnValue(ev) @@ -879,7 +880,7 @@ class EventsStore(SQLBaseStore): ev.unsigned["prev_content"] = prev.get_dict()["content"] self._get_event_cache.prefill( - ev.event_id, check_redacted, get_prev_content, ev + (ev.event_id, check_redacted, get_prev_content), ev ) return ev diff --git a/synapse/storage/keys.py b/synapse/storage/keys.py index e3f98f0cd..49b8e37cf 100644 --- a/synapse/storage/keys.py +++ b/synapse/storage/keys.py @@ -131,7 +131,7 @@ class 
KeyStore(SQLBaseStore): desc="store_server_verify_key", ) - self.get_all_server_verify_keys.invalidate(server_name) + self.get_all_server_verify_keys.invalidate((server_name,)) def store_server_keys_json(self, server_name, key_id, from_server, ts_now_ms, ts_expires_ms, key_json_bytes): diff --git a/synapse/storage/presence.py b/synapse/storage/presence.py index fefcf6bce..576cf670c 100644 --- a/synapse/storage/presence.py +++ b/synapse/storage/presence.py @@ -98,7 +98,7 @@ class PresenceStore(SQLBaseStore): updatevalues={"accepted": True}, desc="set_presence_list_accepted", ) - self.get_presence_list_accepted.invalidate(observer_localpart) + self.get_presence_list_accepted.invalidate((observer_localpart,)) defer.returnValue(result) def get_presence_list(self, observer_localpart, accepted=None): @@ -133,4 +133,4 @@ class PresenceStore(SQLBaseStore): "observed_user_id": observed_userid}, desc="del_presence_list", ) - self.get_presence_list_accepted.invalidate(observer_localpart) + self.get_presence_list_accepted.invalidate((observer_localpart,)) diff --git a/synapse/storage/push_rule.py b/synapse/storage/push_rule.py index a220f3632..9b88ca7b3 100644 --- a/synapse/storage/push_rule.py +++ b/synapse/storage/push_rule.py @@ -151,11 +151,11 @@ class PushRuleStore(SQLBaseStore): txn.execute(sql, (user_name, priority_class, new_rule_priority)) txn.call_after( - self.get_push_rules_for_user.invalidate, user_name + self.get_push_rules_for_user.invalidate, (user_name,) ) txn.call_after( - self.get_push_rules_enabled_for_user.invalidate, user_name + self.get_push_rules_enabled_for_user.invalidate, (user_name,) ) self._simple_insert_txn( @@ -187,10 +187,10 @@ class PushRuleStore(SQLBaseStore): new_rule['priority'] = new_prio txn.call_after( - self.get_push_rules_for_user.invalidate, user_name + self.get_push_rules_for_user.invalidate, (user_name,) ) txn.call_after( - self.get_push_rules_enabled_for_user.invalidate, user_name + self.get_push_rules_enabled_for_user.invalidate, (user_name,) ) self._simple_insert_txn( @@ -216,8 +216,8 @@ class PushRuleStore(SQLBaseStore): desc="delete_push_rule", ) - self.get_push_rules_for_user.invalidate(user_name) - self.get_push_rules_enabled_for_user.invalidate(user_name) + self.get_push_rules_for_user.invalidate((user_name,)) + self.get_push_rules_enabled_for_user.invalidate((user_name,)) @defer.inlineCallbacks def set_push_rule_enabled(self, user_name, rule_id, enabled): @@ -238,10 +238,10 @@ class PushRuleStore(SQLBaseStore): {'id': new_id}, ) txn.call_after( - self.get_push_rules_for_user.invalidate, user_name + self.get_push_rules_for_user.invalidate, (user_name,) ) txn.call_after( - self.get_push_rules_enabled_for_user.invalidate, user_name + self.get_push_rules_enabled_for_user.invalidate, (user_name,) ) diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 90e2606be..4eaa088b3 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -131,7 +131,7 @@ class RegistrationStore(SQLBaseStore): user_id ) for r in rows: - self.get_user_by_token.invalidate(r) + self.get_user_by_token.invalidate((r,)) @cached() def get_user_by_token(self, token): diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 55dd3f6cf..9f14f38f2 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -54,9 +54,9 @@ class RoomMemberStore(SQLBaseStore): ) for event in events: - txn.call_after(self.get_rooms_for_user.invalidate, event.state_key) - 
txn.call_after(self.get_joined_hosts_for_room.invalidate, event.room_id) - txn.call_after(self.get_users_in_room.invalidate, event.room_id) + txn.call_after(self.get_rooms_for_user.invalidate, (event.state_key,)) + txn.call_after(self.get_joined_hosts_for_room.invalidate, (event.room_id,)) + txn.call_after(self.get_users_in_room.invalidate, (event.room_id,)) def get_room_member(self, user_id, room_id): """Retrieve the current state of a room member. diff --git a/tests/storage/test__base.py b/tests/storage/test__base.py index 8fa305d18..abee2f631 100644 --- a/tests/storage/test__base.py +++ b/tests/storage/test__base.py @@ -42,12 +42,12 @@ class CacheTestCase(unittest.TestCase): self.assertEquals(self.cache.get("foo"), 123) def test_invalidate(self): - self.cache.prefill("foo", 123) - self.cache.invalidate("foo") + self.cache.prefill(("foo",), 123) + self.cache.invalidate(("foo",)) failed = False try: - self.cache.get("foo") + self.cache.get(("foo",)) except KeyError: failed = True @@ -141,7 +141,7 @@ class CacheDecoratorTestCase(unittest.TestCase): self.assertEquals(callcount[0], 1) - a.func.invalidate("foo") + a.func.invalidate(("foo",)) yield a.func("foo") @@ -153,7 +153,7 @@ class CacheDecoratorTestCase(unittest.TestCase): def func(self, key): return key - A().func.invalidate("what") + A().func.invalidate(("what",)) @defer.inlineCallbacks def test_max_entries(self): @@ -193,7 +193,7 @@ class CacheDecoratorTestCase(unittest.TestCase): a = A() - a.func.prefill("foo", ObservableDeferred(d)) + a.func.prefill(("foo",), ObservableDeferred(d)) self.assertEquals(a.func("foo").result, d.result) self.assertEquals(callcount[0], 0) From 3213ff630ca6f7f624d65dbab55e2c4c65fe80ce Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 7 Aug 2015 19:13:45 +0100 Subject: [PATCH 124/266] Remove unnecessary cache --- synapse/storage/state.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 55c6d5289..7ce51b9bd 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
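Taken together, the invalidate and prefill changes above settle on one convention: a cache key is always a tuple, even for a single argument. A stand-alone sketch of that contract (plain Python, standard library only; this Cache is a simplified illustration, not the class from synapse/storage/_base.py):

    class Cache(object):
        """Simplified stand-in for the tuple-keyed Cache exercised above."""
        def __init__(self, name):
            self.name = name
            self.cache = {}
            self.sequence = 0

        def prefill(self, key, value):
            self.cache[key] = value

        def get(self, key):
            return self.cache[key]  # a miss raises KeyError, as in the tests

        def invalidate(self, key):
            if not isinstance(key, tuple):
                raise ValueError("keyargs must be a tuple.")
            # Bump the sequence so racing SELECTs don't repopulate (SYN-369)
            self.sequence += 1
            self.cache.pop(key, None)

    cache = Cache("example")
    cache.prefill(("foo",), 123)
    assert cache.get(("foo",)) == 123
    cache.invalidate(("foo",))  # always a 1-tuple, never a bare "foo"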
-from ._base import SQLBaseStore, cached, cachedInlineCallbacks
+from ._base import SQLBaseStore, cachedInlineCallbacks
 
 from twisted.internet import defer
 
@@ -91,7 +91,6 @@ class StateStore(SQLBaseStore):
 
         defer.returnValue(dict(state_list))
 
-    @cached(num_args=1)
     def _fetch_events_for_group(self, key, events):
         return self._get_events(
             events, get_prev_content=False

From 62126c996c5bc8d86d3e15c015a68e8e60622a55 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Fri, 7 Aug 2015 19:17:58 +0100
Subject: [PATCH 125/266] Propagate stale cache errors to calling functions

---
 synapse/storage/_base.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py
index 0872a438f..524a00315 100644
--- a/synapse/storage/_base.py
+++ b/synapse/storage/_base.py
@@ -183,8 +183,8 @@ class CacheDescriptor(object):
             try:
                 cached_result_d = cache.get(*keyargs)
 
+                observed = cached_result_d.observe()
                 if DEBUG_CACHES:
-
                     @defer.inlineCallbacks
                     def check_result(cached_result):
                         actual_result = yield self.function_to_call(obj, *args, **kwargs)
@@ -195,9 +195,10 @@
                                 cached_result, actual_result,
                             )
                             raise ValueError("Stale cache entry")
-                    cached_result_d.observe().addCallback(check_result)
+                        defer.returnValue(cached_result)
+                    observed.addCallback(check_result)
 
-                return cached_result_d.observe()
+                return observed
             except KeyError:
                 # Get the sequence number of the cache before reading from the
                 # database so that we can tell if the cache is invalidated

From 9c5385b53ad6d8ac21227dbc3f150c7d2511b061 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Fri, 7 Aug 2015 19:26:38 +0100
Subject: [PATCH 126/266] s/observed/observer/

---
 synapse/storage/_base.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py
index 524a00315..5997603b3 100644
--- a/synapse/storage/_base.py
+++ b/synapse/storage/_base.py
@@ -183,7 +183,7 @@ class CacheDescriptor(object):
             try:
                 cached_result_d = cache.get(*keyargs)
 
-                observed = cached_result_d.observe()
+                observer = cached_result_d.observe()
                 if DEBUG_CACHES:
                     @defer.inlineCallbacks
                     def check_result(cached_result):
                         actual_result = yield self.function_to_call(obj, *args, **kwargs)
@@ -196,9 +196,9 @@
                             )
                             raise ValueError("Stale cache entry")
                         defer.returnValue(cached_result)
-                    observed.addCallback(check_result)
+                    observer.addCallback(check_result)
 
-                return observed
+                return observer
             except KeyError:
                 # Get the sequence number of the cache before reading from the
                 # database so that we can tell if the cache is invalidated

From 5119e416e855e1c5d78e8bd711bcbac2e15ee5f6 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Mon, 10 Aug 2015 10:05:30 +0100
Subject: [PATCH 127/266] Line length

---
 synapse/storage/state.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/synapse/storage/state.py b/synapse/storage/state.py
index 478b38286..a19364985 100644
--- a/synapse/storage/state.py
+++ b/synapse/storage/state.py
@@ -323,7 +323,8 @@ class StateStore(SQLBaseStore):
         desc="_get_state_group_for_event",
     )
 
-    @cachedList(cache=_get_state_group_for_event.cache, list_name="event_ids", num_args=2)
+    @cachedList(cache=_get_state_group_for_event.cache, list_name="event_ids",
+                num_args=2)
     def _get_state_group_for_events(self, room_id, event_ids):
         def f(txn):
             results = {}

From aa88582e008550aa5a341ebd8c31279a63074df9 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Mon, 10 Aug 2015 10:08:15 +0100
Subject: [PATCH 128/266] Do bounds check

---
 synapse/storage/state.py | 5 ++++-
 1 file
changed, 4 insertions(+), 1 deletion(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index a19364985..06f0ab3ff 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -433,7 +433,10 @@ class StateStore(SQLBaseStore): key: None for key in missing_types } - evs = [state_events[e_id] for e_id in state_ids] + evs = [ + state_events[e_id] for e_id in state_ids + if e_id in state_events # This can happen if event is rejected. + ] state_dict.update({ (e.type, e.state_key): e for e in evs From 2cd6cb9f65e64ea49e0a8ba7c4851c79821243ad Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 10 Aug 2015 10:38:47 +0100 Subject: [PATCH 129/266] Rename keyargs to args in Cache --- synapse/storage/_base.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index a088a6175..1e2d43666 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -86,8 +86,8 @@ class Cache(object): "Cache objects can only be accessed from the main thread" ) - def get(self, keyargs, default=_CacheSentinel): - val = self.cache.get(keyargs, _CacheSentinel) + def get(self, key, default=_CacheSentinel): + val = self.cache.get(key, _CacheSentinel) if val is not _CacheSentinel: cache_counter.inc_hits(self.name) return val @@ -99,29 +99,29 @@ class Cache(object): else: return default - def update(self, sequence, keyargs, value): + def update(self, sequence, key, value): self.check_thread() if self.sequence == sequence: # Only update the cache if the caches sequence number matches the # number that the cache had before the SELECT was started (SYN-369) - self.prefill(keyargs, value) + self.prefill(key, value) - def prefill(self, keyargs, value): + def prefill(self, key, value): if self.max_entries is not None: while len(self.cache) >= self.max_entries: self.cache.popitem(last=False) - self.cache[keyargs] = value + self.cache[key] = value - def invalidate(self, keyargs): + def invalidate(self, key): self.check_thread() - if not isinstance(keyargs, tuple): + if not isinstance(key, tuple): raise ValueError("keyargs must be a tuple.") # Increment the sequence number so that any SELECT statements that # raced with the INSERT don't update the cache (SYN-369) self.sequence += 1 - self.cache.pop(keyargs, None) + self.cache.pop(key, None) def invalidate_all(self): self.check_thread() From 86eaaa885bb4f3d75990874a76c53b2a0870d639 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 10 Aug 2015 13:44:44 +0100 Subject: [PATCH 130/266] Rename keyargs to args in CacheDescriptor --- synapse/storage/_base.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 1e2d43666..e76ee2779 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -180,9 +180,9 @@ class CacheDescriptor(object): @functools.wraps(self.orig) def wrapped(*args, **kwargs): arg_dict = inspect.getcallargs(self.orig, obj, *args, **kwargs) - keyargs = tuple(arg_dict[arg_nm] for arg_nm in self.arg_names) + cache_key = tuple(arg_dict[arg_nm] for arg_nm in self.arg_names) try: - cached_result_d = self.cache.get(keyargs) + cached_result_d = self.cache.get(cache_key) observer = cached_result_d.observe() if DEBUG_CACHES: @@ -192,7 +192,7 @@ class CacheDescriptor(object): if actual_result != cached_result: logger.error( "Stale cache entry %s%r: cached: %r, actual %r", - self.orig.__name__, keyargs, + self.orig.__name__, cache_key, cached_result, actual_result, ) 
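The rename above makes it clearer how the descriptor derives its cache key: it binds the call's arguments by name and keys off the first num_args of them. The same mechanism in isolation (standard library only, Python 2 era as used by this codebase; get_event is a hypothetical method used purely for its signature):

    import inspect

    def get_event(self, event_id, check_redacted=True):
        """Hypothetical cached method; only the signature matters here."""

    # num_args=2 keys the cache off the first two named arguments
    arg_names = inspect.getargspec(get_event).args[1:3]
    arg_dict = inspect.getcallargs(get_event, None, "$ev1:hs", False)
    cache_key = tuple(arg_dict[nm] for nm in arg_names)
    assert cache_key == ("$ev1:hs", False)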
                            raise ValueError("Stale cache entry")
@@ -212,13 +212,13 @@
                 )
 
                 def onErr(f):
-                    self.cache.invalidate(keyargs)
+                    self.cache.invalidate(cache_key)
                     return f
 
                 ret.addErrback(onErr)
 
                 ret = ObservableDeferred(ret, consumeErrors=True)
-                self.cache.update(sequence, keyargs, ret)
+                self.cache.update(sequence, cache_key, ret)
 
                 return ret.observe()

From 559c51debcf5402cd5999aa1afb854b34e9de363 Mon Sep 17 00:00:00 2001
From: Mark Haines
Date: Mon, 10 Aug 2015 14:07:08 +0100
Subject: [PATCH 131/266] Use TypeError instead of ValueError and give a nicer
 error message when someone calls Cache.invalidate with the wrong type.

---
 synapse/storage/_base.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py
index e76ee2779..73eea157a 100644
--- a/synapse/storage/_base.py
+++ b/synapse/storage/_base.py
@@ -116,7 +116,9 @@ class Cache(object):
     def invalidate(self, key):
         self.check_thread()
         if not isinstance(key, tuple):
-            raise ValueError("keyargs must be a tuple.")
+            raise TypeError(
+                "The cache key must be a tuple not %r" % (type(key),)
+            )
 
         # Increment the sequence number so that any SELECT statements that
         # raced with the INSERT don't update the cache (SYN-369)
         self.sequence += 1

From dcefac3b0637673179e4a107b8cdfb844ba3a6c9 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Mon, 10 Aug 2015 14:16:24 +0100
Subject: [PATCH 132/266] Comments

---
 synapse/util/dictionary_cache.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/synapse/util/dictionary_cache.py b/synapse/util/dictionary_cache.py
index 38b131677..c7564cdf0 100644
--- a/synapse/util/dictionary_cache.py
+++ b/synapse/util/dictionary_cache.py
@@ -26,6 +26,9 @@ DictionaryEntry = namedtuple("DictionaryEntry", ("full", "value"))
 
 
 class DictionaryCache(object):
+    """Caches key -> dictionary lookups, supporting caching partial dicts, i.e.
+    fetching a subset of dictionary keys for a particular key.
+    """
 
     def __init__(self, name, max_entries=1000):
         self.cache = LruCache(max_size=max_entries)

From bb0a475c30e63a6d9d749a1f8587582904d3ff92 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Mon, 10 Aug 2015 14:27:38 +0100
Subject: [PATCH 133/266] Comments

---
 synapse/storage/_base.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py
index 27713e8b7..826c393cd 100644
--- a/synapse/storage/_base.py
+++ b/synapse/storage/_base.py
@@ -234,6 +234,12 @@ class CacheDescriptor(object):
 
 
 class CacheListDescriptor(object):
+    """Wraps an existing cache to support bulk fetching of keys.
+
+    Given a list of keys it looks in the cache to find any hits, then passes
+    the list of missing keys to the wrapped fucntion.
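The docstring added to DictionaryCache describes partial-dict caching; a toy model of that idea on plain dicts (DictionaryEntry mirrors the namedtuple in dictionary_cache.py, everything else here is illustrative rather than the real implementation):

    from collections import namedtuple

    DictionaryEntry = namedtuple("DictionaryEntry", ("full", "value"))

    cache = {}  # key -> DictionaryEntry

    def update(key, value, full=False):
        # A full update replaces the entry; a partial one merges into it.
        if full:
            cache[key] = DictionaryEntry(True, dict(value))
        else:
            existing = cache.get(key, DictionaryEntry(False, {}))
            merged = dict(existing.value)
            merged.update(value)
            cache[key] = DictionaryEntry(existing.full, merged)

    def get(key, dict_keys=None):
        # Returns whatever subset is cached; callers fetch what is missing.
        entry = cache.get(key, DictionaryEntry(False, {}))
        if dict_keys is None:
            return entry if entry.full else DictionaryEntry(False, {})
        return DictionaryEntry(entry.full, dict(
            (k, entry.value[k]) for k in dict_keys if k in entry.value
        ))

    update("group1", {("m.room.name", ""): "ev3"})
    assert get("group1", [("m.room.name", "")]).value == {("m.room.name", ""): "ev3"}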
+ """ + def __init__(self, orig, cache, list_name, num_args=1, inlineCallbacks=False): self.orig = orig From 2c019eea11ce83cbb64ae2ea75e7e7aa260e5f48 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 10 Aug 2015 14:44:41 +0100 Subject: [PATCH 134/266] Remove unused function --- synapse/storage/state.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 06f0ab3ff..b83f644fd 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -65,13 +65,6 @@ class StateStore(SQLBaseStore): for group, state_map in group_to_state.items() }) - def _fetch_events_for_group(self, key, events): - return self._get_events( - events, get_prev_content=False - ).addCallback( - lambda evs: (key, evs) - ) - def _store_state_groups_txn(self, txn, event, context): return self._store_mult_state_groups_txn(txn, [(event, context)]) From 017b798e4f3dedaf0c02fba4d5ec53b7130d6ef2 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 10 Aug 2015 15:01:06 +0100 Subject: [PATCH 135/266] Clean up StateStore --- synapse/storage/state.py | 73 ++++++---------------------------------- 1 file changed, 11 insertions(+), 62 deletions(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index b83f644fd..64c5ae992 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -171,34 +171,9 @@ class StateStore(SQLBaseStore): events = yield self._get_events(event_ids, get_prev_content=False) defer.returnValue(events) - def _get_state_groups_from_group(self, group, types): - def f(txn): - if types is not None: - where_clause = "AND (%s)" % ( - " OR ".join(["(type = ? AND state_key = ?)"] * len(types)), - ) - else: - where_clause = "" - - sql = ( - "SELECT event_id FROM state_groups_state WHERE" - " state_group = ? %s" - ) % (where_clause,) - - args = [group] - if types is not None: - args.extend([i for typ in types for i in typ]) - - txn.execute(sql, args) - - return [r[0] for r in txn.fetchall()] - - return self.runInteraction( - "_get_state_groups_from_group", - f, - ) - def _get_state_groups_from_groups(self, groups_and_types): + """Returns dictionary state_group -> state event ids + """ def f(txn): results = {} for group, types in groups_and_types: @@ -232,41 +207,6 @@ class StateStore(SQLBaseStore): f, ) - @cached(num_args=3, lru=True, max_entries=10000) - def _get_state_for_event_id(self, room_id, event_id, types): - def f(txn): - type_and_state_sql = " OR ".join([ - "(type = ? AND state_key = ?)" - if typ[1] is not None - else "type = ?" - for typ in types - ]) - - sql = ( - "SELECT e.event_id, sg.state_group, sg.event_id" - " FROM state_groups_state as sg" - " INNER JOIN event_to_state_groups as e" - " ON e.state_group = sg.state_group" - " WHERE e.event_id = ? 
AND (%s)" - ) % (type_and_state_sql,) - - args = [event_id] - for typ, state_key in types: - args.extend( - [typ, state_key] if state_key is not None else [typ] - ) - txn.execute(sql, args) - - return event_id, [ - r[0] - for r in txn.fetchall() - ] - - return self.runInteraction( - "_get_state_for_event_id", - f, - ) - @defer.inlineCallbacks def get_state_for_events(self, room_id, event_ids, types): """Given a list of event_ids and type tuples, return a list of state @@ -319,6 +259,8 @@ class StateStore(SQLBaseStore): @cachedList(cache=_get_state_group_for_event.cache, list_name="event_ids", num_args=2) def _get_state_group_for_events(self, room_id, event_ids): + """Returns mapping event_id -> state_group + """ def f(txn): results = {} for event_id in event_ids: @@ -340,6 +282,8 @@ class StateStore(SQLBaseStore): ) def _get_state_for_group_from_cache(self, group, types=None): + """Checks if group is in cache. See `_get_state_for_groups` + """ is_all, state_dict = self._state_group_cache.get(group) type_to_key = {} @@ -382,6 +326,11 @@ class StateStore(SQLBaseStore): @defer.inlineCallbacks def _get_state_for_groups(self, groups, types=None): + """Given list of groups returns dict of group -> list of state events + with matching types. `types` is a list of `(type, state_key)`, where + a `state_key` of None matches all state_keys. If `types` is None then + all events are returned. + """ results = {} missing_groups_and_types = [] for group in groups: From 10b874067b2755a5fce751ab5eb02da26b1e5eaa Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 11 Aug 2015 09:12:41 +0100 Subject: [PATCH 136/266] Fix state cache --- synapse/storage/state.py | 83 +++++++++++++++++++++++----------------- 1 file changed, 48 insertions(+), 35 deletions(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 64c5ae992..19b16ed40 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -283,6 +283,9 @@ class StateStore(SQLBaseStore): def _get_state_for_group_from_cache(self, group, types=None): """Checks if group is in cache. See `_get_state_for_groups` + + Returns 2-tuple (`state_dict`, `missing_types`). `missing_types` is the + list of types that aren't in the cache for that group. 
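Although the docstring above says 2-tuple, the hunks that follow actually return a third element, got_all (a later patch in this series brings the docstring in line). A toy model of the lookup, assuming exact (type, state_key) pairs only and approximating the state group cache as group -> (is_all, state_dict):

    def get_from_cache(cache, group, types):
        # cache maps group -> (is_all, state_dict); the None state_key
        # wildcard supported by the real code is elided here.
        is_all, state_dict = cache.get(group, (False, {}))
        missing_types = set(t for t in types if t not in state_dict)
        if is_all:
            missing_types = set()  # a full entry has everything there is
        got_all = not missing_types
        hits = dict((k, v) for k, v in state_dict.items() if k in set(types))
        return hits, missing_types, got_all

    cache = {"g1": (False, {("m.room.name", ""): "ev3"})}
    hits, missing, got_all = get_from_cache(
        cache, "g1", [("m.room.name", ""), ("m.room.topic", "")]
    )
    assert hits == {("m.room.name", ""): "ev3"}
    assert missing == set([("m.room.topic", "")]) and not got_all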
""" is_all, state_dict = self._state_group_cache.get(group) @@ -300,29 +303,31 @@ class StateStore(SQLBaseStore): if (typ, state_key) not in state_dict: missing_types.add((typ, state_key)) - if is_all and types is None: - return state_dict, missing_types + if is_all: + missing_types = set() + if types is None: + return state_dict, set(), True - if is_all or (types is not None and not missing_types): - sentinel = object() + sentinel = object() - def include(typ, state_key): - valid_state_keys = type_to_key.get(typ, sentinel) - if valid_state_keys is sentinel: - return False - if valid_state_keys is None: - return True - if state_key in valid_state_keys: - return True + def include(typ, state_key): + if types is None: + return True + + valid_state_keys = type_to_key.get(typ, sentinel) + if valid_state_keys is sentinel: return False + if valid_state_keys is None: + return True + if state_key in valid_state_keys: + return True + return False - return { - k: v - for k, v in state_dict.items() - if v and include(k[0], k[1]) - }, missing_types - - return {}, missing_types + return { + k: v + for k, v in state_dict.items() + if include(k[0], k[1]) + }, missing_types, not missing_types and types is not None @defer.inlineCallbacks def _get_state_for_groups(self, groups, types=None): @@ -333,25 +338,28 @@ class StateStore(SQLBaseStore): """ results = {} missing_groups_and_types = [] - for group in groups: - state_dict, missing_types = self._get_state_for_group_from_cache( + for group in set(groups): + state_dict, missing_types, got_all = self._get_state_for_group_from_cache( group, types ) - if types is not None and not missing_types: - results[group] = { - key: value - for key, value in state_dict.items() - if value - } - else: + results[group] = state_dict + + if not got_all: missing_groups_and_types.append(( group, missing_types if types else None )) if not missing_groups_and_types: - defer.returnValue(results) + defer.returnValue({ + k: { + key: ev + for key, ev in state.items() + if ev + } + for k, state in results.items() + }) # Okay, so we have some missing_types, lets fetch them. cache_seq_num = self._state_group_cache.sequence @@ -371,10 +379,15 @@ class StateStore(SQLBaseStore): } for group, state_ids in group_state_dict.items(): - state_dict = { - key: None - for key in missing_types - } + if types: + state_dict = { + key: None + for key in types + } + state_dict.update(results[group]) + else: + state_dict = results[group] + evs = [ state_events[e_id] for e_id in state_ids if e_id in state_events # This can happen if event is rejected. 
@@ -392,11 +405,11 @@ class StateStore(SQLBaseStore): full=(types is None), ) - results[group] = { + results[group].update({ key: value for key, value in state_dict.items() if value - } + }) defer.returnValue(results) From 1b994a97dd201d0f122a416f28dbbf1136304412 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 11 Aug 2015 10:41:40 +0100 Subject: [PATCH 137/266] Fix application of ACLs --- synapse/handlers/federation.py | 11 +++++------ synapse/handlers/message.py | 16 ++++++++++++---- synapse/handlers/sync.py | 17 +++++++++++++---- synapse/storage/state.py | 6 +++--- 4 files changed, 33 insertions(+), 17 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 90649af9e..2bfd0a40e 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -229,7 +229,7 @@ class FederationHandler(BaseHandler): @defer.inlineCallbacks def _filter_events_for_server(self, server_name, room_id, events): - states = yield self.store.get_state_for_events( + event_to_state = yield self.store.get_state_for_events( room_id, frozenset(e.event_id for e in events), types=( (EventTypes.RoomHistoryVisibility, ""), @@ -237,8 +237,6 @@ class FederationHandler(BaseHandler): ) ) - events_and_states = zip(events, states) - def redact_disallowed(event_and_state): event, state = event_and_state @@ -275,9 +273,10 @@ class FederationHandler(BaseHandler): return event - res = map(redact_disallowed, events_and_states) - - logger.info("_filter_events_for_server %r", res) + res = map(redact_disallowed, [ + (e, event_to_state[e.event_id]) + for e in events + ]) defer.returnValue(res) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 11c736f72..95a8f05c0 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -137,7 +137,7 @@ class MessageHandler(BaseHandler): @defer.inlineCallbacks def _filter_events_for_client(self, user_id, room_id, events): - states = yield self.store.get_state_for_events( + event_id_to_state = yield self.store.get_state_for_events( room_id, frozenset(e.event_id for e in events), types=( (EventTypes.RoomHistoryVisibility, ""), @@ -145,7 +145,8 @@ class MessageHandler(BaseHandler): ) ) - events_and_states = zip(events, states) + for ev, state in event_id_to_state.items(): + logger.info("event_id: %r, state: %r", ev, state) def allowed(event_and_state): event, state = event_and_state @@ -179,10 +180,17 @@ class MessageHandler(BaseHandler): return True - events_and_states = filter(allowed, events_and_states) + event_and_state = filter( + allowed, + [ + (e, event_id_to_state[e.event_id]) + for e in events + ] + ) + defer.returnValue([ ev - for ev, _ in events_and_states + for ev, _ in event_and_state ]) @defer.inlineCallbacks diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 8f58774b3..9a97bff84 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -294,7 +294,7 @@ class SyncHandler(BaseHandler): @defer.inlineCallbacks def _filter_events_for_client(self, user_id, room_id, events): - states = yield self.store.get_state_for_events( + event_id_to_state = yield self.store.get_state_for_events( room_id, frozenset(e.event_id for e in events), types=( (EventTypes.RoomHistoryVisibility, ""), @@ -302,7 +302,8 @@ class SyncHandler(BaseHandler): ) ) - events_and_states = zip(events, states) + for ev, state in event_id_to_state.items(): + logger.info("event_id: %r, state: %r", ev, state) def allowed(event_and_state): event, state = event_and_state @@ -335,10 +336,18 @@ class 
SyncHandler(BaseHandler): return membership == Membership.INVITE return True - events_and_states = filter(allowed, events_and_states) + + event_and_state = filter( + allowed, + [ + (e, event_id_to_state[e.event_id]) + for e in events + ] + ) + defer.returnValue([ ev - for ev, _ in events_and_states + for ev, _ in event_and_state ]) @defer.inlineCallbacks diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 19b16ed40..a43853007 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -239,10 +239,10 @@ class StateStore(SQLBaseStore): for event_id, group in event_to_groups.items() } - defer.returnValue([ - event_to_state[event] + defer.returnValue({ + event: event_to_state[event] for event in event_ids - ]) + }) @cached(num_args=2, lru=True, max_entries=100000) def _get_state_group_for_event(self, room_id, event_id): From dc8399ee0059bb4ee93fb7c755bc36ade16230a8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 11 Aug 2015 11:30:59 +0100 Subject: [PATCH 138/266] Remove debug loggers --- synapse/handlers/message.py | 3 --- synapse/handlers/sync.py | 3 --- 2 files changed, 6 deletions(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 95a8f05c0..b941312ef 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -145,9 +145,6 @@ class MessageHandler(BaseHandler): ) ) - for ev, state in event_id_to_state.items(): - logger.info("event_id: %r, state: %r", ev, state) - def allowed(event_and_state): event, state = event_and_state diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 9a97bff84..d960078e7 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -302,9 +302,6 @@ class SyncHandler(BaseHandler): ) ) - for ev, state in event_id_to_state.items(): - logger.info("event_id: %r, state: %r", ev, state) - def allowed(event_and_state): event, state = event_and_state From 4762c276cb460044099a98a4343c3f2b0ce2abe4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 11 Aug 2015 11:33:41 +0100 Subject: [PATCH 139/266] Docs --- synapse/storage/_base.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 826c393cd..bdca61d2e 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -241,6 +241,15 @@ class CacheListDescriptor(object): """ def __init__(self, orig, cache, list_name, num_args=1, inlineCallbacks=False): + """ + Args: + orig (function) + cache (Cache) + list_name (str): Name of the argument which is the bulk lookup list + num_args (int) + inlineCallbacks (bool): Whether orig is a generator that should + be wrapped by defer.inlineCallbacks + """ self.orig = orig if inlineCallbacks: From 6eaa116867ebfd22718505d1aa29dce4679c87be Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 11 Aug 2015 11:35:24 +0100 Subject: [PATCH 140/266] Comment --- synapse/storage/_base.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index bdca61d2e..4d86fe7c7 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -288,6 +288,8 @@ class CacheListDescriptor(object): keyargs = [arg_dict[arg_nm] for arg_nm in self.arg_names] list_args = arg_dict[self.list_name] + # cached is a dict arg -> deferred, where deferred results in a + # 2-tuple (`arg`, `result`) cached = {} missing = [] for arg in list_args: From 53a817518bd27e6838c747a03d605165f96dbd12 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 11 Aug 2015 11:40:40 +0100 Subject: 
[PATCH 141/266] Comments --- synapse/storage/_base.py | 2 ++ synapse/storage/state.py | 14 +++++++++++--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 4d86fe7c7..e5441aafb 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -315,6 +315,8 @@ class CacheListDescriptor(object): ret_d = ObservableDeferred(ret_d) + # We need to create deferreds for each arg in the list so that + # we can insert the new deferred into the cache. for arg in missing: observer = ret_d.observe() observer.addCallback(lambda r, arg: r[arg], arg) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index a43853007..ea5fa9de7 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -173,6 +173,9 @@ class StateStore(SQLBaseStore): def _get_state_groups_from_groups(self, groups_and_types): """Returns dictionary state_group -> state event ids + + Args: + groups_and_types (list): list of 2-tuple (`group`, `types`) """ def f(txn): results = {} @@ -284,8 +287,11 @@ class StateStore(SQLBaseStore): def _get_state_for_group_from_cache(self, group, types=None): """Checks if group is in cache. See `_get_state_for_groups` - Returns 2-tuple (`state_dict`, `missing_types`). `missing_types` is the - list of types that aren't in the cache for that group. + Returns 3-tuple (`state_dict`, `missing_types`, `got_all`). + `missing_types` is the list of types that aren't in the cache for that + group, or None if `types` is None. `got_all` is a bool indicating if + we successfully retrieved all requests state from the cache, if False + we need to query the DB for the missing state. """ is_all, state_dict = self._state_group_cache.get(group) @@ -323,11 +329,13 @@ class StateStore(SQLBaseStore): return True return False + got_all = not (missing_types or types is None) + return { k: v for k, v in state_dict.items() if include(k[0], k[1]) - }, missing_types, not missing_types and types is not None + }, missing_types, got_all @defer.inlineCallbacks def _get_state_for_groups(self, groups, types=None): From 9dba813234fdeeef5097572dec9c975a4a83a99b Mon Sep 17 00:00:00 2001 From: Daniel Wagner-Hall Date: Tue, 11 Aug 2015 16:34:17 +0100 Subject: [PATCH 142/266] Remove redundant if-guard The startswith("@") does the job --- synapse/api/auth.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/synapse/api/auth.py b/synapse/api/auth.py index 487be7ce9..bf02347f6 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -521,23 +521,22 @@ class Auth(object): # Check state_key if hasattr(event, "state_key"): - if not event.state_key.startswith("_"): - if event.state_key.startswith("@"): - if event.state_key != event.user_id: + if event.state_key.startswith("@"): + if event.state_key != event.user_id: + raise AuthError( + 403, + "You are not allowed to set others state" + ) + else: + sender_domain = UserID.from_string( + event.user_id + ).domain + + if sender_domain != event.state_key: raise AuthError( 403, "You are not allowed to set others state" ) - else: - sender_domain = UserID.from_string( - event.user_id - ).domain - - if sender_domain != event.state_key: - raise AuthError( - 403, - "You are not allowed to set others state" - ) return True From 88e03da39f3746f7d754150964da064772b6ba5f Mon Sep 17 00:00:00 2001 From: Daniel Wagner-Hall Date: Tue, 11 Aug 2015 16:35:28 +0100 Subject: [PATCH 143/266] Minor docs cleanup --- synapse/api/auth.py | 9 +++++++-- synapse/types.py | 3 ++- 2 files 
changed, 9 insertions(+), 3 deletions(-) diff --git a/synapse/api/auth.py b/synapse/api/auth.py index bf02347f6..0eef7442e 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -44,6 +44,11 @@ class Auth(object): def check(self, event, auth_events): """ Checks if this event is correctly authed. + Args: + event: the event being checked. + auth_events (dict: event-key -> event): the existing room state. + + Returns: True if the auth checks pass. """ @@ -319,7 +324,7 @@ class Auth(object): Returns: tuple : of UserID and device string: User ID object of the user making the request - Client ID object of the client instance the user is using + ClientInfo object of the client instance the user is using Raises: AuthError if no user by that token exists or the token is invalid. """ @@ -352,7 +357,7 @@ class Auth(object): ) return except KeyError: - pass # normal users won't have this query parameter set + pass # normal users won't have the user_id query parameter set. user_info = yield self.get_user_by_token(access_token) user = user_info["user"] diff --git a/synapse/types.py b/synapse/types.py index dd1b10d64..e190374cb 100644 --- a/synapse/types.py +++ b/synapse/types.py @@ -178,7 +178,7 @@ class RoomStreamToken(namedtuple("_StreamToken", "topological stream")): Live tokens start with an "s" followed by the "stream_ordering" id of the event it comes after. Historic tokens start with a "t" followed by the - "topological_ordering" id of the event it comes after, follewed by "-", + "topological_ordering" id of the event it comes after, followed by "-", followed by the "stream_ordering" id of the event it comes after. """ __slots__ = [] @@ -211,4 +211,5 @@ class RoomStreamToken(namedtuple("_StreamToken", "topological stream")): return "s%d" % (self.stream,) +# token_id is the primary key ID of the access token, not the access token itself. ClientInfo = namedtuple("ClientInfo", ("device_id", "token_id")) From 45610305eac1d5ce15faf0bf0482442aa896f4a6 Mon Sep 17 00:00:00 2001 From: Daniel Wagner-Hall Date: Tue, 11 Aug 2015 16:43:27 +0100 Subject: [PATCH 144/266] Add missing space because linter --- synapse/api/auth.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/api/auth.py b/synapse/api/auth.py index 0eef7442e..a7f428a96 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -357,7 +357,7 @@ class Auth(object): ) return except KeyError: - pass # normal users won't have the user_id query parameter set. + pass # normal users won't have the user_id query parameter set. 
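The token grammar documented in the types.py docstring above is easy to exercise; a self-contained sketch (this parse/render pair is written from the docstring, not copied from synapse/types.py):

    from collections import namedtuple

    class RoomStreamToken(namedtuple("_StreamToken", "topological stream")):
        @classmethod
        def parse(cls, string):
            if string.startswith("s"):
                # live token: "s" + stream_ordering
                return cls(topological=None, stream=int(string[1:]))
            if string.startswith("t"):
                # historic token: "t" + topological_ordering + "-" + stream_ordering
                topo, _, stream = string[1:].partition("-")
                return cls(topological=int(topo), stream=int(stream))
            raise ValueError("Invalid token %r" % (string,))

        def __str__(self):
            if self.topological is not None:
                return "t%d-%d" % (self.topological, self.stream)
            return "s%d" % (self.stream,)

    assert str(RoomStreamToken.parse("s42")) == "s42"      # live token
    assert str(RoomStreamToken.parse("t5-42")) == "t5-42"  # historic token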
user_info = yield self.get_user_by_token(access_token) user = user_info["user"] From dcd5983fe4eb2a7c403c9969d2d8c12342440ff7 Mon Sep 17 00:00:00 2001 From: Daniel Wagner-Hall Date: Tue, 11 Aug 2015 16:54:06 +0100 Subject: [PATCH 145/266] Remove call to recently removed function in mock --- tests/test_distributor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_distributor.py b/tests/test_distributor.py index 6a0095d85..8ed48cfb7 100644 --- a/tests/test_distributor.py +++ b/tests/test_distributor.py @@ -73,8 +73,8 @@ class DistributorTestCase(unittest.TestCase): yield d self.assertTrue(d.called) - observers[0].assert_called_once("Go") - observers[1].assert_called_once("Go") + observers[0].assert_called_once_with("Go") + observers[1].assert_called_once_with("Go") self.assertEquals(mock_logger.warning.call_count, 1) self.assertIsInstance(mock_logger.warning.call_args[0][0], From 4ff0228c254e146e37ffd2b1c1df22d1b67353cb Mon Sep 17 00:00:00 2001 From: Daniel Wagner-Hall Date: Tue, 11 Aug 2015 16:54:06 +0100 Subject: [PATCH 146/266] Remove call to recently removed function in mock --- tests/test_distributor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_distributor.py b/tests/test_distributor.py index 6a0095d85..8ed48cfb7 100644 --- a/tests/test_distributor.py +++ b/tests/test_distributor.py @@ -73,8 +73,8 @@ class DistributorTestCase(unittest.TestCase): yield d self.assertTrue(d.called) - observers[0].assert_called_once("Go") - observers[1].assert_called_once("Go") + observers[0].assert_called_once_with("Go") + observers[1].assert_called_once_with("Go") self.assertEquals(mock_logger.warning.call_count, 1) self.assertIsInstance(mock_logger.warning.call_args[0][0], From 2df8dd9b37f26e3ad0d3647a1e78804a85d48c0c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 11 Aug 2015 17:59:32 +0100 Subject: [PATCH 147/266] Move all the caches into their own package, synapse.util.caches --- synapse/federation/federation_client.py | 2 +- synapse/state.py | 2 +- synapse/storage/_base.py | 335 +--------------- synapse/storage/directory.py | 3 +- synapse/storage/event_federation.py | 3 +- synapse/storage/keys.py | 3 +- synapse/storage/presence.py | 3 +- synapse/storage/push_rule.py | 3 +- synapse/storage/receipts.py | 3 +- synapse/storage/registration.py | 3 +- synapse/storage/room.py | 3 +- synapse/storage/roommember.py | 3 +- synapse/storage/state.py | 5 +- synapse/storage/stream.py | 3 +- synapse/storage/transactions.py | 3 +- synapse/util/caches/__init__.py | 14 + synapse/util/caches/descriptors.py | 359 ++++++++++++++++++ synapse/util/{ => caches}/dictionary_cache.py | 2 +- synapse/util/{ => caches}/expiringcache.py | 0 synapse/util/{ => caches}/lrucache.py | 0 tests/storage/test__base.py | 2 +- tests/util/test_dict_cache.py | 2 +- tests/util/test_lrucache.py | 4 +- 23 files changed, 408 insertions(+), 352 deletions(-) create mode 100644 synapse/util/caches/__init__.py create mode 100644 synapse/util/caches/descriptors.py rename synapse/util/{ => caches}/dictionary_cache.py (98%) rename synapse/util/{ => caches}/expiringcache.py (100%) rename synapse/util/{ => caches}/lrucache.py (100%) diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 7736d14fb..58a6d6a0e 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -23,7 +23,7 @@ from synapse.api.errors import ( CodeMessageException, HttpResponseException, SynapseError, ) from synapse.util 
import unwrapFirstError -from synapse.util.expiringcache import ExpiringCache +from synapse.util.caches.expiringcache import ExpiringCache from synapse.util.logutils import log_function from synapse.events import FrozenEvent import synapse.metrics diff --git a/synapse/state.py b/synapse/state.py index b5e5d7bbd..1fe4d066b 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -18,7 +18,7 @@ from twisted.internet import defer from synapse.util.logutils import log_function from synapse.util.async import run_on_reactor -from synapse.util.expiringcache import ExpiringCache +from synapse.util.caches.expiringcache import ExpiringCache from synapse.api.constants import EventTypes from synapse.api.errors import AuthError from synapse.api.auth import AuthEventTypes diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index e5441aafb..1444767a5 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -15,27 +15,22 @@ import logging from synapse.api.errors import StoreError -from synapse.util.async import ObservableDeferred -from synapse.util import unwrapFirstError from synapse.util.logutils import log_function from synapse.util.logcontext import preserve_context_over_fn, LoggingContext -from synapse.util.lrucache import LruCache -from synapse.util.dictionary_cache import DictionaryCache +from synapse.util.caches.dictionary_cache import DictionaryCache +from synapse.util.caches.descriptors import Cache import synapse.metrics from util.id_generators import IdGenerator, StreamIdGenerator from twisted.internet import defer -from collections import namedtuple, OrderedDict +from collections import namedtuple -import functools -import inspect import sys import time import threading -DEBUG_CACHES = False logger = logging.getLogger(__name__) @@ -51,330 +46,6 @@ sql_scheduling_timer = metrics.register_distribution("schedule_time") sql_query_timer = metrics.register_distribution("query_time", labels=["verb"]) sql_txn_timer = metrics.register_distribution("transaction_time", labels=["desc"]) -caches_by_name = {} -cache_counter = metrics.register_cache( - "cache", - lambda: {(name,): len(caches_by_name[name]) for name in caches_by_name.keys()}, - labels=["name"], -) - - -_CacheSentinel = object() - - -class Cache(object): - - def __init__(self, name, max_entries=1000, keylen=1, lru=True): - if lru: - self.cache = LruCache(max_size=max_entries) - self.max_entries = None - else: - self.cache = OrderedDict() - self.max_entries = max_entries - - self.name = name - self.keylen = keylen - self.sequence = 0 - self.thread = None - caches_by_name[name] = self.cache - - def check_thread(self): - expected_thread = self.thread - if expected_thread is None: - self.thread = threading.current_thread() - else: - if expected_thread is not threading.current_thread(): - raise ValueError( - "Cache objects can only be accessed from the main thread" - ) - - def get(self, key, default=_CacheSentinel): - val = self.cache.get(key, _CacheSentinel) - if val is not _CacheSentinel: - cache_counter.inc_hits(self.name) - return val - - cache_counter.inc_misses(self.name) - - if default is _CacheSentinel: - raise KeyError() - else: - return default - - def update(self, sequence, key, value): - self.check_thread() - if self.sequence == sequence: - # Only update the cache if the caches sequence number matches the - # number that the cache had before the SELECT was started (SYN-369) - self.prefill(key, value) - - def prefill(self, key, value): - if self.max_entries is not None: - while len(self.cache) >= 
self.max_entries: - self.cache.popitem(last=False) - - self.cache[key] = value - - def invalidate(self, key): - self.check_thread() - if not isinstance(key, tuple): - raise ValueError("keyargs must be a tuple.") - - # Increment the sequence number so that any SELECT statements that - # raced with the INSERT don't update the cache (SYN-369) - self.sequence += 1 - self.cache.pop(key, None) - - def invalidate_all(self): - self.check_thread() - self.sequence += 1 - self.cache.clear() - - -class CacheDescriptor(object): - """ A method decorator that applies a memoizing cache around the function. - - This caches deferreds, rather than the results themselves. Deferreds that - fail are removed from the cache. - - The function is presumed to take zero or more arguments, which are used in - a tuple as the key for the cache. Hits are served directly from the cache; - misses use the function body to generate the value. - - The wrapped function has an additional member, a callable called - "invalidate". This can be used to remove individual entries from the cache. - - The wrapped function has another additional callable, called "prefill", - which can be used to insert values into the cache specifically, without - calling the calculation function. - """ - def __init__(self, orig, max_entries=1000, num_args=1, lru=True, - inlineCallbacks=False): - self.orig = orig - - if inlineCallbacks: - self.function_to_call = defer.inlineCallbacks(orig) - else: - self.function_to_call = orig - - self.max_entries = max_entries - self.num_args = num_args - self.lru = lru - - self.arg_names = inspect.getargspec(orig).args[1:num_args+1] - - if len(self.arg_names) < self.num_args: - raise Exception( - "Not enough explicit positional arguments to key off of for %r." - " (@cached cannot key off of *args or **kwars)" - % (orig.__name__,) - ) - - self.cache = Cache( - name=self.orig.__name__, - max_entries=self.max_entries, - keylen=self.num_args, - lru=self.lru, - ) - - def __get__(self, obj, objtype=None): - - @functools.wraps(self.orig) - def wrapped(*args, **kwargs): - arg_dict = inspect.getcallargs(self.orig, obj, *args, **kwargs) - cache_key = tuple(arg_dict[arg_nm] for arg_nm in self.arg_names) - try: - cached_result_d = self.cache.get(cache_key) - - observer = cached_result_d.observe() - if DEBUG_CACHES: - @defer.inlineCallbacks - def check_result(cached_result): - actual_result = yield self.function_to_call(obj, *args, **kwargs) - if actual_result != cached_result: - logger.error( - "Stale cache entry %s%r: cached: %r, actual %r", - self.orig.__name__, cache_key, - cached_result, actual_result, - ) - raise ValueError("Stale cache entry") - defer.returnValue(cached_result) - observer.addCallback(check_result) - - return observer - except KeyError: - # Get the sequence number of the cache before reading from the - # database so that we can tell if the cache is invalidated - # while the SELECT is executing (SYN-369) - sequence = self.cache.sequence - - ret = defer.maybeDeferred( - self.function_to_call, - obj, *args, **kwargs - ) - - def onErr(f): - self.cache.invalidate(cache_key) - return f - - ret.addErrback(onErr) - - ret = ObservableDeferred(ret, consumeErrors=True) - self.cache.update(sequence, cache_key, ret) - - return ret.observe() - - wrapped.invalidate = self.cache.invalidate - wrapped.invalidate_all = self.cache.invalidate_all - wrapped.prefill = self.cache.prefill - - obj.__dict__[self.orig.__name__] = wrapped - - return wrapped - - -class CacheListDescriptor(object): - """Wraps an existing cache to support 
bulk fetching of keys. - - Given a list of keys it looks in the cache to find any hits, then passes - the list of missing keys to the wrapped fucntion. - """ - - def __init__(self, orig, cache, list_name, num_args=1, inlineCallbacks=False): - """ - Args: - orig (function) - cache (Cache) - list_name (str): Name of the argument which is the bulk lookup list - num_args (int) - inlineCallbacks (bool): Whether orig is a generator that should - be wrapped by defer.inlineCallbacks - """ - self.orig = orig - - if inlineCallbacks: - self.function_to_call = defer.inlineCallbacks(orig) - else: - self.function_to_call = orig - - self.num_args = num_args - self.list_name = list_name - - self.arg_names = inspect.getargspec(orig).args[1:num_args+1] - self.list_pos = self.arg_names.index(self.list_name) - - self.cache = cache - - self.sentinel = object() - - if len(self.arg_names) < self.num_args: - raise Exception( - "Not enough explicit positional arguments to key off of for %r." - " (@cached cannot key off of *args or **kwars)" - % (orig.__name__,) - ) - - if self.list_name not in self.arg_names: - raise Exception( - "Couldn't see arguments %r for %r." - % (self.list_name, cache.name,) - ) - - def __get__(self, obj, objtype=None): - - @functools.wraps(self.orig) - def wrapped(*args, **kwargs): - arg_dict = inspect.getcallargs(self.orig, obj, *args, **kwargs) - keyargs = [arg_dict[arg_nm] for arg_nm in self.arg_names] - list_args = arg_dict[self.list_name] - - # cached is a dict arg -> deferred, where deferred results in a - # 2-tuple (`arg`, `result`) - cached = {} - missing = [] - for arg in list_args: - key = list(keyargs) - key[self.list_pos] = arg - - try: - res = self.cache.get(tuple(key)).observe() - res.addCallback(lambda r, arg: (arg, r), arg) - cached[arg] = res - except KeyError: - missing.append(arg) - - if missing: - sequence = self.cache.sequence - args_to_call = dict(arg_dict) - args_to_call[self.list_name] = missing - - ret_d = defer.maybeDeferred( - self.function_to_call, - **args_to_call - ) - - ret_d = ObservableDeferred(ret_d) - - # We need to create deferreds for each arg in the list so that - # we can insert the new deferred into the cache. 
- for arg in missing: - observer = ret_d.observe() - observer.addCallback(lambda r, arg: r[arg], arg) - - observer = ObservableDeferred(observer) - - key = list(keyargs) - key[self.list_pos] = arg - self.cache.update(sequence, tuple(key), observer) - - def invalidate(f, key): - self.cache.invalidate(key) - return f - observer.addErrback(invalidate, tuple(key)) - - res = observer.observe() - res.addCallback(lambda r, arg: (arg, r), arg) - - cached[arg] = res - - return defer.gatherResults( - cached.values(), - consumeErrors=True, - ).addErrback(unwrapFirstError).addCallback(lambda res: dict(res)) - - obj.__dict__[self.orig.__name__] = wrapped - - return wrapped - - -def cached(max_entries=1000, num_args=1, lru=True): - return lambda orig: CacheDescriptor( - orig, - max_entries=max_entries, - num_args=num_args, - lru=lru - ) - - -def cachedInlineCallbacks(max_entries=1000, num_args=1, lru=False): - return lambda orig: CacheDescriptor( - orig, - max_entries=max_entries, - num_args=num_args, - lru=lru, - inlineCallbacks=True, - ) - - -def cachedList(cache, list_name, num_args=1, inlineCallbacks=False): - return lambda orig: CacheListDescriptor( - orig, - cache=cache, - list_name=list_name, - num_args=num_args, - inlineCallbacks=inlineCallbacks, - ) - class LoggingTransaction(object): """An object that almost-transparently proxies for the 'txn' object diff --git a/synapse/storage/directory.py b/synapse/storage/directory.py index f3947bbe8..d92028ea4 100644 --- a/synapse/storage/directory.py +++ b/synapse/storage/directory.py @@ -13,7 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ._base import SQLBaseStore, cached +from ._base import SQLBaseStore +from synapse.util.caches.descriptors import cached from synapse.api.errors import SynapseError diff --git a/synapse/storage/event_federation.py b/synapse/storage/event_federation.py index 910b6598a..25cc84eb9 100644 --- a/synapse/storage/event_federation.py +++ b/synapse/storage/event_federation.py @@ -15,7 +15,8 @@ from twisted.internet import defer -from ._base import SQLBaseStore, cached +from ._base import SQLBaseStore +from synapse.util.caches.descriptors import cached from syutil.base64util import encode_base64 import logging diff --git a/synapse/storage/keys.py b/synapse/storage/keys.py index 49b8e37cf..ffd6daa88 100644 --- a/synapse/storage/keys.py +++ b/synapse/storage/keys.py @@ -13,7 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -from _base import SQLBaseStore, cachedInlineCallbacks +from _base import SQLBaseStore +from synapse.util.caches.descriptors import cachedInlineCallbacks from twisted.internet import defer diff --git a/synapse/storage/presence.py b/synapse/storage/presence.py index 576cf670c..4f91a2b87 100644 --- a/synapse/storage/presence.py +++ b/synapse/storage/presence.py @@ -13,7 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ._base import SQLBaseStore, cached +from ._base import SQLBaseStore +from synapse.util.caches.descriptors import cached from twisted.internet import defer diff --git a/synapse/storage/push_rule.py b/synapse/storage/push_rule.py index 9b88ca7b3..5305b7e12 100644 --- a/synapse/storage/push_rule.py +++ b/synapse/storage/push_rule.py @@ -13,7 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from ._base import SQLBaseStore, cachedInlineCallbacks +from ._base import SQLBaseStore +from synapse.util.caches.descriptors import cachedInlineCallbacks from twisted.internet import defer import logging diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py index b79d6683c..cac1a5657 100644 --- a/synapse/storage/receipts.py +++ b/synapse/storage/receipts.py @@ -13,7 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ._base import SQLBaseStore, cachedInlineCallbacks +from ._base import SQLBaseStore +from synapse.util.caches.descriptors import cachedInlineCallbacks from twisted.internet import defer diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 4eaa088b3..aa446f94c 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -17,7 +17,8 @@ from twisted.internet import defer from synapse.api.errors import StoreError, Codes -from ._base import SQLBaseStore, cached +from ._base import SQLBaseStore +from synapse.util.caches.descriptors import cached class RegistrationStore(SQLBaseStore): diff --git a/synapse/storage/room.py b/synapse/storage/room.py index dd5bc2c8f..5e07b7e0e 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -17,7 +17,8 @@ from twisted.internet import defer from synapse.api.errors import StoreError -from ._base import SQLBaseStore, cachedInlineCallbacks +from ._base import SQLBaseStore +from synapse.util.caches.descriptors import cachedInlineCallbacks import collections import logging diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 9f14f38f2..8eee2dfbc 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -17,7 +17,8 @@ from twisted.internet import defer from collections import namedtuple -from ._base import SQLBaseStore, cached +from ._base import SQLBaseStore +from synapse.util.caches.descriptors import cached from synapse.api.constants import Membership from synapse.types import UserID diff --git a/synapse/storage/state.py b/synapse/storage/state.py index ea5fa9de7..79c3b82d9 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -13,7 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ._base import SQLBaseStore, cached, cachedInlineCallbacks, cachedList +from ._base import SQLBaseStore +from synapse.util.caches.descriptors import ( + cached, cachedInlineCallbacks, cachedList +) from twisted.internet import defer diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py index b59fe8100..d7fe423f5 100644 --- a/synapse/storage/stream.py +++ b/synapse/storage/stream.py @@ -35,7 +35,8 @@ what sort order was used: from twisted.internet import defer -from ._base import SQLBaseStore, cachedInlineCallbacks +from ._base import SQLBaseStore +from synapse.util.caches.descriptors import cachedInlineCallbacks from synapse.api.constants import EventTypes from synapse.types import RoomStreamToken from synapse.util.logutils import log_function diff --git a/synapse/storage/transactions.py b/synapse/storage/transactions.py index 624da4a9d..c8c7e6591 100644 --- a/synapse/storage/transactions.py +++ b/synapse/storage/transactions.py @@ -13,7 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from ._base import SQLBaseStore, cached +from ._base import SQLBaseStore +from synapse.util.caches.descriptors import cached from collections import namedtuple diff --git a/synapse/util/caches/__init__.py b/synapse/util/caches/__init__.py new file mode 100644 index 000000000..1a84d94cd --- /dev/null +++ b/synapse/util/caches/__init__.py @@ -0,0 +1,14 @@ +# -*- coding: utf-8 -*- +# Copyright 2015 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py new file mode 100644 index 000000000..82dd09cf5 --- /dev/null +++ b/synapse/util/caches/descriptors.py @@ -0,0 +1,359 @@ +# -*- coding: utf-8 -*- +# Copyright 2014, 2015 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import logging + +from synapse.util.async import ObservableDeferred +from synapse.util import unwrapFirstError +from synapse.util.caches.lrucache import LruCache +import synapse.metrics + +from twisted.internet import defer + +from collections import OrderedDict + +import functools +import inspect +import threading + +logger = logging.getLogger(__name__) + + +DEBUG_CACHES = False + +metrics = synapse.metrics.get_metrics_for("synapse.util.caches") + +caches_by_name = {} +cache_counter = metrics.register_cache( + "cache", + lambda: {(name,): len(caches_by_name[name]) for name in caches_by_name.keys()}, + labels=["name"], +) + + +_CacheSentinel = object() + + +class Cache(object): + + def __init__(self, name, max_entries=1000, keylen=1, lru=True): + if lru: + self.cache = LruCache(max_size=max_entries) + self.max_entries = None + else: + self.cache = OrderedDict() + self.max_entries = max_entries + + self.name = name + self.keylen = keylen + self.sequence = 0 + self.thread = None + caches_by_name[name] = self.cache + + def check_thread(self): + expected_thread = self.thread + if expected_thread is None: + self.thread = threading.current_thread() + else: + if expected_thread is not threading.current_thread(): + raise ValueError( + "Cache objects can only be accessed from the main thread" + ) + + def get(self, key, default=_CacheSentinel): + val = self.cache.get(key, _CacheSentinel) + if val is not _CacheSentinel: + cache_counter.inc_hits(self.name) + return val + + cache_counter.inc_misses(self.name) + + if default is _CacheSentinel: + raise KeyError() + else: + return default + + def update(self, sequence, key, value): + self.check_thread() + if self.sequence == sequence: + # Only update the cache if the caches sequence number matches the + # number that the cache had before the SELECT was started (SYN-369) + self.prefill(key, value) + + def prefill(self, key, value): + if self.max_entries is not None: + while len(self.cache) >= self.max_entries: + self.cache.popitem(last=False) + + self.cache[key] = value + + def invalidate(self, key): + self.check_thread() + if not isinstance(key, tuple): + raise ValueError("keyargs must be a tuple.") + + # Increment the sequence number so that any SELECT statements that + # raced with the INSERT don't update the cache (SYN-369) + self.sequence += 1 + self.cache.pop(key, None) + + def invalidate_all(self): + self.check_thread() + self.sequence += 1 + self.cache.clear() + + +class CacheDescriptor(object): + """ A method decorator that applies a memoizing cache around the function. + + This caches deferreds, rather than the results themselves. Deferreds that + fail are removed from the cache. + + The function is presumed to take zero or more arguments, which are used in + a tuple as the key for the cache. Hits are served directly from the cache; + misses use the function body to generate the value. + + The wrapped function has an additional member, a callable called + "invalidate". This can be used to remove individual entries from the cache. + + The wrapped function has another additional callable, called "prefill", + which can be used to insert values into the cache specifically, without + calling the calculation function. 
+    """
+    def __init__(self, orig, max_entries=1000, num_args=1, lru=True,
+                 inlineCallbacks=False):
+        self.orig = orig
+
+        if inlineCallbacks:
+            self.function_to_call = defer.inlineCallbacks(orig)
+        else:
+            self.function_to_call = orig
+
+        self.max_entries = max_entries
+        self.num_args = num_args
+        self.lru = lru
+
+        self.arg_names = inspect.getargspec(orig).args[1:num_args+1]
+
+        if len(self.arg_names) < self.num_args:
+            raise Exception(
+                "Not enough explicit positional arguments to key off of for %r."
+                " (@cached cannot key off of *args or **kwargs)"
+                % (orig.__name__,)
+            )
+
+        self.cache = Cache(
+            name=self.orig.__name__,
+            max_entries=self.max_entries,
+            keylen=self.num_args,
+            lru=self.lru,
+        )
+
+    def __get__(self, obj, objtype=None):
+
+        @functools.wraps(self.orig)
+        def wrapped(*args, **kwargs):
+            arg_dict = inspect.getcallargs(self.orig, obj, *args, **kwargs)
+            cache_key = tuple(arg_dict[arg_nm] for arg_nm in self.arg_names)
+            try:
+                cached_result_d = self.cache.get(cache_key)
+
+                observer = cached_result_d.observe()
+                if DEBUG_CACHES:
+                    @defer.inlineCallbacks
+                    def check_result(cached_result):
+                        actual_result = yield self.function_to_call(obj, *args, **kwargs)
+                        if actual_result != cached_result:
+                            logger.error(
+                                "Stale cache entry %s%r: cached: %r, actual %r",
+                                self.orig.__name__, cache_key,
+                                cached_result, actual_result,
+                            )
+                            raise ValueError("Stale cache entry")
+                        defer.returnValue(cached_result)
+                    observer.addCallback(check_result)
+
+                return observer
+            except KeyError:
+                # Get the sequence number of the cache before reading from the
+                # database so that we can tell if the cache is invalidated
+                # while the SELECT is executing (SYN-369)
+                sequence = self.cache.sequence
+
+                ret = defer.maybeDeferred(
+                    self.function_to_call,
+                    obj, *args, **kwargs
+                )
+
+                def onErr(f):
+                    self.cache.invalidate(cache_key)
+                    return f
+
+                ret.addErrback(onErr)
+
+                ret = ObservableDeferred(ret, consumeErrors=True)
+                self.cache.update(sequence, cache_key, ret)
+
+                return ret.observe()
+
+        wrapped.invalidate = self.cache.invalidate
+        wrapped.invalidate_all = self.cache.invalidate_all
+        wrapped.prefill = self.cache.prefill
+
+        obj.__dict__[self.orig.__name__] = wrapped
+
+        return wrapped
+
+
+class CacheListDescriptor(object):
+    """Wraps an existing cache to support bulk fetching of keys.
+
+    Given a list of keys it looks in the cache to find any hits, then passes
+    the list of missing keys to the wrapped function.
+    """
+
+    def __init__(self, orig, cache, list_name, num_args=1, inlineCallbacks=False):
+        """
+        Args:
+            orig (function)
+            cache (Cache)
+            list_name (str): Name of the argument which is the bulk lookup list
+            num_args (int)
+            inlineCallbacks (bool): Whether orig is a generator that should
+                be wrapped by defer.inlineCallbacks
+        """
+        self.orig = orig
+
+        if inlineCallbacks:
+            self.function_to_call = defer.inlineCallbacks(orig)
+        else:
+            self.function_to_call = orig
+
+        self.num_args = num_args
+        self.list_name = list_name
+
+        self.arg_names = inspect.getargspec(orig).args[1:num_args+1]
+        self.list_pos = self.arg_names.index(self.list_name)
+
+        self.cache = cache
+
+        self.sentinel = object()
+
+        if len(self.arg_names) < self.num_args:
+            raise Exception(
+                "Not enough explicit positional arguments to key off of for %r."
+                " (@cached cannot key off of *args or **kwargs)"
+                % (orig.__name__,)
+            )
+
+        if self.list_name not in self.arg_names:
+            raise Exception(
+                "Couldn't see arguments %r for %r."
+ % (self.list_name, cache.name,) + ) + + def __get__(self, obj, objtype=None): + + @functools.wraps(self.orig) + def wrapped(*args, **kwargs): + arg_dict = inspect.getcallargs(self.orig, obj, *args, **kwargs) + keyargs = [arg_dict[arg_nm] for arg_nm in self.arg_names] + list_args = arg_dict[self.list_name] + + # cached is a dict arg -> deferred, where deferred results in a + # 2-tuple (`arg`, `result`) + cached = {} + missing = [] + for arg in list_args: + key = list(keyargs) + key[self.list_pos] = arg + + try: + res = self.cache.get(tuple(key)).observe() + res.addCallback(lambda r, arg: (arg, r), arg) + cached[arg] = res + except KeyError: + missing.append(arg) + + if missing: + sequence = self.cache.sequence + args_to_call = dict(arg_dict) + args_to_call[self.list_name] = missing + + ret_d = defer.maybeDeferred( + self.function_to_call, + **args_to_call + ) + + ret_d = ObservableDeferred(ret_d) + + # We need to create deferreds for each arg in the list so that + # we can insert the new deferred into the cache. + for arg in missing: + observer = ret_d.observe() + observer.addCallback(lambda r, arg: r[arg], arg) + + observer = ObservableDeferred(observer) + + key = list(keyargs) + key[self.list_pos] = arg + self.cache.update(sequence, tuple(key), observer) + + def invalidate(f, key): + self.cache.invalidate(key) + return f + observer.addErrback(invalidate, tuple(key)) + + res = observer.observe() + res.addCallback(lambda r, arg: (arg, r), arg) + + cached[arg] = res + + return defer.gatherResults( + cached.values(), + consumeErrors=True, + ).addErrback(unwrapFirstError).addCallback(lambda res: dict(res)) + + obj.__dict__[self.orig.__name__] = wrapped + + return wrapped + + +def cached(max_entries=1000, num_args=1, lru=True): + return lambda orig: CacheDescriptor( + orig, + max_entries=max_entries, + num_args=num_args, + lru=lru + ) + + +def cachedInlineCallbacks(max_entries=1000, num_args=1, lru=False): + return lambda orig: CacheDescriptor( + orig, + max_entries=max_entries, + num_args=num_args, + lru=lru, + inlineCallbacks=True, + ) + + +def cachedList(cache, list_name, num_args=1, inlineCallbacks=False): + return lambda orig: CacheListDescriptor( + orig, + cache=cache, + list_name=list_name, + num_args=num_args, + inlineCallbacks=inlineCallbacks, + ) diff --git a/synapse/util/dictionary_cache.py b/synapse/util/caches/dictionary_cache.py similarity index 98% rename from synapse/util/dictionary_cache.py rename to synapse/util/caches/dictionary_cache.py index c7564cdf0..26d464f4f 100644 --- a/synapse/util/dictionary_cache.py +++ b/synapse/util/caches/dictionary_cache.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
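The decorator module added above is easiest to read alongside a usage sketch. The following is illustrative only and not part of any patch in this series: the store class, table, and column names are invented, although _simple_select_one_onecol follows the style of the SQLBaseStore helpers. What it relies on from descriptors.py is real: @cached memoizes the deferred keyed on a tuple of arguments, and writers invalidate with that same tuple, bumping Cache.sequence so that a SELECT racing with the write cannot repopulate the cache with stale data (SYN-369).

    from synapse.util.caches.descriptors import cached


    class ExampleStore(SQLBaseStore):
        # Hypothetical store method; only the decorator behaviour is real.
        @cached(num_args=1)
        def get_room_creator(self, room_id):
            # Runs only on a cache miss; the deferred it returns is what
            # actually gets cached, keyed on the tuple (room_id,).
            return self._simple_select_one_onecol(
                table="rooms",
                keyvalues={"room_id": room_id},
                retcol="creator",
            )

        def set_room_creator(self, room_id, creator):
            # ... perform the write, then invalidate with the same tuple
            # key so the next read goes back to the database.
            self.get_room_creator.invalidate((room_id,))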
-from synapse.util.lrucache import LruCache +from synapse.util.caches.lrucache import LruCache from collections import namedtuple import threading import logging diff --git a/synapse/util/expiringcache.py b/synapse/util/caches/expiringcache.py similarity index 100% rename from synapse/util/expiringcache.py rename to synapse/util/caches/expiringcache.py diff --git a/synapse/util/lrucache.py b/synapse/util/caches/lrucache.py similarity index 100% rename from synapse/util/lrucache.py rename to synapse/util/caches/lrucache.py diff --git a/tests/storage/test__base.py b/tests/storage/test__base.py index abee2f631..e72cace8f 100644 --- a/tests/storage/test__base.py +++ b/tests/storage/test__base.py @@ -19,7 +19,7 @@ from twisted.internet import defer from synapse.util.async import ObservableDeferred -from synapse.storage._base import Cache, cached +from synapse.util.caches.descriptors import Cache, cached class CacheTestCase(unittest.TestCase): diff --git a/tests/util/test_dict_cache.py b/tests/util/test_dict_cache.py index 79bc1225d..54ff26cd9 100644 --- a/tests/util/test_dict_cache.py +++ b/tests/util/test_dict_cache.py @@ -17,7 +17,7 @@ from twisted.internet import defer from tests import unittest -from synapse.util.dictionary_cache import DictionaryCache +from synapse.util.caches.dictionary_cache import DictionaryCache class DictCacheTestCase(unittest.TestCase): diff --git a/tests/util/test_lrucache.py b/tests/util/test_lrucache.py index ab934bf92..fc5a90432 100644 --- a/tests/util/test_lrucache.py +++ b/tests/util/test_lrucache.py @@ -16,7 +16,7 @@ from .. import unittest -from synapse.util.lrucache import LruCache +from synapse.util.caches.lrucache import LruCache class LruCacheTestCase(unittest.TestCase): @@ -52,5 +52,3 @@ class LruCacheTestCase(unittest.TestCase): cache["key"] = 1 self.assertEquals(cache.pop("key"), 1) self.assertEquals(cache.pop("key"), None) - - From b24d7ebd6e57154d13f3a3a8e1aacc20ef34b417 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Wed, 12 Aug 2015 09:39:07 +0100 Subject: [PATCH 148/266] Fix the cleanup script to use the right $DIR --- demo/clean.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/demo/clean.sh b/demo/clean.sh index 31007737f..418ca9457 100755 --- a/demo/clean.sh +++ b/demo/clean.sh @@ -12,8 +12,8 @@ if [ -f $PID_FILE ]; then fi for port in 8080 8081 8082; do - rm -rf demo/$port - rm -rf demo/media_store.$port + rm -rf $DIR/$port + rm -rf $DIR/media_store.$port done rm -rf $DIR/etc From 4807616e1615bdaaee56f800ba682d0d019de610 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 12 Aug 2015 10:13:35 +0100 Subject: [PATCH 149/266] Wire up the dictionarycache to the metrics --- synapse/util/caches/__init__.py | 13 +++++++ synapse/util/caches/descriptors.py | 17 ++------ synapse/util/caches/dictionary_cache.py | 52 +++++++++++-------------- 3 files changed, 39 insertions(+), 43 deletions(-) diff --git a/synapse/util/caches/__init__.py b/synapse/util/caches/__init__.py index 1a84d94cd..da0e06a46 100644 --- a/synapse/util/caches/__init__.py +++ b/synapse/util/caches/__init__.py @@ -12,3 +12,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+ +import synapse.metrics + +DEBUG_CACHES = False + +metrics = synapse.metrics.get_metrics_for("synapse.util.caches") + +caches_by_name = {} +cache_counter = metrics.register_cache( + "cache", + lambda: {(name,): len(caches_by_name[name]) for name in caches_by_name.keys()}, + labels=["name"], +) diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py index 82dd09cf5..c99fda849 100644 --- a/synapse/util/caches/descriptors.py +++ b/synapse/util/caches/descriptors.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright 2014, 2015 OpenMarket Ltd +# Copyright 2015 OpenMarket Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -17,7 +17,8 @@ import logging from synapse.util.async import ObservableDeferred from synapse.util import unwrapFirstError from synapse.util.caches.lrucache import LruCache -import synapse.metrics + +from . import caches_by_name, DEBUG_CACHES, cache_counter from twisted.internet import defer @@ -30,18 +31,6 @@ import threading logger = logging.getLogger(__name__) -DEBUG_CACHES = False - -metrics = synapse.metrics.get_metrics_for("synapse.util.caches") - -caches_by_name = {} -cache_counter = metrics.register_cache( - "cache", - lambda: {(name,): len(caches_by_name[name]) for name in caches_by_name.keys()}, - labels=["name"], -) - - _CacheSentinel = object() diff --git a/synapse/util/caches/dictionary_cache.py b/synapse/util/caches/dictionary_cache.py index 26d464f4f..e69adf62f 100644 --- a/synapse/util/caches/dictionary_cache.py +++ b/synapse/util/caches/dictionary_cache.py @@ -15,6 +15,7 @@ from synapse.util.caches.lrucache import LruCache from collections import namedtuple +from . import caches_by_name, cache_counter import threading import logging @@ -42,6 +43,7 @@ class DictionaryCache(object): __slots__ = [] self.sentinel = Sentinel() + caches_by_name[name] = self.cache def check_thread(self): expected_thread = self.thread @@ -54,25 +56,21 @@ class DictionaryCache(object): ) def get(self, key, dict_keys=None): - try: - entry = self.cache.get(key, self.sentinel) - if entry is not self.sentinel: - # cache_counter.inc_hits(self.name) + entry = self.cache.get(key, self.sentinel) + if entry is not self.sentinel: + cache_counter.inc_hits(self.name) - if dict_keys is None: - return DictionaryEntry(entry.full, dict(entry.value)) - else: - return DictionaryEntry(entry.full, { - k: entry.value[k] - for k in dict_keys - if k in entry.value - }) + if dict_keys is None: + return DictionaryEntry(entry.full, dict(entry.value)) + else: + return DictionaryEntry(entry.full, { + k: entry.value[k] + for k in dict_keys + if k in entry.value + }) - # cache_counter.inc_misses(self.name) - return DictionaryEntry(False, {}) - except: - logger.exception("get failed") - raise + cache_counter.inc_misses(self.name) + return DictionaryEntry(False, {}) def invalidate(self, key): self.check_thread() @@ -88,18 +86,14 @@ class DictionaryCache(object): self.cache.clear() def update(self, sequence, key, value, full=False): - try: - self.check_thread() - if self.sequence == sequence: - # Only update the cache if the caches sequence number matches the - # number that the cache had before the SELECT was started (SYN-369) - if full: - self._insert(key, value) - else: - self._update_or_insert(key, value) - except: - logger.exception("update failed") - raise + self.check_thread() + if self.sequence == sequence: + # Only update the cache if the caches sequence number matches the + # number that 
the cache had before the SELECT was started (SYN-369)
+            if full:
+                self._insert(key, value)
+            else:
+                self._update_or_insert(key, value)
 
     def _update_or_insert(self, key, value):
         entry = self.cache.setdefault(key, DictionaryEntry(False, {}))

From 7e3d1c7d92157a3cce8ed975f2a982a6a80693d0 Mon Sep 17 00:00:00 2001
From: Mark Haines
Date: Wed, 12 Aug 2015 10:54:38 +0100
Subject: [PATCH 150/266] Make a config option for whether to generate new thumbnail sizes dynamically

---
 synapse/config/repository.py                |  8 +++++++
 synapse/rest/media/v1/base_resource.py      |  1 +
 synapse/rest/media/v1/thumbnail_resource.py | 25 +++++++++++++++------
 3 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/synapse/config/repository.py b/synapse/config/repository.py
index 6891abd71..748dd14e2 100644
--- a/synapse/config/repository.py
+++ b/synapse/config/repository.py
@@ -22,6 +22,7 @@ class ContentRepositoryConfig(Config):
         self.max_image_pixels = self.parse_size(config["max_image_pixels"])
         self.media_store_path = self.ensure_directory(config["media_store_path"])
         self.uploads_path = self.ensure_directory(config["uploads_path"])
+        self.dynamic_thumbnails = config["dynamic_thumbnails"]
 
     def default_config(self, config_dir_path, server_name):
         media_store = self.default_path("media_store")
@@ -38,4 +39,11 @@ class ContentRepositoryConfig(Config):
 
         # Maximum number of pixels that will be thumbnailed
         max_image_pixels: "32M"
+
+        # Whether to generate new thumbnails on the fly to precisely match
+        # the resolution requested by the client. If true then whenever
+        # a new resolution is requested by the client the server will
+        # generate a new thumbnail. If false the server will pick a thumbnail
+        # from a precalculated list.
+        dynamic_thumbnails: false
         """ % locals()
diff --git a/synapse/rest/media/v1/base_resource.py b/synapse/rest/media/v1/base_resource.py
index 093ba847d..e39729489 100644
--- a/synapse/rest/media/v1/base_resource.py
+++ b/synapse/rest/media/v1/base_resource.py
@@ -69,6 +69,7 @@ class BaseMediaResource(Resource):
         self.filepaths = filepaths
         self.version_string = hs.version_string
         self.downloads = {}
+        self.dynamic_thumbnails = hs.config.dynamic_thumbnails
 
     def _respond_404(self, request):
         respond_with_json(
diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py
index 9387258a7..e506dad93 100644
--- a/synapse/rest/media/v1/thumbnail_resource.py
+++ b/synapse/rest/media/v1/thumbnail_resource.py
@@ -43,14 +43,25 @@ class ThumbnailResource(BaseMediaResource):
         m_type = parse_string(request, "type", "image/png")
 
         if server_name == self.server_name:
-            yield self._select_or_generate_local_thumbnail(
-                request, media_id, width, height, method, m_type
-            )
+            if self.dynamic_thumbnails:
+                yield self._select_or_generate_local_thumbnail(
+                    request, media_id, width, height, method, m_type
+                )
+            else:
+                yield self._respond_local_thumbnail(
+                    request, media_id, width, height, method, m_type
+                )
         else:
-            yield self._select_or_generate_remote_thumbnail(
-                request, server_name, media_id,
-                width, height, method, m_type
-            )
+            if self.dynamic_thumbnails:
+                yield self._select_or_generate_remote_thumbnail(
+                    request, server_name, media_id,
+                    width, height, method, m_type
+                )
+            else:
+                yield self._respond_remote_thumbnail(
+                    request, server_name, media_id,
+                    width, height, method, m_type
+                )
 
     @defer.inlineCallbacks
     def _respond_local_thumbnail(self, request, media_id, width, height,

From fdb724cb7040a7746b2a6c6d8aabbf3654daf8dd Mon Sep 17 00:00:00 2001
From: Mark Haines
Date: Wed, 12 Aug 2015 10:55:27 +0100
Subject: [PATCH 151/266] Add config option for setting the list of thumbnail sizes to precalculate

---
 synapse/config/repository.py           | 39 ++++++++++++++++++++++++++
 synapse/rest/media/v1/base_resource.py | 18 ++----------
 2 files changed, 41 insertions(+), 16 deletions(-)

diff --git a/synapse/config/repository.py b/synapse/config/repository.py
index 748dd14e2..7cab87442 100644
--- a/synapse/config/repository.py
+++ b/synapse/config/repository.py
@@ -14,6 +14,27 @@
 # limitations under the License.
 
 from ._base import Config
+from collections import namedtuple
+
+ThumbnailRequirement = namedtuple(
+    "ThumbnailRequirement", ["width", "height", "method", "media_type"]
+)
+
+def parse_thumbnail_requirements(thumbnail_sizes):
+    requirements = {}
+    for size in thumbnail_sizes:
+        width = size["width"]
+        height = size["height"]
+        method = size["method"]
+        jpeg_thumbnail = ThumbnailRequirement(width, height, method, "image/jpeg")
+        png_thumbnail = ThumbnailRequirement(width, height, method, "image/png")
+        requirements.setdefault("image/jpeg", []).append(jpeg_thumbnail)
+        requirements.setdefault("image/gif", []).append(png_thumbnail)
+        requirements.setdefault("image/png", []).append(png_thumbnail)
+    return {
+        media_type: tuple(thumbnails)
+        for media_type, thumbnails in requirements.items()
+    }
 
 
 class ContentRepositoryConfig(Config):
@@ -23,6 +44,9 @@ class ContentRepositoryConfig(Config):
         self.media_store_path = self.ensure_directory(config["media_store_path"])
         self.uploads_path = self.ensure_directory(config["uploads_path"])
         self.dynamic_thumbnails = config["dynamic_thumbnails"]
+        self.thumbnail_requirements = parse_thumbnail_requirements(
+            config["thumbnail_sizes"]
+        )
 
     def default_config(self, config_dir_path, server_name):
         media_store = self.default_path("media_store")
@@ -46,4 +70,19 @@ class ContentRepositoryConfig(Config):
         # generate a new thumbnail. If false the server will pick a thumbnail
         # from a precalculated list.
         dynamic_thumbnails: false
+
+        # List of thumbnails to precalculate when an image is uploaded.
+        thumbnail_sizes:
+        - width: 32
+          height: 32
+          method: crop
+        - width: 96
+          height: 96
+          method: crop
+        - width: 320
+          height: 240
+          method: scale
+        - width: 640
+          height: 480
+          method: scale
         """ % locals()
diff --git a/synapse/rest/media/v1/base_resource.py b/synapse/rest/media/v1/base_resource.py
index e39729489..271cbca2d 100644
--- a/synapse/rest/media/v1/base_resource.py
+++ b/synapse/rest/media/v1/base_resource.py
@@ -70,6 +70,7 @@ class BaseMediaResource(Resource):
         self.version_string = hs.version_string
         self.downloads = {}
         self.dynamic_thumbnails = hs.config.dynamic_thumbnails
+        self.thumbnail_requirements = hs.config.thumbnail_requirements
 
     def _respond_404(self, request):
         respond_with_json(
@@ -209,22 +210,7 @@ class BaseMediaResource(Resource):
             self._respond_404(request)
 
     def _get_thumbnail_requirements(self, media_type):
-        if media_type == "image/jpeg":
-            return (
-                (32, 32, "crop", "image/jpeg"),
-                (96, 96, "crop", "image/jpeg"),
-                (320, 240, "scale", "image/jpeg"),
-                (640, 480, "scale", "image/jpeg"),
-            )
-        elif (media_type == "image/png") or (media_type == "image/gif"):
-            return (
-                (32, 32, "crop", "image/png"),
-                (96, 96, "crop", "image/png"),
-                (320, 240, "scale", "image/png"),
-                (640, 480, "scale", "image/png"),
-            )
-        else:
-            return ()
+        return self.thumbnail_requirements.get(media_type, ())
 
     def _generate_thumbnail(self, input_path, t_path, t_width, t_height,
                             t_method, t_type):

From 7bbaab94327d806617fc136aab51303baed060cf Mon Sep 17 00:00:00 2001
From: Mark Haines
Date: Wed, 12 Aug 2015 11:57:37 +0100
Subject: [PATCH 152/266] Fix the --generate-keys option. Make it do the same thing as --generate-config does when the config file exists, but without printing a warning

---
 synapse/config/_base.py | 83 ++++++++++++++++++-----------------------
 1 file changed, 37 insertions(+), 46 deletions(-)

diff --git a/synapse/config/_base.py b/synapse/config/_base.py
index c408db2b4..73f695995 100644
--- a/synapse/config/_base.py
+++ b/synapse/config/_base.py
@@ -149,6 +149,8 @@ class Config(object):
         )
         config_args, remaining_args = config_parser.parse_known_args(argv)
 
+        generate_keys = config_args.generate_keys
+
         if config_args.generate_config:
             if not config_args.config_path:
                 config_parser.error(
@@ -156,51 +158,40 @@ class Config(object):
                     " generated using \"--generate-config -H SERVER_NAME"
                     " -c CONFIG-FILE\""
                 )
-
-            config_dir_path = os.path.dirname(config_args.config_path[0])
-            config_dir_path = os.path.abspath(config_dir_path)
-
-            server_name = config_args.server_name
-            if not server_name:
-                print "Must specify a server_name to generate a config for."
-                sys.exit(1)
             (config_path,) = config_args.config_path
-            if not os.path.exists(config_dir_path):
-                os.makedirs(config_dir_path)
-            if os.path.exists(config_path):
-                print "Config file %r already exists" % (config_path,)
-                yaml_config = cls.read_config_file(config_path)
-                yaml_name = yaml_config["server_name"]
-                if server_name != yaml_name:
-                    print (
-                        "Config file %r has a different server_name: "
-                        " %r != %r" % (config_path, server_name, yaml_name)
-                    )
+            if not os.path.exists(config_path):
+                config_dir_path = os.path.dirname(config_path)
+                config_dir_path = os.path.abspath(config_dir_path)
+
+                server_name = config_args.server_name
+                if not server_name:
+                    print "Must specify a server_name to generate a config for."
                     sys.exit(1)
-                config_bytes, config = obj.generate_config(
-                    config_dir_path, server_name
-                )
-                config.update(yaml_config)
-                print "Generating any missing keys for %r" % (server_name,)
-                obj.invoke_all("generate_files", config)
-                sys.exit(0)
-            with open(config_path, "wb") as config_file:
-                config_bytes, config = obj.generate_config(
-                    config_dir_path, server_name
-                )
-                obj.invoke_all("generate_files", config)
-                config_file.write(config_bytes)
+                if not os.path.exists(config_dir_path):
+                    os.makedirs(config_dir_path)
+                with open(config_path, "wb") as config_file:
+                    config_bytes, config = obj.generate_config(
+                        config_dir_path, server_name
+                    )
+                    obj.invoke_all("generate_files", config)
+                    config_file.write(config_bytes)
                 print (
-                    "A config file has been generated in %s for server name"
-                    " '%s' with corresponding SSL keys and self-signed"
-                    " certificates. Please review this file and customise it to"
-                    " your needs."
+                    "A config file has been generated in %r for server name"
+                    " %r with corresponding SSL keys and self-signed"
+                    " certificates. Please review this file and customise it"
+                    " to your needs."
                 ) % (config_path, server_name)
-            print (
-                "If this server name is incorrect, you will need to regenerate"
-                " the SSL certificates"
-            )
-            sys.exit(0)
+                print (
+                    "If this server name is incorrect, you will need to"
+                    " regenerate the SSL certificates"
+                )
+                sys.exit(0)
+            else:
+                print (
+                    "Config file %r already exists. Generating any missing key"
+                    " files."
+                ) % (config_path,)
+                generate_keys = True
 
         parser = argparse.ArgumentParser(
             parents=[config_parser],
@@ -218,7 +209,7 @@ class Config(object):
                 " -c CONFIG-FILE\""
             )
 
-        config_dir_path = os.path.dirname(config_args.config_path[0])
+        config_dir_path = os.path.dirname(config_args.config_path[-1])
         config_dir_path = os.path.abspath(config_dir_path)
 
         specified_config = {}
@@ -231,12 +222,12 @@ class Config(object):
             config.pop("log_config")
         config.update(specified_config)
 
+        if generate_keys:
+            obj.invoke_all("generate_files", config)
+            sys.exit(0)
+
         obj.invoke_all("read_config", config)
         obj.invoke_all("read_arguments", args)
 
-        if config_args.generate_keys:
-            obj.invoke_all("generate_files", config)
-            sys.exit(0)
-
         return obj

From de3b7b55d68ebad3d535dad2643e747460dc22a3 Mon Sep 17 00:00:00 2001
From: Mark Haines
Date: Wed, 12 Aug 2015 14:29:17 +0100
Subject: [PATCH 153/266] Doc-string for config utility function

---
 synapse/config/repository.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/synapse/config/repository.py b/synapse/config/repository.py
index 7cab87442..2cb0812d7 100644
--- a/synapse/config/repository.py
+++ b/synapse/config/repository.py
@@ -21,6 +21,17 @@ ThumbnailRequirement = namedtuple(
 )
 
 def parse_thumbnail_requirements(thumbnail_sizes):
+    """ Takes a list of dictionaries with "width", "height", and "method" keys
+    and creates a map from image media types to the thumbnail size, thumbnailing
+    method, and thumbnail media type to precalculate.
+
+    Args:
+        thumbnail_sizes(list): List of dicts with "width", "height", and
+            "method" keys
+    Returns:
+        Dictionary mapping from media type string to list of
+        ThumbnailRequirement tuples.
+    """
     requirements = {}
     for size in thumbnail_sizes:
         width = size["width"]

From 73605f80705f4a0c7271cabfee40a47c7764d72d Mon Sep 17 00:00:00 2001
From: David Baker
Date: Wed, 12 Aug 2015 15:40:54 +0100
Subject: [PATCH 154/266] Just leaving off the $ is fine.
r* == registerrrrrrrrr

---
 synapse/rest/client/v2_alpha/register.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py
index 25dab6f9c..e6ad35aa1 100644
--- a/synapse/rest/client/v2_alpha/register.py
+++ b/synapse/rest/client/v2_alpha/register.py
@@ -41,7 +41,7 @@ logger = logging.getLogger(__name__)
 
 
 class RegisterRestServlet(RestServlet):
-    PATTERN = client_v2_pattern("/register*")
+    PATTERN = client_v2_pattern("/register")
 
     def __init__(self, hs):
         super(RegisterRestServlet, self).__init__()

From f43041aacd36b7d9052476bcb0d083ea4213a9f9 Mon Sep 17 00:00:00 2001
From: David Baker
Date: Wed, 12 Aug 2015 15:44:08 +0100
Subject: [PATCH 155/266] Check absent before trying to access keys

---
 synapse/rest/client/v2_alpha/register.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py
index e6ad35aa1..254c5f1dd 100644
--- a/synapse/rest/client/v2_alpha/register.py
+++ b/synapse/rest/client/v2_alpha/register.py
@@ -224,6 +224,9 @@ class RegisterRestServlet(RestServlet):
             if k not in body:
                 absent.append(k)
 
+        if len(absent) > 0:
+            raise SynapseError(400, "Missing params: %r" % absent, Codes.MISSING_PARAM)
+
         existingUid = yield self.hs.get_datastore().get_user_id_by_threepid(
             'email', body['email']
         )
@@ -231,9 +234,6 @@ class RegisterRestServlet(RestServlet):
         if existingUid is not None:
             raise SynapseError(400, "Email is already in use", Codes.THREEPID_IN_USE)
 
-        if len(absent) > 0:
-            raise SynapseError(400, "Missing params: %r" % absent, Codes.MISSING_PARAM)
-
         ret = yield self.identity_handler.requestEmailToken(**body)
         defer.returnValue((200, ret))

From 415c2f05491ce65a4fc34326519754cd1edd9c54 Mon Sep 17 00:00:00 2001
From: Daniel Wagner-Hall
Date: Wed, 12 Aug 2015 15:49:37 +0100
Subject: [PATCH 156/266] Simplify LoginHandler and AuthHandler

* Merge LoginHandler -> AuthHandler
* Add a bunch of documentation
* Improve some naming
* Remove unused branches

I will start merging the actual logic of the two handlers shortly

---
 synapse/handlers/__init__.py             |  2 -
 synapse/handlers/auth.py                 | 90 +++++++++++++++++++-----
 synapse/handlers/login.py                | 83 ----------------------
 synapse/handlers/register.py             | 10 +--
 synapse/push/pusherpool.py               | 11 ++-
 synapse/rest/client/v1/login.py          |  5 +-
 synapse/rest/client/v2_alpha/account.py  |  8 +--
 synapse/rest/client/v2_alpha/register.py |  3 +-
 synapse/storage/registration.py          | 12 ++--
 9 files changed, 93 insertions(+), 131 deletions(-)
 delete mode 100644 synapse/handlers/login.py

diff --git a/synapse/handlers/__init__.py b/synapse/handlers/__init__.py
index dc5b6ef79..8725c3c42 100644
--- a/synapse/handlers/__init__.py
+++ b/synapse/handlers/__init__.py
@@ -22,7 +22,6 @@ from .room import (
 from .message import MessageHandler
 from .events import EventStreamHandler, EventHandler
 from .federation import FederationHandler
-from .login import LoginHandler
 from .profile import ProfileHandler
 from .presence import PresenceHandler
 from .directory import DirectoryHandler
@@ -54,7 +53,6 @@ class Handlers(object):
         self.profile_handler = ProfileHandler(hs)
         self.presence_handler = PresenceHandler(hs)
         self.room_list_handler = RoomListHandler(hs)
-        self.login_handler = LoginHandler(hs)
         self.directory_handler = DirectoryHandler(hs)
         self.typing_notification_handler = TypingNotificationHandler(hs)
         self.admin_handler = AdminHandler(hs)
diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py
index 1ecf7fef1..1504b00d7 100644
--- a/synapse/handlers/auth.py
+++ b/synapse/handlers/auth.py
@@ -47,17 +47,24 @@ class AuthHandler(BaseHandler):
         self.sessions = {}
 
     @defer.inlineCallbacks
-    def check_auth(self, flows, clientdict, clientip=None):
+    def check_auth(self, flows, clientdict, clientip):
         """
         Takes a dictionary sent by the client in the login / registration
         protocol and handles the login flow.
 
+        As a side effect, this function fills in the 'creds' key on the user's
+        session with a map, which maps each auth-type (str) to the relevant
+        identity authenticated by that auth-type (mostly str, but for captcha, bool).
+
         Args:
-            flows: list of list of stages
-            authdict: The dictionary from the client root level, not the
-                      'auth' key: this method prompts for auth if none is sent.
+            flows (list): A list of login flows. Each flow is an ordered list of
+                          strings representing auth-types. At least one full
+                          flow must be completed in order for auth to be successful.
+            clientdict: The dictionary from the client root level, not the
+                        'auth' key: this method prompts for auth if none is sent.
+            clientip (str): The IP address of the client.
         Returns:
-            A tuple of authed, dict, dict where authed is true if the client
+            A tuple of (authed, dict, dict) where authed is true if the client
             has successfully completed an auth flow. If it is true, the first
             dict contains the authenticated credentials of each stage.
 
@@ -75,7 +82,7 @@ class AuthHandler(BaseHandler):
             del clientdict['auth']
             if 'session' in authdict:
                 sid = authdict['session']
-        sess = self._get_session_info(sid)
+        session = self._get_session_info(sid)
 
         if len(clientdict) > 0:
             # This was designed to allow the client to omit the parameters
@@ -87,20 +94,19 @@ class AuthHandler(BaseHandler):
             # on a home server.
             # Revisit: Assuming the REST APIs do sensible validation, the data
             # isn't arbitrary.
-            sess['clientdict'] = clientdict
-            self._save_session(sess)
-            pass
-        elif 'clientdict' in sess:
-            clientdict = sess['clientdict']
+            session['clientdict'] = clientdict
+            self._save_session(session)
+        elif 'clientdict' in session:
+            clientdict = session['clientdict']
 
         if not authdict:
             defer.returnValue(
-                (False, self._auth_dict_for_flows(flows, sess), clientdict)
+                (False, self._auth_dict_for_flows(flows, session), clientdict)
             )
 
-        if 'creds' not in sess:
-            sess['creds'] = {}
-        creds = sess['creds']
+        if 'creds' not in session:
+            session['creds'] = {}
+        creds = session['creds']
 
         # check auth type currently being presented
         if 'type' in authdict:
@@ -109,15 +115,15 @@ class AuthHandler(BaseHandler):
             result = yield self.checkers[authdict['type']](authdict, clientip)
             if result:
                 creds[authdict['type']] = result
-                self._save_session(sess)
+                self._save_session(session)
 
         for f in flows:
             if len(set(f) - set(creds.keys())) == 0:
                 logger.info("Auth completed with creds: %r", creds)
-                self._remove_session(sess)
+                self._remove_session(session)
                 defer.returnValue((True, creds, clientdict))
 
-        ret = self._auth_dict_for_flows(flows, sess)
+        ret = self._auth_dict_for_flows(flows, session)
         ret['completed'] = creds.keys()
         defer.returnValue((False, ret, clientdict))
 
@@ -270,6 +276,54 @@ class AuthHandler(BaseHandler):
 
         return self.sessions[session_id]
 
+    @defer.inlineCallbacks
+    def login_with_password(self, user_id, password):
+        """
+        Authenticates the user with their username and password.
+
+        Used only by the v1 login API.
+
+        Args:
+            user_id (str): User ID
+            password (str): Password
+        Returns:
+            The access token for the user's session.
+ Raises: + StoreError if there was a problem storing the token. + LoginError if there was an authentication problem. + """ + user_info = yield self.store.get_user_by_id(user_id=user_id) + if not user_info: + logger.warn("Attempted to login as %s but they do not exist", user_id) + raise LoginError(403, "", errcode=Codes.FORBIDDEN) + + stored_hash = user_info["password_hash"] + if not bcrypt.checkpw(password, stored_hash): + logger.warn("Failed password login for user %s", user_id) + raise LoginError(403, "", errcode=Codes.FORBIDDEN) + + reg_handler = self.hs.get_handlers().registration_handler + access_token = reg_handler.generate_token(user_id) + logger.info("Adding token %s for user %s", access_token, user_id) + yield self.store.add_access_token_to_user(user_id, access_token) + defer.returnValue(access_token) + + @defer.inlineCallbacks + def set_password(self, user_id, newpassword): + password_hash = bcrypt.hashpw(newpassword, bcrypt.gensalt()) + + yield self.store.user_set_password_hash(user_id, password_hash) + yield self.store.user_delete_access_tokens(user_id) + yield self.hs.get_pusherpool().remove_pushers_by_user(user_id) + yield self.store.flush_user(user_id) + + @defer.inlineCallbacks + def add_threepid(self, user_id, medium, address, validated_at): + yield self.store.user_add_threepid( + user_id, medium, address, validated_at, + self.hs.get_clock().time_msec() + ) + def _save_session(self, session): # TODO: Persistent storage logger.debug("Saving session %s", session) diff --git a/synapse/handlers/login.py b/synapse/handlers/login.py deleted file mode 100644 index 91d87d503..000000000 --- a/synapse/handlers/login.py +++ /dev/null @@ -1,83 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2014, 2015 OpenMarket Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from twisted.internet import defer - -from ._base import BaseHandler -from synapse.api.errors import LoginError, Codes - -import bcrypt -import logging - -logger = logging.getLogger(__name__) - - -class LoginHandler(BaseHandler): - - def __init__(self, hs): - super(LoginHandler, self).__init__(hs) - self.hs = hs - - @defer.inlineCallbacks - def login(self, user, password): - """Login as the specified user with the specified password. - - Args: - user (str): The user ID. - password (str): The password. - Returns: - The newly allocated access token. - Raises: - StoreError if there was a problem storing the token. - LoginError if there was an authentication problem. - """ - # TODO do this better, it can't go in __init__ else it cyclic loops - if not hasattr(self, "reg_handler"): - self.reg_handler = self.hs.get_handlers().registration_handler - - # pull out the hash for this user if they exist - user_info = yield self.store.get_user_by_id(user_id=user) - if not user_info: - logger.warn("Attempted to login as %s but they do not exist", user) - raise LoginError(403, "", errcode=Codes.FORBIDDEN) - - stored_hash = user_info["password_hash"] - if bcrypt.checkpw(password, stored_hash): - # generate an access token and store it. 
- token = self.reg_handler._generate_token(user) - logger.info("Adding token %s for user %s", token, user) - yield self.store.add_access_token_to_user(user, token) - defer.returnValue(token) - else: - logger.warn("Failed password login for user %s", user) - raise LoginError(403, "", errcode=Codes.FORBIDDEN) - - @defer.inlineCallbacks - def set_password(self, user_id, newpassword, token_id=None): - password_hash = bcrypt.hashpw(newpassword, bcrypt.gensalt()) - - yield self.store.user_set_password_hash(user_id, password_hash) - yield self.store.user_delete_access_tokens_apart_from(user_id, token_id) - yield self.hs.get_pusherpool().remove_pushers_by_user_access_token( - user_id, token_id - ) - yield self.store.flush_user(user_id) - - @defer.inlineCallbacks - def add_threepid(self, user_id, medium, address, validated_at): - yield self.store.user_add_threepid( - user_id, medium, address, validated_at, - self.hs.get_clock().time_msec() - ) diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index f81d75017..39392d9fd 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -91,7 +91,7 @@ class RegistrationHandler(BaseHandler): user = UserID(localpart, self.hs.hostname) user_id = user.to_string() - token = self._generate_token(user_id) + token = self.generate_token(user_id) yield self.store.register( user_id=user_id, token=token, @@ -111,7 +111,7 @@ class RegistrationHandler(BaseHandler): user_id = user.to_string() yield self.check_user_id_is_valid(user_id) - token = self._generate_token(user_id) + token = self.generate_token(user_id) yield self.store.register( user_id=user_id, token=token, @@ -161,7 +161,7 @@ class RegistrationHandler(BaseHandler): 400, "Invalid user localpart for this application service.", errcode=Codes.EXCLUSIVE ) - token = self._generate_token(user_id) + token = self.generate_token(user_id) yield self.store.register( user_id=user_id, token=token, @@ -208,7 +208,7 @@ class RegistrationHandler(BaseHandler): user_id = user.to_string() yield self.check_user_id_is_valid(user_id) - token = self._generate_token(user_id) + token = self.generate_token(user_id) try: yield self.store.register( user_id=user_id, @@ -273,7 +273,7 @@ class RegistrationHandler(BaseHandler): errcode=Codes.EXCLUSIVE ) - def _generate_token(self, user_id): + def generate_token(self, user_id): # urlsafe variant uses _ and - so use . as the separator and replace # all =s with .s so http clients don't quote =s when it is used as # query params. 
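The comment above pins the token format down well enough to sketch it. The following is an illustrative reconstruction, not the actual body of generate_token: standard urlsafe base64 only emits "-" and "_", so "." is free to act as a field separator, and swapping "=" padding for "." keeps HTTP clients from percent-encoding the token when it appears in a query string.

    import base64
    import os


    def generate_token_like(user_id):
        # Hypothetical sketch of the scheme the comment above describes.
        encoded_user = base64.urlsafe_b64encode(user_id).replace("=", ".")
        random_part = base64.urlsafe_b64encode(os.urandom(16)).replace("=", ".")
        return "%s..%s" % (encoded_user, random_part)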
diff --git a/synapse/push/pusherpool.py b/synapse/push/pusherpool.py
index 0ab2f6597..e012c565e 100644
--- a/synapse/push/pusherpool.py
+++ b/synapse/push/pusherpool.py
@@ -94,17 +94,14 @@ class PusherPool:
             self.remove_pusher(p['app_id'], p['pushkey'], p['user_name'])
 
     @defer.inlineCallbacks
-    def remove_pushers_by_user_access_token(self, user_id, not_access_token_id):
+    def remove_pushers_by_user(self, user_id):
         all = yield self.store.get_all_pushers()
         logger.info(
-            "Removing all pushers for user %s except access token %s",
-            user_id, not_access_token_id
+            "Removing all pushers for user %s",
+            user_id,
         )
         for p in all:
-            if (
-                p['user_name'] == user_id and
-                p['access_token'] != not_access_token_id
-            ):
+            if p['user_name'] == user_id:
                 logger.info(
                     "Removing pusher for app id %s, pushkey %s, user %s",
                     p['app_id'], p['pushkey'], p['user_name']
diff --git a/synapse/rest/client/v1/login.py b/synapse/rest/client/v1/login.py
index 998d4d44c..694072693 100644
--- a/synapse/rest/client/v1/login.py
+++ b/synapse/rest/client/v1/login.py
@@ -78,9 +78,8 @@ class LoginRestServlet(ClientV1RestServlet):
             login_submission["user"] = UserID.create(
                 login_submission["user"], self.hs.hostname).to_string()
 
-        handler = self.handlers.login_handler
-        token = yield handler.login(
-            user=login_submission["user"],
+        token = yield self.handlers.auth_handler.login_with_password(
+            user_id=login_submission["user"],
             password=login_submission["password"])
 
         result = {
diff --git a/synapse/rest/client/v2_alpha/account.py b/synapse/rest/client/v2_alpha/account.py
index b082140f1..897c54b53 100644
--- a/synapse/rest/client/v2_alpha/account.py
+++ b/synapse/rest/client/v2_alpha/account.py
@@ -36,7 +36,6 @@ class PasswordRestServlet(RestServlet):
         self.hs = hs
         self.auth = hs.get_auth()
         self.auth_handler = hs.get_handlers().auth_handler
-        self.login_handler = hs.get_handlers().login_handler
 
     @defer.inlineCallbacks
     def on_POST(self, request):
@@ -47,7 +46,7 @@ class PasswordRestServlet(RestServlet):
 
         authed, result, params = yield self.auth_handler.check_auth([
             [LoginType.PASSWORD],
             [LoginType.EMAIL_IDENTITY]
-        ], body)
+        ], body, self.hs.get_ip_from_request(request))
 
         if not authed:
             defer.returnValue((401, result))
@@ -79,7 +78,7 @@ class PasswordRestServlet(RestServlet):
             raise SynapseError(400, "", Codes.MISSING_PARAM)
         new_password = params['new_password']
 
-        yield self.login_handler.set_password(
+        yield self.auth_handler.set_password(
             user_id, new_password, None
         )
 
@@ -95,7 +94,6 @@ class ThreepidRestServlet(RestServlet):
     def __init__(self, hs):
         super(ThreepidRestServlet, self).__init__()
         self.hs = hs
-        self.login_handler = hs.get_handlers().login_handler
         self.identity_handler = hs.get_handlers().identity_handler
         self.auth = hs.get_auth()
 
@@ -135,7 +133,7 @@ class ThreepidRestServlet(RestServlet):
             logger.warn("Couldn't add 3pid: invalid response from ID server")
             raise SynapseError(500, "Invalid response from ID Server")
 
-        yield self.login_handler.add_threepid(
+        yield self.auth_handler.add_threepid(
             auth_user.to_string(),
             threepid['medium'],
             threepid['address'],
diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py
index b5926f9ca..012c447e8 100644
--- a/synapse/rest/client/v2_alpha/register.py
+++ b/synapse/rest/client/v2_alpha/register.py
@@ -50,7 +50,6 @@ class RegisterRestServlet(RestServlet):
         self.auth_handler = hs.get_handlers().auth_handler
         self.registration_handler = hs.get_handlers().registration_handler
         self.identity_handler = hs.get_handlers().identity_handler
-
self.login_handler = hs.get_handlers().login_handler @defer.inlineCallbacks def on_POST(self, request): @@ -143,7 +142,7 @@ class RegisterRestServlet(RestServlet): if reqd not in threepid: logger.info("Can't add incomplete 3pid") else: - yield self.login_handler.add_threepid( + yield self.auth_handler.add_threepid( user_id, threepid['medium'], threepid['address'], diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 4eaa088b3..d2d5b07cb 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -111,16 +111,16 @@ class RegistrationStore(SQLBaseStore): }) @defer.inlineCallbacks - def user_delete_access_tokens_apart_from(self, user_id, token_id): + def user_delete_access_tokens(self, user_id): yield self.runInteraction( - "user_delete_access_tokens_apart_from", - self._user_delete_access_tokens_apart_from, user_id, token_id + "user_delete_access_tokens", + self._user_delete_access_tokens, user_id ) - def _user_delete_access_tokens_apart_from(self, txn, user_id, token_id): + def _user_delete_access_tokens(self, txn, user_id): txn.execute( - "DELETE FROM access_tokens WHERE user_id = ? AND id != ?", - (user_id, token_id) + "DELETE FROM access_tokens WHERE user_id = ?", + (user_id, ) ) @defer.inlineCallbacks From f7e2f981ea1feb8461a5bddd9378bd5084833fc0 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 12 Aug 2015 16:01:10 +0100 Subject: [PATCH 157/266] Use list comprehension instead of filter --- synapse/handlers/message.py | 13 +++---------- synapse/handlers/sync.py | 13 +++---------- 2 files changed, 6 insertions(+), 20 deletions(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index b941312ef..2c4af8dc9 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -177,17 +177,10 @@ class MessageHandler(BaseHandler): return True - event_and_state = filter( - allowed, - [ - (e, event_id_to_state[e.event_id]) - for e in events - ] - ) - defer.returnValue([ - ev - for ev, _ in event_and_state + event + for event in events + if allowed(event, event_id_to_state[event.event_id]) ]) @defer.inlineCallbacks diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index d960078e7..ec8d78ba8 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -334,17 +334,10 @@ class SyncHandler(BaseHandler): return True - event_and_state = filter( - allowed, - [ - (e, event_id_to_state[e.event_id]) - for e in events - ] - ) - defer.returnValue([ - ev - for ev, _ in event_and_state + event + for event in events + if allowed(event, event_id_to_state[event.event_id]) ]) @defer.inlineCallbacks From a7eeb34c64d828539dd6799f2347371a8eabae73 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 12 Aug 2015 16:02:05 +0100 Subject: [PATCH 158/266] Simplify staggered deferred lists --- synapse/handlers/message.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 2c4af8dc9..8a9e6cf6c 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -406,9 +406,9 @@ class MessageHandler(BaseHandler): # Only do N rooms at once n = 5 d_list = [handle_room(e) for e in room_list] - for ds in [d_list[i:i + n] for i in range(0, len(d_list), n)]: + for i in range(0, len(d_list), n): yield defer.gatherResults( - ds, + d_list[i:i + n], consumeErrors=True ).addErrback(unwrapFirstError) From 5ce903e2f7a4136b67ec16564e24244b00118367 Mon Sep 17 00:00:00 2001 From: Daniel Wagner-Hall Date: Wed, 12 Aug 2015 16:09:19 
+0100 Subject: [PATCH 159/266] Merge password checking implementations --- synapse/handlers/auth.py | 35 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index 1504b00d7..98d99dd0a 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -157,22 +157,13 @@ class AuthHandler(BaseHandler): if "user" not in authdict or "password" not in authdict: raise LoginError(400, "", Codes.MISSING_PARAM) - user = authdict["user"] + user_id = authdict["user"] password = authdict["password"] - if not user.startswith('@'): - user = UserID.create(user, self.hs.hostname).to_string() + if not user_id.startswith('@'): + user_id = UserID.create(user_id, self.hs.hostname).to_string() - user_info = yield self.store.get_user_by_id(user_id=user) - if not user_info: - logger.warn("Attempted to login as %s but they do not exist", user) - raise LoginError(401, "", errcode=Codes.UNAUTHORIZED) - - stored_hash = user_info["password_hash"] - if bcrypt.checkpw(password, stored_hash): - defer.returnValue(user) - else: - logger.warn("Failed password login for user %s", user) - raise LoginError(401, "", errcode=Codes.UNAUTHORIZED) + self._check_password(user_id, password) + defer.returnValue(user_id) @defer.inlineCallbacks def _check_recaptcha(self, authdict, clientip): @@ -292,6 +283,16 @@ class AuthHandler(BaseHandler): StoreError if there was a problem storing the token. LoginError if there was an authentication problem. """ + self._check_password(user_id, password) + + reg_handler = self.hs.get_handlers().registration_handler + access_token = reg_handler.generate_token(user_id) + logger.info("Adding token %s for user %s", access_token, user_id) + yield self.store.add_access_token_to_user(user_id, access_token) + defer.returnValue(access_token) + + def _check_password(self, user_id, password): + """Checks that user_id has passed password, raises LoginError if not.""" user_info = yield self.store.get_user_by_id(user_id=user_id) if not user_info: logger.warn("Attempted to login as %s but they do not exist", user_id) @@ -302,12 +303,6 @@ class AuthHandler(BaseHandler): logger.warn("Failed password login for user %s", user_id) raise LoginError(403, "", errcode=Codes.FORBIDDEN) - reg_handler = self.hs.get_handlers().registration_handler - access_token = reg_handler.generate_token(user_id) - logger.info("Adding token %s for user %s", access_token, user_id) - yield self.store.add_access_token_to_user(user_id, access_token) - defer.returnValue(access_token) - @defer.inlineCallbacks def set_password(self, user_id, newpassword): password_hash = bcrypt.hashpw(newpassword, bcrypt.gensalt()) From cfa62007a392a2d6da818a45999c71da44a6da12 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 12 Aug 2015 16:42:46 +0100 Subject: [PATCH 160/266] Docstring --- synapse/util/caches/descriptors.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py index 408c3b5e6..83bfec2f0 100644 --- a/synapse/util/caches/descriptors.py +++ b/synapse/util/caches/descriptors.py @@ -341,6 +341,33 @@ def cachedInlineCallbacks(max_entries=1000, num_args=1, lru=False): def cachedList(cache, list_name, num_args=1, inlineCallbacks=False): + """Creates a descriptor that wraps a function in a `CacheListDescriptor`. + + Used to do batch lookups for an already created cache. 
A single argument + is specified as a list that is iterated through to lookup keys in the + original cache. A new list consisting of the keys that weren't in the cache + get passed to the original function, the result of which is stored in the + cache. + + Args: + cache (Cache): The underlying cache to use. + list_name (str): The name of the argument that is the list to use to + do batch lookups in the cache. + num_args (int): Number of arguments to use as the key in the cache. + inlineCallbacks (bool): Should the function be wrapped in an + `defer.inlineCallbacks`? + + Example: + + class Example(object): + @cached(num_args=2) + def do_something(self, first_arg): + ... + + @cachedList(do_something.cache, list_name="second_args", num_args=2) + def batch_do_something(self, first_arg, second_args): + ... + """ return lambda orig: CacheListDescriptor( orig, cache=cache, From 2eb91e6694f43854c28fe33326cb2e8be4ac69c5 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Wed, 12 Aug 2015 16:53:30 +0100 Subject: [PATCH 161/266] enable registration in the demo servers --- demo/start.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/demo/start.sh b/demo/start.sh index b5dea5e17..572dbfab0 100755 --- a/demo/start.sh +++ b/demo/start.sh @@ -23,7 +23,6 @@ for port in 8080 8081 8082; do #rm $DIR/etc/$port.config python -m synapse.app.homeserver \ --generate-config \ - --enable_registration \ -H "localhost:$https_port" \ --config-path "$DIR/etc/$port.config" \ @@ -36,6 +35,8 @@ for port in 8080 8081 8082; do fi fi + perl -p -i -e 's/^enable_registration:.*/enable_registration: true/g' $DIR/etc/$port.config + python -m synapse.app.homeserver \ --config-path "$DIR/etc/$port.config" \ -D \ From 7b0e7970800df8cedf7966e6fb3837ee233d9ea4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 12 Aug 2015 17:05:24 +0100 Subject: [PATCH 162/266] Fix _filter_events_for_client --- synapse/handlers/message.py | 4 +--- synapse/handlers/sync.py | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 8a9e6cf6c..29e81085d 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -145,9 +145,7 @@ class MessageHandler(BaseHandler): ) ) - def allowed(event_and_state): - event, state = event_and_state - + def allowed(event, state): if event.type == EventTypes.RoomHistoryVisibility: return True diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index ec8d78ba8..7206ae23d 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -302,9 +302,7 @@ class SyncHandler(BaseHandler): ) ) - def allowed(event_and_state): - event, state = event_and_state - + def allowed(event, state): if event.type == EventTypes.RoomHistoryVisibility: return True From df361d08f7b778036cd0def86289d511267af849 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 12 Aug 2015 17:06:21 +0100 Subject: [PATCH 163/266] Split _get_state_for_group_from_cache into two --- synapse/storage/state.py | 85 +++++++++++++++++++++++++--------------- 1 file changed, 53 insertions(+), 32 deletions(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 79c3b82d9..129384236 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -287,42 +287,39 @@ class StateStore(SQLBaseStore): f, ) - def _get_state_for_group_from_cache(self, group, types=None): + def _get_some_state_from_cache(self, group, types): """Checks if group is in cache. 
See `_get_state_for_groups` Returns 3-tuple (`state_dict`, `missing_types`, `got_all`). `missing_types` is the list of types that aren't in the cache for that - group, or None if `types` is None. `got_all` is a bool indicating if - we successfully retrieved all requested state from the cache, if False - we need to query the DB for the missing state. + group. `got_all` is a bool indicating if we successfully retrieved all + requested state from the cache, if False we need to query the DB for the + missing state. + + Args: + group: The state group to lookup + types (list): List of 2-tuples of the form (`type`, `state_key`), + where a `state_key` of `None` matches all state_keys for the + `type`. """ is_all, state_dict = self._state_group_cache.get(group) type_to_key = {} missing_types = set() - if types is not None: - for typ, state_key in types: - if state_key is None: - type_to_key[typ] = None + for typ, state_key in types: + if state_key is None: + type_to_key[typ] = None + missing_types.add((typ, state_key)) + else: + if type_to_key.get(typ, object()) is not None: + type_to_key.setdefault(typ, set()).add(state_key) + + if (typ, state_key) not in state_dict: missing_types.add((typ, state_key)) - else: - if type_to_key.get(typ, object()) is not None: - type_to_key.setdefault(typ, set()).add(state_key) - - if (typ, state_key) not in state_dict: - missing_types.add((typ, state_key)) - - if is_all: - missing_types = set() - if types is None: - return state_dict, set(), True sentinel = object() def include(typ, state_key): - if types is None: - return True - valid_state_keys = type_to_key.get(typ, sentinel) if valid_state_keys is sentinel: return False @@ -340,6 +337,19 @@ class StateStore(SQLBaseStore): if include(k[0], k[1]) }, missing_types, got_all + def _get_all_state_from_cache(self, group): + """Checks if group is in cache. See `_get_state_for_groups` + + Returns 2-tuple (`state_dict`, `got_all`). `got_all` is a bool + indicating if we successfully retrieved all requested state from the + cache, if False we need to query the DB for the missing state.
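
The probe-then-report-missing pattern that `_get_some_state_from_cache` implements above can be tried standalone. The following is a minimal, self-contained sketch of the same idea, assuming a plain dict keyed by (type, state_key) in place of the real state group cache; the function name and sample data are illustrative, not Synapse's API:

    # Sketch: probe a dict-backed cache for the requested (type, state_key)
    # pairs; a state_key of None is a wildcard for "all keys of that type".
    def get_some_state_from_cache(cache, types):
        missing_types = set()
        wanted = {}  # type -> set of state_keys, or None for a wildcard
        for typ, state_key in types:
            if state_key is None:
                # We can never prove the cache holds every state_key for a
                # type, so wildcards are always reported as missing.
                wanted[typ] = None
                missing_types.add((typ, state_key))
            else:
                if wanted.get(typ, object()) is not None:
                    wanted.setdefault(typ, set()).add(state_key)
                if (typ, state_key) not in cache:
                    missing_types.add((typ, state_key))

        def include(typ, state_key):
            keys = wanted.get(typ)
            if keys is None:
                return typ in wanted  # True only for wildcarded types
            return state_key in keys

        state = {k: v for k, v in cache.items() if include(*k)}
        return state, missing_types, not missing_types

    cache = {("m.room.member", "@a:hs"): "ev1", ("m.room.name", ""): "ev2"}
    print(get_some_state_from_cache(cache, [("m.room.member", "@a:hs")]))
    # ({('m.room.member', '@a:hs'): 'ev1'}, set(), True)
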
+ + Args: + group: The state group to lookup + """ + is_all, state_dict = self._state_group_cache.get(group) + return state_dict, is_all + @defer.inlineCallbacks def _get_state_for_groups(self, groups, types=None): """Given list of groups returns dict of group -> list of state events @@ -349,18 +359,29 @@ class StateStore(SQLBaseStore): """ results = {} missing_groups_and_types = [] - for group in set(groups): - state_dict, missing_types, got_all = self._get_state_for_group_from_cache( - group, types - ) + if types is not None: + for group in set(groups): + state_dict, missing_types, got_all = self._get_some_state_from_cache( + group, types + ) - results[group] = state_dict + results[group] = state_dict - if not got_all: - missing_groups_and_types.append(( - group, - missing_types if types else None - )) + if not got_all: + missing_groups_and_types.append(( + group, + missing_types + )) + else: + for group in set(groups): + state_dict, got_all = self._get_all_state_from_cache( + group + ) + + results[group] = state_dict + + if not got_all: + missing_groups_and_types.append((group, None)) if not missing_groups_and_types: defer.returnValue({ From 101ee3fd0022162da2ea2dc3f1b1846a08e21f9e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 12 Aug 2015 17:08:05 +0100 Subject: [PATCH 164/266] Better variable name --- synapse/storage/state.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 129384236..a3fc859b0 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -385,12 +385,12 @@ class StateStore(SQLBaseStore): if not missing_groups_and_types: defer.returnValue({ - k: { - key: ev - for key, ev in state.items() - if ev + group: { + type_tuple: event + for type_tuple, event in state.items() + if event } - for k, state in results.items() + for group, state in results.items() }) # Okay, so we have some missing_types, lets fetch them. From c10ac7806e2cde04f344b641d01efa46d34f9985 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 12 Aug 2015 17:16:30 +0100 Subject: [PATCH 165/266] Explain why we're prefilling dict with Nones --- synapse/storage/state.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index a3fc859b0..57e334cc3 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -412,6 +412,10 @@ class StateStore(SQLBaseStore): for group, state_ids in group_state_dict.items(): if types: + # We delibrately put key -> None mappings into the cache to + # cache absence of the key, on the assumption that if we've + # explicitly asked for some types then we will probably ask + # for them again. state_dict = { key: None for key in types From 0fbed2a8fac2c04ac4f46645aa9757fdca8b7cc6 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 12 Aug 2015 17:22:54 +0100 Subject: [PATCH 166/266] Comment --- synapse/storage/state.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 57e334cc3..abc5b6643 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -410,6 +410,8 @@ class StateStore(SQLBaseStore): for e in state_events } + # Now we want to update the cache with all the things we fetched + # from the database. 
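
The key -> None prefill explained in PATCH 165 above is a general negative-caching pattern: remember that a key was looked up and found absent, so the next request does not hit the database again. A minimal runnable sketch, independent of Synapse's cache classes (the helper and fake store below are made up for illustration):

    # Sketch: cache key -> None to record "looked up and absent".
    def fetch_with_negative_cache(cache, keys, fetch_from_db):
        missing = [k for k in keys if k not in cache]
        if missing:
            fetched = fetch_from_db(missing)  # {key: value} for the hits only
            for key in missing:
                # Absent keys are deliberately cached as None, on the
                # assumption that they will probably be asked for again.
                cache[key] = fetched.get(key)
        # Strip the None placeholders before handing results to the caller.
        return {k: cache[k] for k in keys if cache[k] is not None}

    calls = []
    def fake_db(keys):
        calls.append(list(keys))
        return {k: k.upper() for k in keys if k != "gone"}

    cache = {}
    print(fetch_with_negative_cache(cache, ["a", "gone"], fake_db))  # {'a': 'A'}
    print(fetch_with_negative_cache(cache, ["a", "gone"], fake_db))  # {'a': 'A'}
    print(calls)  # [['a', 'gone']] -- the second lookup never touched the db
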
for group, state_ids in group_state_dict.items(): if types: # We delibrately put key -> None mappings into the cache to # cache absence of the key, on the assumption that if we've # explicitly asked for some types then we will probably ask # for them again. state_dict = { key: None for key in types From 21ac8be5f7be79cfffb32ad9fe1bba9515d9fd3e Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Wed, 12 Aug 2015 17:25:13 +0100 Subject: [PATCH 167/266] Depend on Twisted>=15.1 rather than pinning to a particular version --- setup.py | 2 +- synapse/python_dependencies.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index c790484c7..16ccc0f1b 100755 --- a/setup.py +++ b/setup.py @@ -48,7 +48,7 @@ setup( description="Reference Synapse Home Server", install_requires=dependencies['requirements'](include_conditional=True).keys(), setup_requires=[ - "Twisted==15.2.1", # Here to override setuptools_trial's dependency on Twisted>=2.4.0 + "Twisted>=15.1.0", # Here to override setuptools_trial's dependency on Twisted>=2.4.0 "setuptools_trial", "mock" ], diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py index 534c4a669..fa06480ad 100644 --- a/synapse/python_dependencies.py +++ b/synapse/python_dependencies.py @@ -19,7 +19,7 @@ logger = logging.getLogger(__name__) REQUIREMENTS = { "syutil>=0.0.7": ["syutil>=0.0.7"], - "Twisted==15.2.1": ["twisted==15.2.1"], + "Twisted>=15.1.0": ["twisted>=15.1.0"], "service_identity>=1.0.0": ["service_identity>=1.0.0"], "pyopenssl>=0.14": ["OpenSSL>=0.14"], "pyyaml": ["yaml"], From ba5d34a83274127a2c0226059778d226355bdb6c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 13 Aug 2015 11:38:59 +0100 Subject: [PATCH 168/266] Add some metrics about the reactor --- synapse/metrics/__init__.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index 9233ea3da..e014b415f 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -18,8 +18,12 @@ from __future__ import absolute_import import logging from resource import getrusage, getpagesize, RUSAGE_SELF +import functools import os import stat +import time + +from twisted.internet import reactor from .metric import ( CounterMetric, CallbackMetric, DistributionMetric, CacheMetric ) @@ -144,3 +148,28 @@ def _process_fds(): return counts get_metrics_for("process").register_callback("fds", _process_fds, labels=["type"]) + +reactor_metrics = get_metrics_for("reactor") +tick_time = reactor_metrics.register_distribution("tick_time") +pending_calls_metric = reactor_metrics.register_distribution("pending_calls") + + +def runUntilCurrentTimer(func): + + @functools.wraps(func) + def f(*args, **kwargs): + start = time.time() * 1000 + pending_calls = len(reactor.getDelayedCalls()) + ret = func(*args, **kwargs) + end = time.time() * 1000 + tick_time.inc_by(end - start) + pending_calls_metric.inc_by(pending_calls) + return ret + + return f + + +if hasattr(reactor, "runUntilCurrent"): + # runUntilCurrent is called when we have pending calls. It is called once + # per iteration after fd polling.
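
The wrap-and-measure pattern used by `runUntilCurrentTimer` above is generic. Here is a hedged sketch using only the standard library, recording durations into a plain list rather than Synapse's distribution metrics; the decorator and names are illustrative only:

    import functools
    import time

    # Sketch: replace a callable with a wrapper that records how many
    # milliseconds each invocation took, as runUntilCurrentTimer does.
    def timed(record):
        def decorator(func):
            @functools.wraps(func)
            def wrapper(*args, **kwargs):
                start = time.time() * 1000
                try:
                    return func(*args, **kwargs)
                finally:
                    record(time.time() * 1000 - start)
            return wrapper
        return decorator

    tick_times_ms = []

    @timed(tick_times_ms.append)
    def tick():
        time.sleep(0.01)

    tick()
    print("tick took %.1f ms" % tick_times_ms[0])
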
+ reactor.runUntilCurrent = runUntilCurrentTimer(reactor.runUntilCurrent) From a6c27de1aa283a9d7a347db53b08367c53a15a24 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 13 Aug 2015 11:41:57 +0100 Subject: [PATCH 169/266] Don't time getDelayedCalls --- synapse/metrics/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index e014b415f..0be977299 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -158,8 +158,8 @@ def runUntilCurrentTimer(func): @functools.wraps(func) def f(*args, **kwargs): - start = time.time() * 1000 pending_calls = len(reactor.getDelayedCalls()) + start = time.time() * 1000 ret = func(*args, **kwargs) end = time.time() * 1000 tick_time.inc_by(end - start) From adbd720fab5f6ee42c4e14b06eb6f385bae14dc6 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 13 Aug 2015 11:47:38 +0100 Subject: [PATCH 170/266] PEP8 --- synapse/http/matrixfederationclient.py | 1 - 1 file changed, 1 deletion(-) diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index 4d74bd5d7..854e17a47 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -142,7 +142,6 @@ class MatrixFederationHttpClient(object): producer ) - return self.clock.time_bound_deferred( request_deferred, time_out=timeout/1000. if timeout else 60, From 7eb4d626bada90834f8cfe464e4424ed8991e590 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Thu, 13 Aug 2015 13:12:33 +0100 Subject: [PATCH 171/266] Add max-line-length to the flake8 section of setup.cfg --- setup.cfg | 3 +++ 1 file changed, 3 insertions(+) diff --git a/setup.cfg b/setup.cfg index 888ad6ed4..abb649958 100644 --- a/setup.cfg +++ b/setup.cfg @@ -16,3 +16,6 @@ ignore = docs/* pylint.cfg tox.ini + +[flake8] +max-line-length = 90 From 7e77a82c5f4c2dbda1d00dace74a1aece2ab5b78 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 13 Aug 2015 16:58:10 +0100 Subject: [PATCH 172/266] Re-enable receipts --- synapse/handlers/receipts.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index 415dd339f..86c911c4b 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -171,7 +171,6 @@ class ReceiptEventSource(object): @defer.inlineCallbacks def get_new_events_for_user(self, user, from_key, limit): - defer.returnValue(([], from_key)) from_key = int(from_key) to_key = yield self.get_current_key() @@ -194,7 +193,6 @@ class ReceiptEventSource(object): @defer.inlineCallbacks def get_pagination_rows(self, user, config, key): to_key = int(config.from_key) - defer.returnValue(([], to_key)) if config.to_key: from_key = int(config.to_key) From 5db56779699a3c9a95701e6373ac8bd5cace7860 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 13 Aug 2015 16:58:23 +0100 Subject: [PATCH 173/266] Add metrics to the receipts cache --- synapse/metrics/metric.py | 7 +++++++ synapse/storage/receipts.py | 6 ++++++ 2 files changed, 13 insertions(+) diff --git a/synapse/metrics/metric.py b/synapse/metrics/metric.py index 21b37748f..5b1e9c3dd 100644 --- a/synapse/metrics/metric.py +++ b/synapse/metrics/metric.py @@ -151,5 +151,12 @@ class CacheMetric(object): def inc_misses(self, *values): self.total.inc(*values) + def inc_hits_by(self, inc, *values): + self.hits.inc_by(inc, *values) + self.total.inc_by(inc, *values) + + def inc_misses_by(self, inc, *values): + self.total.inc_by(inc, *values) + def render(self): return 
self.hits.render() + self.total.render() + self.size.render() diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py index cac1a5657..42f4b5f78 100644 --- a/synapse/storage/receipts.py +++ b/synapse/storage/receipts.py @@ -15,6 +15,7 @@ from ._base import SQLBaseStore from synapse.util.caches.descriptors import cachedInlineCallbacks +from synapse.util.caches import cache_counter, caches_by_name from twisted.internet import defer @@ -305,6 +306,8 @@ class _RoomStreamChangeCache(object): self._room_to_key = {} self._cache = sorteddict() self._earliest_key = None + self.name = "ReceiptsRoomChangeCache" + caches_by_name[self.name] = self._cache @defer.inlineCallbacks def get_rooms_changed(self, store, room_ids, key): @@ -321,6 +324,9 @@ class _RoomStreamChangeCache(object): else: result = room_ids + cache_counter.inc_hits_by(len(result), self.name) + cache_counter.inc_misses_by(len(room_ids) - len(result), self.name) + defer.returnValue(result) @defer.inlineCallbacks From 57877b01d7dad613f4e1dc3fb99983320ea50d40 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 13 Aug 2015 17:00:17 +0100 Subject: [PATCH 174/266] Replace list comprehension --- synapse/storage/state.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index abc5b6643..1f5e62fa8 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -426,14 +426,9 @@ class StateStore(SQLBaseStore): else: state_dict = results[group] - evs = [ - state_events[e_id] for e_id in state_ids - if e_id in state_events # This can happen if event is rejected. - ] - state_dict.update({ - (e.type, e.state_key): e - for e in evs - }) + for event_id in state_ids: + state_event = state_events[event_id] + state_dict[(state_event.type, state_event.state_key)] = state_event self._state_group_cache.update( cache_seq_num, From 2bb2c025711f8ff8ea044a203059ca3ea6b94749 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 13 Aug 2015 17:11:30 +0100 Subject: [PATCH 175/266] Remove some vertical space --- synapse/storage/state.py | 45 ++++++++-------------------------------- 1 file changed, 9 insertions(+), 36 deletions(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 1f5e62fa8..185f88fd7 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -60,7 +60,6 @@ class StateStore(SQLBaseStore): ) groups = set(event_to_groups.values()) - group_to_state = yield self._get_state_for_groups(groups) defer.returnValue({ @@ -201,10 +200,7 @@ class StateStore(SQLBaseStore): txn.execute(sql, args) - results[group] = [ - r[0] - for r in txn.fetchall() - ] + results[group] = [r[0] for r in txn.fetchall()] return results @@ -235,20 +231,14 @@ class StateStore(SQLBaseStore): ) groups = set(event_to_groups.values()) - - group_to_state = yield self._get_state_for_groups( - groups, types - ) + group_to_state = yield self._get_state_for_groups(groups, types) event_to_state = { event_id: group_to_state[group] for event_id, group in event_to_groups.items() } - defer.returnValue({ - event: event_to_state[event] - for event in event_ids - }) + defer.returnValue({event: event_to_state[event] for event in event_ids}) @cached(num_args=2, lru=True, max_entries=100000) def _get_state_group_for_event(self, room_id, event_id): @@ -282,10 +272,7 @@ class StateStore(SQLBaseStore): return results - return self.runInteraction( - "_get_state_group_for_events", - f, - ) + return self.runInteraction("_get_state_group_for_events", f) def 
_get_some_state_from_cache(self, group, types): """Checks if group is in cache. See `_get_state_for_groups` @@ -332,8 +319,7 @@ class StateStore(SQLBaseStore): got_all = not (missing_types or types is None) return { - k: v - for k, v in state_dict.items() + k: v for k, v in state_dict.items() if include(k[0], k[1]) }, missing_types, got_all @@ -364,20 +350,15 @@ class StateStore(SQLBaseStore): state_dict, missing_types, got_all = self._get_some_state_from_cache( group, types ) - results[group] = state_dict if not got_all: - missing_groups_and_types.append(( - group, - missing_types - )) + missing_groups_and_types.append((group, missing_types)) else: for group in set(groups): state_dict, got_all = self._get_all_state_from_cache( group ) - results[group] = state_dict if not got_all: @@ -405,10 +386,7 @@ class StateStore(SQLBaseStore): get_prev_content=False ) - state_events = { - e.event_id: e - for e in state_events - } + state_events = {e.event_id: e for e in state_events} # Now we want to update the cache with all the things we fetched # from the database. @@ -418,10 +396,7 @@ class StateStore(SQLBaseStore): # cache absence of the key, on the assumption that if we've # explicitly asked for some types then we will probably ask # for them again. - state_dict = { - key: None - for key in types - } + state_dict = {key: None for key in types} state_dict.update(results[group]) else: state_dict = results[group] @@ -438,9 +413,7 @@ class StateStore(SQLBaseStore): ) results[group].update({ - key: value - for key, value in state_dict.items() - if value + key: value for key, value in state_dict.items() if value }) defer.returnValue(results) From 9f7f228ec2e2948c69ca3910d27fffdd2c2fea50 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 13 Aug 2015 17:20:59 +0100 Subject: [PATCH 176/266] Remove pointless map --- synapse/handlers/federation.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 2bfd0a40e..1e3dccf5a 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -237,9 +237,7 @@ class FederationHandler(BaseHandler): ) ) - def redact_disallowed(event_and_state): - event, state = event_and_state - + def redact_disallowed(event, state): if not state: return event @@ -273,13 +271,11 @@ class FederationHandler(BaseHandler): return event - res = map(redact_disallowed, [ - (e, event_to_state[e.event_id]) + defer.returnValue([ + redact_disallowed(e, event_to_state[e.event_id]) for e in events ]) - defer.returnValue(res) - @log_function @defer.inlineCallbacks def backfill(self, dest, room_id, limit, extremities=[]): From 0cceb2ac92cde0a4289adfc6e9000c7b1c54bdae Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Thu, 13 Aug 2015 17:27:46 +0100 Subject: [PATCH 177/266] Add a few strategic new lines to break up the on_query_client_keys and on_claim_client_keys methods in federation_server.py --- synapse/federation/federation_server.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index c32908ac2..725c6f3fa 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -323,13 +323,16 @@ class FederationServer(FederationBase): else: for device_id in device_ids: query.append((user_id, device_id)) + results = yield self.store.get_e2e_device_keys(query) + json_result = {} for user_id, device_keys in results.items(): for device_id, json_bytes in 
device_keys.items(): json_result.setdefault(user_id, {})[device_id] = json.loads( json_bytes ) + defer.returnValue({"device_keys": json_result}) @defer.inlineCallbacks @@ -339,7 +342,9 @@ class FederationServer(FederationBase): for user_id, device_keys in content.get("one_time_keys", {}).items(): for device_id, algorithm in device_keys.items(): query.append((user_id, device_id, algorithm)) + results = yield self.store.claim_e2e_one_time_keys(query) + json_result = {} for user_id, device_keys in results.items(): for device_id, keys in device_keys.items(): @@ -347,6 +352,7 @@ class FederationServer(FederationBase): json_result.setdefault(user_id, {})[device_id] = { key_id: json.loads(json_bytes) } + defer.returnValue({"one_time_keys": json_result}) @defer.inlineCallbacks From 95b0f5449d2b7539a7817b231090d607e55d6ac7 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Thu, 13 Aug 2015 17:34:22 +0100 Subject: [PATCH 178/266] Fix flake8 warning --- synapse/config/repository.py | 1 + 1 file changed, 1 insertion(+) diff --git a/synapse/config/repository.py b/synapse/config/repository.py index 2cb0812d7..64644b9a7 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -20,6 +20,7 @@ ThumbnailRequirement = namedtuple( "ThumbnailRequirement", ["width", "height", "method", "media_type"] ) + def parse_thumbnail_requirements(thumbnail_sizes): """ Takes a list of dictionaries with "width", "height", and "method" keys and creates a map from image media types to the thumbnail size, thumnailing From 68b255c5a19cca4fce6a4225447e143b5ed7814b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 14 Aug 2015 15:06:22 +0100 Subject: [PATCH 179/266] Batch _get_linearized_receipts_for_rooms --- synapse/storage/receipts.py | 79 ++++++++++++++++++++++++++++++------- 1 file changed, 64 insertions(+), 15 deletions(-) diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py index 42f4b5f78..fb35472ad 100644 --- a/synapse/storage/receipts.py +++ b/synapse/storage/receipts.py @@ -14,7 +14,7 @@ # limitations under the License. from ._base import SQLBaseStore -from synapse.util.caches.descriptors import cachedInlineCallbacks +from synapse.util.caches.descriptors import cachedInlineCallbacks, cachedList from synapse.util.caches import cache_counter, caches_by_name from twisted.internet import defer @@ -55,19 +55,13 @@ class ReceiptsStore(SQLBaseStore): self, room_ids, from_key ) - results = yield defer.gatherResults( - [ - self.get_linearized_receipts_for_room( - room_id, to_key, from_key=from_key - ) - for room_id in room_ids - ], - consumeErrors=True, - ).addErrback(unwrapFirstError) + results = yield self._get_linearized_receipts_for_rooms( + room_ids, to_key, from_key=from_key + ) - defer.returnValue([ev for res in results for ev in res]) + defer.returnValue([ev for res in results.values() for ev in res]) - @defer.inlineCallbacks + @cachedInlineCallbacks(num_args=3, max_entries=5000) def get_linearized_receipts_for_room(self, room_id, to_key, from_key=None): """Get receipts for a single room for sending to clients. @@ -127,6 +121,61 @@ class ReceiptsStore(SQLBaseStore): "content": content, }]) + @cachedList(cache=get_linearized_receipts_for_room.cache, list_name="room_ids", + num_args=3, inlineCallbacks=True) + def _get_linearized_receipts_for_rooms(self, room_ids, to_key, from_key=None): + if not room_ids: + defer.returnValue({}) + + def f(txn): + if from_key: + sql = ( + "SELECT * FROM receipts_linearized WHERE" + " room_id IN (%s) AND stream_id > ? AND stream_id <= ?" 
+ ) % ( + ",".join(["?"] * len(room_ids)) + ) + args = list(room_ids) + args.extend([from_key, to_key]) + + txn.execute(sql, args) + else: + sql = ( + "SELECT * FROM receipts_linearized WHERE" + " room_id IN (%s) AND stream_id <= ?" + ) % ( + ",".join(["?"] * len(room_ids)) + ) + + args = list(room_ids) + args.append(to_key) + + txn.execute(sql, args) + + return self.cursor_to_dict(txn) + + txn_results = yield self.runInteraction( + "_get_linearized_receipts_for_rooms", f + ) + + results = {} + for row in txn_results: + results.setdefault(row["room_id"], { + "type": "m.receipt", + "room_id": row["room_id"], + "content": {}, + })["content"].setdefault( + row["event_id"], {} + ).setdefault( + row["receipt_type"], {} + )[row["user_id"]] = json.loads(row["data"]) + + results = { + room_id: [results[room_id]] if room_id in results else [] + for room_id in room_ids + } + defer.returnValue(results) + def get_max_receipt_stream_id(self): return self._receipts_id_gen.get_max_token(self) @@ -321,11 +370,11 @@ class _RoomStreamChangeCache(object): result = set( self._cache[k] for k in keys[i:] ).intersection(room_ids) + + cache_counter.inc_hits(self.name) else: result = room_ids - - cache_counter.inc_hits_by(len(result), self.name) - cache_counter.inc_misses_by(len(room_ids) - len(result), self.name) + cache_counter.inc_misses(self.name) defer.returnValue(result) From 891dfd90bd97ad485d54dfae3e510c640de8e978 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 14 Aug 2015 15:42:52 +0100 Subject: [PATCH 180/266] Fix pending_calls metric to not lie --- synapse/metrics/__init__.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index 0be977299..7b8a20108 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -158,18 +158,33 @@ def runUntilCurrentTimer(func): @functools.wraps(func) def f(*args, **kwargs): - pending_calls = len(reactor.getDelayedCalls()) + now = reactor.seconds() + num_pending = 0 + + # _newTimedCalls is one long list of *all* pending calls. Below loop + # is based off of impl of reactor.runUntilCurrent + for p in reactor._newTimedCalls: + if p.time > now: + break + + if p.delayed_time > 0: + continue + + num_pending += 1 + + num_pending += len(reactor.threadCallQueue) + start = time.time() * 1000 ret = func(*args, **kwargs) end = time.time() * 1000 tick_time.inc_by(end - start) - pending_calls_metric.inc_by(pending_calls) + pending_calls_metric.inc_by(num_pending) return ret return f -if hasattr(reactor, "runUntilCurrent"): +if hasattr(reactor, "runUntilCurrent") and hasattr(reactor, "_newTimedCalls"): # runUntilCurrent is called when we have pending calls. It is called once # per iteratation after fd polling. reactor.runUntilCurrent = runUntilCurrentTimer(reactor.runUntilCurrent) From f9d4da7f4502aeefe7a5a6a9ec0d2682458e7834 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 17 Aug 2015 09:39:12 +0100 Subject: [PATCH 181/266] Fix bug where we were leaking None into state event lists --- synapse/storage/state.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 185f88fd7..ecb62e6df 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -412,9 +412,10 @@ class StateStore(SQLBaseStore): full=(types is None), ) - results[group].update({ + # We replace here to remove all the entries with None values. 
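
A note on the IN-clause construction in the batched receipts query of PATCH 179 above: joining one "?" placeholder per value keeps the database driver responsible for quoting. A self-contained sqlite3 sketch of the same idiom, with a throwaway table and made-up values:

    import sqlite3

    # Sketch: build "IN (?, ?, ...)" with one placeholder per room_id, then
    # pass the values separately so the driver handles quoting/escaping.
    conn = sqlite3.connect(":memory:")
    conn.execute("CREATE TABLE receipts_linearized (room_id TEXT, stream_id INTEGER)")
    conn.executemany(
        "INSERT INTO receipts_linearized VALUES (?, ?)",
        [("!a:hs", 1), ("!b:hs", 2), ("!c:hs", 7)],
    )

    room_ids = ["!a:hs", "!c:hs"]
    sql = (
        "SELECT room_id, stream_id FROM receipts_linearized"
        " WHERE room_id IN (%s) AND stream_id <= ?"
    ) % (",".join(["?"] * len(room_ids)))

    args = list(room_ids)
    args.append(5)  # to_key
    print(conn.execute(sql, args).fetchall())  # [('!a:hs', 1)]
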
+ results[group] = { key: value for key, value in state_dict.items() if value - }) + } defer.returnValue(results) From 47abebfd6d0ea56d7ac7a565f359992fde323177 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 17 Aug 2015 09:50:50 +0100 Subject: [PATCH 182/266] Add batched version of store.get_presence_state --- synapse/storage/presence.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/synapse/storage/presence.py b/synapse/storage/presence.py index 4f91a2b87..f351b76a7 100644 --- a/synapse/storage/presence.py +++ b/synapse/storage/presence.py @@ -14,7 +14,7 @@ # limitations under the License. from ._base import SQLBaseStore -from synapse.util.caches.descriptors import cached +from synapse.util.caches.descriptors import cached, cachedList from twisted.internet import defer @@ -36,6 +36,7 @@ class PresenceStore(SQLBaseStore): desc="has_presence_state", ) + @cached() def get_presence_state(self, user_localpart): return self._simple_select_one( table="presence", @@ -44,6 +45,23 @@ class PresenceStore(SQLBaseStore): desc="get_presence_state", ) + @cachedList(get_presence_state.cache, list_name="user_localparts") + def get_presence_states(self, user_localparts): + def f(txn): + results = {} + for user_localpart in user_localparts: + results[user_localpart] = self._simple_select_one_txn( + txn, + table="presence", + keyvalues={"user_id": user_localpart}, + retcols=["state", "status_msg", "mtime"], + desc="get_presence_state", + ) + + return results + + return self.runInteraction("get_presence_states", f) + def set_presence_state(self, user_localpart, new_state): return self._simple_update_one( table="presence", From 1a9510bb84d79f6ff78d32390195bc97ed9a439e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 17 Aug 2015 10:40:23 +0100 Subject: [PATCH 183/266] Implement a batched presence_handler.get_state and use it --- synapse/handlers/message.py | 18 ++++------- synapse/handlers/presence.py | 63 ++++++++++++++++++++++++++++++++++++ synapse/storage/presence.py | 6 ++-- 3 files changed, 73 insertions(+), 14 deletions(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 29e81085d..f12465fa2 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -460,20 +460,14 @@ class MessageHandler(BaseHandler): @defer.inlineCallbacks def get_presence(): - presence_defs = yield defer.DeferredList( - [ - presence_handler.get_state( - target_user=UserID.from_string(m.user_id), - auth_user=auth_user, - as_event=True, - check_auth=False, - ) - for m in room_members - ], - consumeErrors=True, + states = yield presence_handler.get_states( + target_users=[UserID.from_string(m.user_id) for m in room_members], + auth_user=auth_user, + as_event=True, + check_auth=False, ) - defer.returnValue([p for success, p in presence_defs if success]) + defer.returnValue(states.values()) receipts_handler = self.hs.get_handlers().receipts_handler diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index 341a516da..33d76efe0 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -232,6 +232,69 @@ class PresenceHandler(BaseHandler): else: defer.returnValue(state) + @defer.inlineCallbacks + def get_states(self, target_users, auth_user, as_event=False, check_auth=True): + try: + local_users, remote_users = partitionbool( + target_users, + lambda u: self.hs.is_mine(u) + ) + + if check_auth: + for u in local_users: + visible = yield self.is_presence_visible( + observer_user=auth_user, + 
observed_user=u + ) + + if not visible: + raise SynapseError(404, "Presence information not visible") + + results = {} + if local_users: + for u in local_users: + if u in self._user_cachemap: + results[u] = self._user_cachemap[u].get_state() + + local_to_user = {u.localpart: u for u in local_users} + + states = yield self.store.get_presence_states( + [u.localpart for u in local_users if u not in results] + ) + + for local_part, state in states.items(): + res = {"presence": state["state"]} + if "status_msg" in state and state["status_msg"]: + res["status_msg"] = state["status_msg"] + results[local_to_user[local_part]] = res + + for u in remote_users: + # TODO(paul): Have remote server send us permissions set + results[u] = self._get_or_offline_usercache(u).get_state() + + for state in results.values(): + if "last_active" in state: + state["last_active_ago"] = int( + self.clock.time_msec() - state.pop("last_active") + ) + + if as_event: + for user, state in results.items(): + content = state + content["user_id"] = user.to_string() + + if "last_active" in content: + content["last_active_ago"] = int( + self._clock.time_msec() - content.pop("last_active") + ) + + results[user] = {"type": "m.presence", "content": content} + except: + logger.exception(":(") + raise + + defer.returnValue(results) + @defer.inlineCallbacks @log_function def set_state(self, target_user, auth_user, state): diff --git a/synapse/storage/presence.py b/synapse/storage/presence.py index f351b76a7..15d98198e 100644 --- a/synapse/storage/presence.py +++ b/synapse/storage/presence.py @@ -50,13 +50,15 @@ class PresenceStore(SQLBaseStore): def f(txn): results = {} for user_localpart in user_localparts: - results[user_localpart] = self._simple_select_one_txn( + res = self._simple_select_one_txn( txn, table="presence", keyvalues={"user_id": user_localpart}, retcols=["state", "status_msg", "mtime"], - desc="get_presence_state", + allow_none=True, ) + if res: + results[user_localpart] = res return results From 2d97e65558f37fa0fbdd8d06545c32b410f1b5ed Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 17 Aug 2015 10:46:55 +0100 Subject: [PATCH 184/266] Remember to invalidate caches --- synapse/storage/presence.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/synapse/storage/presence.py b/synapse/storage/presence.py index 15d98198e..9b136f311 100644 --- a/synapse/storage/presence.py +++ b/synapse/storage/presence.py @@ -21,12 +21,15 @@ from twisted.internet import defer class PresenceStore(SQLBaseStore): def create_presence(self, user_localpart): - return self._simple_insert( + res = self._simple_insert( table="presence", values={"user_id": user_localpart}, desc="create_presence", ) + self.get_presence_state.invalidate((user_localpart,)) + return res + def has_presence_state(self, user_localpart): return self._simple_select_one( table="presence", @@ -65,7 +68,7 @@ class PresenceStore(SQLBaseStore): return self.runInteraction("get_presence_states", f) def set_presence_state(self, user_localpart, new_state): - return self._simple_update_one( + res = self._simple_update_one( table="presence", keyvalues={"user_id": user_localpart}, updatevalues={"state": new_state["state"], @@ -74,6 +77,9 @@ class PresenceStore(SQLBaseStore): desc="set_presence_state", ) + self.get_presence_state.invalidate((user_localpart,)) + return res + def allow_presence_visible(self, observed_localpart, observer_userid): return self._simple_insert( table="presence_allow_inbound", From 4d8e1e1f9e6fb143aa1317f31e561ef7629bfbf2 Mon 
Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 17 Aug 2015 13:36:07 +0100 Subject: [PATCH 185/266] Remove added unused methods --- synapse/metrics/metric.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/synapse/metrics/metric.py b/synapse/metrics/metric.py index 5b1e9c3dd..21b37748f 100644 --- a/synapse/metrics/metric.py +++ b/synapse/metrics/metric.py @@ -151,12 +151,5 @@ class CacheMetric(object): def inc_misses(self, *values): self.total.inc(*values) - def inc_hits_by(self, inc, *values): - self.hits.inc_by(inc, *values) - self.total.inc_by(inc, *values) - - def inc_misses_by(self, inc, *values): - self.total.inc_by(inc, *values) - def render(self): return self.hits.render() + self.total.render() + self.size.render() From d3d582bc1c1d6d497f72d6b248e4f15722f6e18d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 17 Aug 2015 13:38:09 +0100 Subject: [PATCH 186/266] Remove unused import --- synapse/storage/receipts.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py index fb35472ad..46c06e42b 100644 --- a/synapse/storage/receipts.py +++ b/synapse/storage/receipts.py @@ -19,8 +19,6 @@ from synapse.util.caches import cache_counter, caches_by_name from twisted.internet import defer -from synapse.util import unwrapFirstError - from blist import sorteddict import logging import ujson as json From 8f4165628b7e8fa5510bd3cffe01bb93cb7e2df7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 17 Aug 2015 14:43:54 +0100 Subject: [PATCH 187/266] Add index receipts_linearized_room_stream --- synapse/storage/__init__.py | 2 +- .../storage/schema/delta/22/receipts_index.sql | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) create mode 100644 synapse/storage/schema/delta/22/receipts_index.sql diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index c6ce65b4c..f154b1c8a 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -54,7 +54,7 @@ logger = logging.getLogger(__name__) # Remember to update this number every time a change is made to database # schema files, so the users will be informed on server restarts. -SCHEMA_VERSION = 21 +SCHEMA_VERSION = 22 dir_path = os.path.abspath(os.path.dirname(__file__)) diff --git a/synapse/storage/schema/delta/22/receipts_index.sql b/synapse/storage/schema/delta/22/receipts_index.sql new file mode 100644 index 000000000..b182b2b66 --- /dev/null +++ b/synapse/storage/schema/delta/22/receipts_index.sql @@ -0,0 +1,18 @@ +/* Copyright 2015 OpenMarket Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +CREATE INDEX receipts_linearized_room_stream ON receipts_linearized( + room_id, stream_id +); From f72ed6c6a353bad4a54cb695eae12d39fd41ad24 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 18 Aug 2015 10:29:49 +0100 Subject: [PATCH 188/266] Remove debug try/catch --- synapse/handlers/presence.py | 98 +++++++++++++++++------------------- 1 file changed, 47 insertions(+), 51 deletions(-) diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index 33d76efe0..b7664e30f 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -234,64 +234,60 @@ class PresenceHandler(BaseHandler): @defer.inlineCallbacks def get_states(self, target_users, auth_user, as_event=False, check_auth=True): - try: - local_users, remote_users = partitionbool( - target_users, - lambda u: self.hs.is_mine(u) - ) + local_users, remote_users = partitionbool( + target_users, + lambda u: self.hs.is_mine(u) + ) - if check_auth: - for u in local_users: - visible = yield self.is_presence_visible( - observer_user=auth_user, - observed_user=u - ) - - if not visible: - raise SynapseError(404, "Presence information not visible") - - results = {} - if local_users: - for u in local_users: - if u in self._user_cachemap: - results[u] = self._user_cachemap[u].get_state() - - local_to_user = {u.localpart: u for u in local_users} - - states = yield self.store.get_presence_states( - [u.localpart for u in local_users if u not in results] + if check_auth: + for u in local_users: + visible = yield self.is_presence_visible( + observer_user=auth_user, + observed_user=u ) - for local_part, state in states.items(): - res = {"presence": state["state"]} - if "status_msg" in state and state["status_msg"]: - res["status_msg"] = state["status_msg"] - results[local_to_user[local_part]] = res + if not visible: + raise SynapseError(404, "Presence information not visible") - for u in remote_users: - # TODO(paul): Have remote server send us permissions set - results[u] = self._get_or_offline_usercache(u).get_state() + results = {} + if local_users: + for u in local_users: + if u in self._user_cachemap: + results[u] = self._user_cachemap[u].get_state() - for state in results.values(): - if "last_active" in state: - state["last_active_ago"] = int( - self.clock.time_msec() - state.pop("last_active") + local_to_user = {u.localpart: u for u in local_users} + + states = yield self.store.get_presence_states( + [u.localpart for u in local_users if u not in results] + ) + + for local_part, state in states.items(): + res = {"presence": state["state"]} + if "status_msg" in state and state["status_msg"]: + res["status_msg"] = state["status_msg"] + results[local_to_user[local_part]] = res + + for u in remote_users: + # TODO(paul): Have remote server send us permissions set + results[u] = self._get_or_offline_usercache(u).get_state() + + for state in results.values(): + if "last_active" in state: + state["last_active_ago"] = int( + self.clock.time_msec() - state.pop("last_active") + ) + + if as_event: + for user, state in results.items(): + content = state + content["user_id"] = user.to_string() + + if "last_active" in content: + content["last_active_ago"] = int( + self._clock.time_msec() - content.pop("last_active") ) - if as_event: - for user, state in results.items(): - content = state - content["user_id"] = user.to_string() - - if "last_active" in content: - content["last_active_ago"] = int( - self._clock.time_msec() - content.pop("last_active") - ) - - results[user] = {"type": "m.presence", "content": content} - 
except: - logger.exception(":(") - raise + results[user] = {"type": "m.presence", "content": content} defer.returnValue(results) From 776ee6d92b8672c36723b0b8dc9ae3467f34c08f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 18 Aug 2015 10:30:07 +0100 Subject: [PATCH 189/266] Doc strings --- synapse/handlers/presence.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index b7664e30f..1177cbe51 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -192,6 +192,20 @@ class PresenceHandler(BaseHandler): @defer.inlineCallbacks def get_state(self, target_user, auth_user, as_event=False, check_auth=True): + """Get the current presence state of the given user. + + Args: + target_user (UserID): The user whose presence we want + auth_user (UserID): The user requesting the presence, used for + checking if said user is allowed to see the presence of the + `target_user` + as_event (bool): Format the return as an event or not? + check_auth (bool): Perform the auth checks or not? + + Returns: + dict: The presence state of the `target_user`, whose format depends + on the `as_event` argument. + """ if self.hs.is_mine(target_user): if check_auth: visible = yield self.is_presence_visible( @@ -234,6 +248,20 @@ class PresenceHandler(BaseHandler): @defer.inlineCallbacks def get_states(self, target_users, auth_user, as_event=False, check_auth=True): + """A batched version of the `get_state` method that accepts a list of + `target_users` + + Args: + target_users (list): The list of UserIDs whose presence we want + auth_user (UserID): The user requesting the presence, used for + checking if said user is allowed to see the presence of the + `target_users` + as_event (bool): Format the return as an event or not? + check_auth (bool): Perform the auth checks or not?
+ + Returns: + dict: A mapping from user -> presence_state + """ local_users, remote_users = partitionbool( target_users, lambda u: self.hs.is_mine(u) From 83eb627b5a50ef7cd8803f6c6fae9c5f5271bcb1 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 18 Aug 2015 10:33:11 +0100 Subject: [PATCH 190/266] More helpful variable names --- synapse/handlers/presence.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index 1177cbe51..2b103b48b 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -268,10 +268,10 @@ class PresenceHandler(BaseHandler): ) if check_auth: - for u in local_users: + for user in local_users: visible = yield self.is_presence_visible( observer_user=auth_user, - observed_user=u + observed_user=user ) if not visible: @@ -279,9 +279,9 @@ class PresenceHandler(BaseHandler): results = {} if local_users: - for u in local_users: - if u in self._user_cachemap: - results[u] = self._user_cachemap[u].get_state() + for user in local_users: + if user in self._user_cachemap: + results[user] = self._user_cachemap[user].get_state() local_to_user = {u.localpart: u for u in local_users} @@ -295,9 +295,9 @@ class PresenceHandler(BaseHandler): res["status_msg"] = state["status_msg"] results[local_to_user[local_part]] = res - for u in remote_users: + for user in remote_users: # TODO(paul): Have remote server send us permissions set - results[u] = self._get_or_offline_usercache(u).get_state() + results[user] = self._get_or_offline_usercache(user).get_state() for state in results.values(): if "last_active" in state: From cfc503681f3febbd80faeda0da4134b98bcbcbbc Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 18 Aug 2015 10:49:23 +0100 Subject: [PATCH 191/266] Comments --- synapse/storage/receipts.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py index 46c06e42b..0ed9f45ec 100644 --- a/synapse/storage/receipts.py +++ b/synapse/storage/receipts.py @@ -158,15 +158,25 @@ class ReceiptsStore(SQLBaseStore): results = {} for row in txn_results: - results.setdefault(row["room_id"], { + # We want a single event per room, since we want to batch the + # receipts by room, event and type. + room_event = results.setdefault(row["room_id"], { "type": "m.receipt", "room_id": row["room_id"], "content": {}, - })["content"].setdefault( + }) + + # The content is of the form: + # {"$foo:bar": { "read": { "@user:host": }, .. }, .. 
} + event_id = room_event["content"].setdefault( row["event_id"], {} - ).setdefault( + ) + + receipt_type = event_id.setdefault( row["receipt_type"], {} - )[row["user_id"]] = json.loads(row["data"]) + ) + + receipt_type[row["user_id"]] = json.loads(row["data"]) results = { room_id: [results[room_id]] if room_id in results else [] From 85d0bc3bdce038726be4daad6560e1a2a982c9fe Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 18 Aug 2015 11:00:38 +0100 Subject: [PATCH 192/266] Reduce cache size from obscenely large to quite large --- synapse/storage/state.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index ecb62e6df..ab3ad5a07 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -240,7 +240,7 @@ class StateStore(SQLBaseStore): defer.returnValue({event: event_to_state[event] for event in event_ids}) - @cached(num_args=2, lru=True, max_entries=100000) + @cached(num_args=2, lru=True, max_entries=10000) def _get_state_group_for_event(self, room_id, event_id): return self._simple_select_one_onecol( table="event_to_state_groups", From 8e254862f49e5183b0a0e9c4a41ae1f1c3477418 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 18 Aug 2015 11:11:33 +0100 Subject: [PATCH 193/266] Don't assume @cachedList function returns keys for everything --- synapse/util/caches/descriptors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py index 83bfec2f0..362944bc5 100644 --- a/synapse/util/caches/descriptors.py +++ b/synapse/util/caches/descriptors.py @@ -293,7 +293,7 @@ class CacheListDescriptor(object): # we can insert the new deferred into the cache. for arg in missing: observer = ret_d.observe() - observer.addCallback(lambda r, arg: r[arg], arg) + observer.addCallback(lambda r, arg: r.get(arg, None), arg) observer = ObservableDeferred(observer) From e55291ce5ec0c65c54363ef9366c2357df2ee44f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 18 Aug 2015 11:17:37 +0100 Subject: [PATCH 194/266] None check --- synapse/handlers/presence.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index 2b103b48b..748432959 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -290,6 +290,8 @@ class PresenceHandler(BaseHandler): ) for local_part, state in states.items(): + if stat is None: + continue res = {"presence": state["state"]} if "status_msg" in state and state["status_msg"]: res["status_msg"] = state["status_msg"] From 0d4abf77773ca0af73422aff1a35bb73c9235e1f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 18 Aug 2015 11:19:08 +0100 Subject: [PATCH 195/266] Typo --- synapse/handlers/presence.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index 748432959..e91e81831 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -290,7 +290,7 @@ class PresenceHandler(BaseHandler): ) for local_part, state in states.items(): - if stat is None: + if state is None: continue res = {"presence": state["state"]} if "status_msg" in state and state["status_msg"]: From 8199475ce062c8ed7b050ddecd51c6c3ec22e560 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 18 Aug 2015 11:44:10 +0100 Subject: [PATCH 196/266] Ensure we never return a None event from _get_state_for_groups --- synapse/storage/state.py | 11 ++++++++--- 1 file changed, 8 
insertions(+), 3 deletions(-) diff --git a/synapse/storage/state.py index ab3ad5a07..b32141281 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -405,16 +405,21 @@ class StateStore(SQLBaseStore): state_event = state_events[event_id] state_dict[(state_event.type, state_event.state_key)] = state_event + results[group] = state_dict + self._state_group_cache.update( cache_seq_num, key=group, - value=state_dict, + value=results[group], full=(types is None), ) - # We replace here to remove all the entries with None values. + # Remove all the entries with None values. The None values were just + # used for bookkeeping in the cache. + for group, state_dict in results.items(): results[group] = { - key: value for key, value in state_dict.items() if value + key: event for key, event in state_dict.items() + if event } defer.returnValue(results) From d3da63f7667f93e30f6071c1f5a0a0ffb9419e79 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 18 Aug 2015 11:47:00 +0100 Subject: [PATCH 197/266] Use more helpful variable names --- synapse/metrics/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index 7b8a20108..2e307a03a 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -163,11 +163,11 @@ def runUntilCurrentTimer(func): # _newTimedCalls is one long list of *all* pending calls. Below loop # is based off of impl of reactor.runUntilCurrent - for p in reactor._newTimedCalls: - if p.time > now: + for delayed_call in reactor._newTimedCalls: + if delayed_call.time > now: break - if p.delayed_time > 0: + if delayed_call.delayed_time > 0: continue num_pending += 1 From 6e7d36a72c5778be6d1a79d296b1fcdce667839d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 18 Aug 2015 11:51:08 +0100 Subject: [PATCH 198/266] Also check for presence of 'threadCallQueue' in reactor --- synapse/metrics/__init__.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index 2e307a03a..d7bcad8a8 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -184,7 +184,14 @@ def runUntilCurrentTimer(func): return f -if hasattr(reactor, "runUntilCurrent") and hasattr(reactor, "_newTimedCalls"): +try: + # Ensure the reactor has all the attributes we expect + reactor.runUntilCurrent + reactor._newTimedCalls + reactor.threadCallQueue + # runUntilCurrent is called when we have pending calls. It is called once # per iteration after fd polling. reactor.runUntilCurrent = runUntilCurrentTimer(reactor.runUntilCurrent) +except AttributeError: + pass From f704c10f29ac1b013966cc8e43e74d38449cee6a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 18 Aug 2015 11:54:03 +0100 Subject: [PATCH 199/266] Rename unhelpful variable name --- synapse/storage/receipts.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py index 0ed9f45ec..a53506354 100644 --- a/synapse/storage/receipts.py +++ b/synapse/storage/receipts.py @@ -168,13 +168,8 @@ class ReceiptsStore(SQLBaseStore): # The content is of the form: # {"$foo:bar": { "read": { "@user:host": }, .. }, ..
} - event_id = room_event["content"].setdefault( - row["event_id"], {} - ) - - receipt_type = event_id.setdefault( - row["receipt_type"], {} - ) + event_entry = room_event["content"].setdefault(row["event_id"], {}) + receipt_type = event_entry.setdefault(row["receipt_type"], {}) receipt_type[row["user_id"]] = json.loads(row["data"]) From ee59af9ac0ede0efe97cb70edbe6ed9e21ff8db4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 18 Aug 2015 15:16:28 +0100 Subject: [PATCH 200/266] Set request.authenticated_entity for application services --- synapse/api/auth.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/synapse/api/auth.py b/synapse/api/auth.py index a7f428a96..1e3b0fbfb 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -352,6 +352,8 @@ class Auth(object): if not user_id: raise KeyError + request.authenticated_entity = user_id + defer.returnValue( (UserID.from_string(user_id), ClientInfo("", "")) ) @@ -425,6 +427,7 @@ class Auth(object): "Unrecognised access token.", errcode=Codes.UNKNOWN_TOKEN ) + request.authenticated_entity = service.sender defer.returnValue(service) except KeyError: raise AuthError( From 128ed32e6b64b7c590e288c29dfb993330a0ff8e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 18 Aug 2015 15:51:23 +0100 Subject: [PATCH 201/266] Bump size of get_presence_state cache --- synapse/storage/presence.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/presence.py b/synapse/storage/presence.py index 9b136f311..34ca3b9a5 100644 --- a/synapse/storage/presence.py +++ b/synapse/storage/presence.py @@ -39,7 +39,7 @@ class PresenceStore(SQLBaseStore): desc="has_presence_state", ) - @cached() + @cached(max_entries=2000) def get_presence_state(self, user_localpart): return self._simple_select_one( table="presence", From 0bfdaf1f4fe73c6afd211a7744f82d8d0b2b1e9d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 18 Aug 2015 16:26:07 +0100 Subject: [PATCH 202/266] Rejig the code to make it nicer --- synapse/storage/state.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index b32141281..c831d8d38 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -398,6 +398,7 @@ class StateStore(SQLBaseStore): # for them again. state_dict = {key: None for key in types} state_dict.update(results[group]) + results[group] = state_dict else: state_dict = results[group] @@ -405,12 +406,10 @@ class StateStore(SQLBaseStore): state_event = state_events[event_id] state_dict[(state_event.type, state_event.state_key)] = state_event - results[group] = state_dict - self._state_group_cache.update( cache_seq_num, key=group, - value=results[group], + value=state_dict, full=(types is None), ) From a82938416db87adde7c7e7965b93c2eecf86695a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 18 Aug 2015 16:28:13 +0100 Subject: [PATCH 203/266] Remove newline because vertical whitespace makes mjark sad --- synapse/storage/state.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index c831d8d38..c9110e630 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -417,8 +417,7 @@ class StateStore(SQLBaseStore): # used for bookkeeping in the cache. 
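
One subtlety in the filter these state-cache patches keep reshaping: `if event` drops every falsy value, not just the None bookkeeping entries. With event objects the distinction is harmless, but in general an explicit None check is the safer spelling, as this small illustration (independent of Synapse's types) shows:

    state_dict = {"kept": "event", "absent": None, "falsy": 0}

    # Truthiness filter: drops the None placeholder, but also drops 0.
    print({k: v for k, v in state_dict.items() if v})
    # {'kept': 'event'}

    # Explicit filter: drops only the None bookkeeping entries.
    print({k: v for k, v in state_dict.items() if v is not None})
    # {'kept': 'event', 'falsy': 0}
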
for group, state_dict in results.items(): results[group] = { - key: event for key, event in state_dict.items() - if event + key: event for key, event in state_dict.items() if event } defer.returnValue(results) From abc6986a24e7f843ffdfc1610833feb96462d5a8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 19 Aug 2015 09:30:52 +0100 Subject: [PATCH 204/266] Fix regression where we incorrectly responded with a 200 to /login --- synapse/handlers/auth.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index 98d99dd0a..4947c4051 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -283,7 +283,7 @@ class AuthHandler(BaseHandler): StoreError if there was a problem storing the token. LoginError if there was an authentication problem. """ - self._check_password(user_id, password) + yield self._check_password(user_id, password) reg_handler = self.hs.get_handlers().registration_handler access_token = reg_handler.generate_token(user_id) @@ -291,6 +291,7 @@ class AuthHandler(BaseHandler): yield self.store.add_access_token_to_user(user_id, access_token) defer.returnValue(access_token) + @defer.inlineCallbacks def _check_password(self, user_id, password): """Checks that user_id has passed password, raises LoginError if not.""" user_info = yield self.store.get_user_by_id(user_id=user_id) From 40da1f200d6a7778acc76562fcecbbcd3f97eee3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 19 Aug 2015 09:41:07 +0100 Subject: [PATCH 205/266] Remove an access token log line --- synapse/handlers/auth.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index 4947c4051..be2baeaec 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -287,7 +287,7 @@ class AuthHandler(BaseHandler): reg_handler = self.hs.get_handlers().registration_handler access_token = reg_handler.generate_token(user_id) - logger.info("Adding token %s for user %s", access_token, user_id) + logger.info("Logging in user %s", user_id) yield self.store.add_access_token_to_user(user_id, access_token) defer.returnValue(access_token) From a45ec7c651abdbe54bba4a8fe420e02c9059f390 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 19 Aug 2015 10:08:12 +0100 Subject: [PATCH 206/266] Block on storing the current last_tokens --- synapse/push/__init__.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/synapse/push/__init__.py b/synapse/push/__init__.py index 36f450c31..483deb624 100644 --- a/synapse/push/__init__.py +++ b/synapse/push/__init__.py @@ -345,7 +345,7 @@ class Pusher(object): if processed: self.backoff_delay = Pusher.INITIAL_BACKOFF self.last_token = chunk['end'] - self.store.update_pusher_last_token_and_success( + yield self.store.update_pusher_last_token_and_success( self.app_id, self.pushkey, self.user_name, @@ -354,7 +354,7 @@ class Pusher(object): ) if self.failing_since: self.failing_since = None - self.store.update_pusher_failing_since( + yield self.store.update_pusher_failing_since( self.app_id, self.pushkey, self.user_name, @@ -362,7 +362,7 @@ class Pusher(object): else: if not self.failing_since: self.failing_since = self.clock.time_msec() - self.store.update_pusher_failing_since( + yield self.store.update_pusher_failing_since( self.app_id, self.pushkey, self.user_name, @@ -380,7 +380,7 @@ class Pusher(object): self.user_name, self.pushkey) self.backoff_delay = Pusher.INITIAL_BACKOFF self.last_token = chunk['end'] - 
self.store.update_pusher_last_token( + yield self.store.update_pusher_last_token( self.app_id, self.pushkey, self.user_name, @@ -388,7 +388,7 @@ class Pusher(object): ) self.failing_since = None - self.store.update_pusher_failing_since( + yield self.store.update_pusher_failing_since( self.app_id, self.pushkey, self.user_name, From 78fa346b0781cf42ef8772638f9f8abb26b9a36f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 19 Aug 2015 10:08:31 +0100 Subject: [PATCH 207/266] Store the 'last_token' in the db, even if we processed no events --- synapse/push/__init__.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/synapse/push/__init__.py b/synapse/push/__init__.py index 483deb624..13002e0db 100644 --- a/synapse/push/__init__.py +++ b/synapse/push/__init__.py @@ -294,6 +294,12 @@ class Pusher(object): if not single_event: self.last_token = chunk['end'] logger.debug("Event stream timeout for pushkey %s", self.pushkey) + yield self.store.update_pusher_last_token( + self.app_id, + self.pushkey, + self.user_name, + self.last_token + ) return if not self.alive: From d7272f8d9d0ce3ac9a4095969453efef5aecce40 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 19 Aug 2015 12:03:09 +0100 Subject: [PATCH 208/266] Add canonical alias to the default power levels --- synapse/api/constants.py | 1 + synapse/handlers/room.py | 1 + 2 files changed, 2 insertions(+) diff --git a/synapse/api/constants.py b/synapse/api/constants.py index 7156ee4e7..60a0d336d 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -76,6 +76,7 @@ class EventTypes(object): Feedback = "m.room.message.feedback" RoomHistoryVisibility = "m.room.history_visibility" + CanonicalAlias = "m.room.canonical_alias" # These are used for validation Message = "m.room.message" diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 7511d294f..c56112a92 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -250,6 +250,7 @@ class RoomCreationHandler(BaseHandler): EventTypes.Name: 100, EventTypes.PowerLevels: 100, EventTypes.RoomHistoryVisibility: 100, + EventTypes.CanonicalAlias: 100, }, "events_default": 0, "state_default": 50, From daa01842f889a8d93a33d7e11cddc1b72700810e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 19 Aug 2015 13:46:03 +0100 Subject: [PATCH 209/266] Don't get apservice interested rooms in RoomHandler.get_joined_rooms_for_users --- synapse/handlers/events.py | 10 +++++++++- synapse/handlers/room.py | 10 ++-------- synapse/handlers/sync.py | 24 +++++++++++++++++++++--- 3 files changed, 32 insertions(+), 12 deletions(-) diff --git a/synapse/handlers/events.py b/synapse/handlers/events.py index 993d33ba4..f9ca2f863 100644 --- a/synapse/handlers/events.py +++ b/synapse/handlers/events.py @@ -70,7 +70,15 @@ class EventStreamHandler(BaseHandler): self._streams_per_user[auth_user] += 1 rm_handler = self.hs.get_handlers().room_member_handler - room_ids = yield rm_handler.get_joined_rooms_for_user(auth_user) + + app_service = yield self.store.get_app_service_by_user_id( + auth_user.to_string() + ) + if app_service: + rooms = yield self.store.get_app_service_rooms(app_service) + room_ids = set(r.room_id for r in rooms) + else: + room_ids = yield rm_handler.get_joined_rooms_for_user(auth_user) if timeout: # If they've set a timeout set a minimum limit. 
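The same branch is repeated in the event stream and sync handlers (and removed from the room-member handler below): if the requesting user belongs to an application service, the stream should cover every room the AS is interested in via its registered namespaces, not merely the rooms that user has joined. A sketch of that shared shape, reusing the store methods from the diff above:

.. code:: python

    from twisted.internet import defer

    @defer.inlineCallbacks
    def rooms_for_stream(store, rm_handler, user):
        """Rooms whose events a stream for `user` should cover (sketch)."""
        app_service = yield store.get_app_service_by_user_id(user.to_string())
        if app_service:
            # An AS sees rooms matching its namespaces, not just the rooms
            # its sender user happens to have joined.
            rooms = yield store.get_app_service_rooms(app_service)
            room_ids = set(r.room_id for r in rooms)
        else:
            room_ids = yield rm_handler.get_joined_rooms_for_user(user)
        defer.returnValue(room_ids)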
diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 7511d294f..82c16013a 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -557,15 +557,9 @@ class RoomMemberHandler(BaseHandler): """Returns a list of roomids that the user has any of the given membership states in.""" - app_service = yield self.store.get_app_service_by_user_id( - user.to_string() + rooms = yield self.store.get_rooms_for_user( + user.to_string(), ) - if app_service: - rooms = yield self.store.get_app_service_rooms(app_service) - else: - rooms = yield self.store.get_rooms_for_user( - user.to_string(), - ) # For some reason the list of events contains duplicates # TODO(paul): work out why because I really don't think it should diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 7206ae23d..353a41605 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -96,9 +96,18 @@ class SyncHandler(BaseHandler): return self.current_sync_for_user(sync_config, since_token) rm_handler = self.hs.get_handlers().room_member_handler - room_ids = yield rm_handler.get_joined_rooms_for_user( - sync_config.user + + app_service = yield self.store.get_app_service_by_user_id( + sync_config.user.to_string() ) + if app_service: + rooms = yield self.store.get_app_service_rooms(app_service) + room_ids = set(r.room_id for r in rooms) + else: + room_ids = yield rm_handler.get_joined_rooms_for_user( + sync_config.user + ) + result = yield self.notifier.wait_for_events( sync_config.user, room_ids, sync_config.filter, timeout, current_sync_callback @@ -229,7 +238,16 @@ class SyncHandler(BaseHandler): logger.debug("Typing %r", typing_by_room) rm_handler = self.hs.get_handlers().room_member_handler - room_ids = yield rm_handler.get_joined_rooms_for_user(sync_config.user) + app_service = yield self.store.get_app_service_by_user_id( + sync_config.user.to_string() + ) + if app_service: + rooms = yield self.store.get_app_service_rooms(app_service) + room_ids = set(r.room_id for r in rooms) + else: + room_ids = yield rm_handler.get_joined_rooms_for_user( + sync_config.user + ) # TODO (mjark): Does public mean "published"? 
published_rooms = yield self.store.get_rooms(is_public=True) From aadb2238c9647186711933666851def5e37a8dbf Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 20 Aug 2015 09:55:04 +0100 Subject: [PATCH 210/266] Check that the canonical room alias actually points to the room --- synapse/handlers/_base.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/synapse/handlers/_base.py b/synapse/handlers/_base.py index d6c064b39..e91f1129d 100644 --- a/synapse/handlers/_base.py +++ b/synapse/handlers/_base.py @@ -18,7 +18,7 @@ from twisted.internet import defer from synapse.api.errors import LimitExceededError, SynapseError from synapse.crypto.event_signing import add_hashes_and_signatures from synapse.api.constants import Membership, EventTypes -from synapse.types import UserID +from synapse.types import UserID, RoomAlias from synapse.util.logcontext import PreserveLoggingContext @@ -130,6 +130,22 @@ class BaseHandler(object): returned_invite.signatures ) + if event.type == EventTypes.CanonicalAlias: + # Check the alias is acually valid (at this time at least) + room_alias_str = event.content.get("alias", None) + if room_alias_str: + room_alias = RoomAlias.from_string(room_alias_str) + directory_handler = self.hs.get_handlers().directory_handler + mapping = yield directory_handler.get_association(room_alias) + + if mapping["room_id"] != event.room_id: + raise SynapseError( + 400, + "Room alias %s does not point to the room" % ( + room_alias_str, + ) + ) + destinations = set(extra_destinations) for k, s in context.current_state.items(): try: From 4cf302de5b1b4f4494a9445e0d85e4c9c24ff73d Mon Sep 17 00:00:00 2001 From: David Baker Date: Thu, 20 Aug 2015 10:31:18 +0100 Subject: [PATCH 211/266] Comma comma comma comma comma chameleon --- synapse/rest/client/v1/login.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/rest/client/v1/login.py b/synapse/rest/client/v1/login.py index cb9791760..0d5eafd0f 100644 --- a/synapse/rest/client/v1/login.py +++ b/synapse/rest/client/v1/login.py @@ -86,7 +86,7 @@ class LoginRestServlet(ClientV1RestServlet): user_id, self.hs.hostname).to_string() token = yield self.handlers.auth_handler.login_with_password( - user_id=user_id + user_id=user_id, password=login_submission["password"]) result = { From 9d720223f25982500ba4076caef9b70b608e0292 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 20 Aug 2015 14:12:01 +0100 Subject: [PATCH 212/266] Bump version and changelog --- CHANGES.rst | 73 +++++++++++++++++++++++++++++++++++++++++++++ synapse/__init__.py | 2 +- 2 files changed, 74 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 6a5fce899..e2c6afb6a 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,3 +1,76 @@ +Changes in synapse v0.10.0-rc1 (2015-08-20) +=========================================== + +Also see v0.9.4-rc1 changelog, which has been amalgamated into this release. + +General: + +* Upgrade to Twisted 15 (PR #173) +* Add support for serving and fetching encryption keys over federation. + (PR #208) +* Add support for logging in with email address (PR #234) +* Add support for new ``m.room.canonical_alias`` event. (PR #233) +* Error if a user tries to register with an email already in use. 
(PR #211) +* Add extra and improve existing caches (PR #212, #219, #226, #228) +* Batch various storage request (PR #226, #228) +* Fix bug where we didn't correctly log the entity that triggered the request + if the request came in via an application service (PR #230) +* Fix bug where we needlessly regenerated the full list of rooms an AS is + interested in. (PR #232) + + +Configuration: + +* Add ``--generate-keys`` that will generate any missing cert and key files in + the configuration files. This is equivalent to running ``--generate-config`` + on an existing configuration file. (PR #220) +* ``--generate-config`` now no longer requires a ``--server-name`` parameter + when used on existing configuration files. (PR #220) +* Add ``--print-pidfile`` flag that controls the printing of the pid to stdout + of the demonised process. (PR #213) + +Media Repository: + +* Fix bug where we picked a lower resolution image than requested. (PR #205) +* Add support for specifying if a the media repository should dynamically + thumbnail images or not. (PR #206) + +Metrics: + +* Add statistics from the reactor to the metrics API. (PR #224, #225) + +Demo Homeservers: + +* Fix starting the demo homeservers without rate-limiting enabled. (PR #182) +* Fix enabling registration on demo homeservers (PR #223) + + +Changes in synapse v0.9.4-rc1 (2015-07-21) +========================================== + +General: + +* Add basic implementation of receipts. (SPEC-99) +* Add support for configuration presets in room creation API. (PR #203) +* Add auth event that limits the visibility of history for new users. + (SPEC-134) +* Add SAML2 login/registration support. (PR #201. Thanks Muthu Subramanian!) +* Add client side key management APIs for end to end encryption. (PR #198) +* Change power level semantics so that you cannot kick, ban or change power + levels of users that have equal or greater power level than you. (SYN-192) +* Improve performance by bulk inserting events where possible. (PR #193) +* Improve performance by bulk verifying signatures where possible. (PR #194) + + +Configuration: + +* Add support for including TLS certificate chains. + +Media Repository: + +* Add Content-Disposition headers to content repository responses. (SYN-150) + + Changes in synapse v0.9.3 (2015-07-01) ====================================== diff --git a/synapse/__init__.py b/synapse/__init__.py index 96e37308d..5853165a2 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -16,4 +16,4 @@ """ This is a reference implementation of a Matrix home server. """ -__version__ = "0.9.3" +__version__ = "0.10.0-rc1" From 23b21e5215d3a4d88925aa28e5bcaeda169d81cc Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 20 Aug 2015 14:25:57 +0100 Subject: [PATCH 213/266] Update changelog --- CHANGES.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES.rst b/CHANGES.rst index e2c6afb6a..e6d1a3730 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -17,6 +17,7 @@ General: if the request came in via an application service (PR #230) * Fix bug where we needlessly regenerated the full list of rooms an AS is interested in. (PR #232) +* Add support for AS's to use v2_alpha registration API (PR #210) Configuration: From 9b63def3887779c7c9a1aeadd2d16df506155953 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 20 Aug 2015 14:35:40 +0100 Subject: [PATCH 214/266] Add m.room.avatar to default power levels. 
Change default required power levels of such events to 50 --- synapse/api/constants.py | 1 + synapse/handlers/room.py | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/synapse/api/constants.py b/synapse/api/constants.py index 60a0d336d..1423986c1 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -77,6 +77,7 @@ class EventTypes(object): RoomHistoryVisibility = "m.room.history_visibility" CanonicalAlias = "m.room.canonical_alias" + RoomAvatar = "m.room.avatar" # These are used for validation Message = "m.room.message" diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 8108c2763..c5d1001b5 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -247,10 +247,11 @@ class RoomCreationHandler(BaseHandler): }, "users_default": 0, "events": { - EventTypes.Name: 100, + EventTypes.Name: 50, EventTypes.PowerLevels: 100, EventTypes.RoomHistoryVisibility: 100, - EventTypes.CanonicalAlias: 100, + EventTypes.CanonicalAlias: 50, + EventTypes.RoomAvatar: 50, }, "events_default": 0, "state_default": 50, From bb9611bd46c77252e7ecf3ef90251cf4cd2f446e Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Thu, 20 Aug 2015 15:08:18 +0100 Subject: [PATCH 215/266] Add generic update instructions to UPGRADE.rst and add link to them from the README.rst --- README.rst | 11 ++++------- UPGRADE.rst | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 7 deletions(-) diff --git a/README.rst b/README.rst index 5ff53f2df..b01d693a4 100644 --- a/README.rst +++ b/README.rst @@ -362,14 +362,11 @@ This should end with a 'PASSED' result:: Upgrading an existing Synapse ============================= -IMPORTANT: Before upgrading an existing synapse to a new version, please -refer to UPGRADE.rst for any additional instructions. - -Otherwise, simply re-install the new codebase over the current one - e.g. -by ``pip install --process-dependency-links -https://github.com/matrix-org/synapse/tarball/master`` -if using pip, or by ``git pull`` if running off a git working copy. +The instructions for upgrading synapse are in `UPGRADE.rst`_. +Please check these instructions as upgrading may require extra steps for some +versions of synapse. +.. _UPGRADE.rst: UPGRADE.rst Setting up Federation ===================== diff --git a/UPGRADE.rst b/UPGRADE.rst index d98460f64..a82dabbd3 100644 --- a/UPGRADE.rst +++ b/UPGRADE.rst @@ -1,3 +1,36 @@ +Upgrading Synapse +================= + +Before upgrading check if any special steps are required to upgrade from the +what you currently have installed to current version of synapse. The extra +instructions that may be required are listed later in this document. + +If you installed synapse in a virtualenv then active that virtualenv before +upgrading. If synapse is installed in a virtualenv in ``~/.synapse/`` then run: + +.. code:: bash + + source ~/.synapse/bin/activate + +If you installed synapse using pip then upgrade to the latest version by +running: + +.. code:: bash + + pip install --upgrade --process-dependency-links https://github.com/matrix-org/synapse/tarball/master + +If you installed synapse using git then upgrade to the latest version by +running: + +.. code:: bash + + # Pull the latest version of the master branch. + git pull + # Update the versions of synapses python dependencies. 
+ python synapse/python_dependencies.py | xargs -n1 pip install + + + Upgrading to v0.9.0 =================== From fd88ea19c0702d970af3a737a094234be1809b9e Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Thu, 20 Aug 2015 15:12:44 +0100 Subject: [PATCH 216/266] Tweak the wording a bit --- UPGRADE.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/UPGRADE.rst b/UPGRADE.rst index a82dabbd3..35a0333a7 100644 --- a/UPGRADE.rst +++ b/UPGRADE.rst @@ -5,28 +5,28 @@ Before upgrading check if any special steps are required to upgrade from the what you currently have installed to current version of synapse. The extra instructions that may be required are listed later in this document. -If you installed synapse in a virtualenv then active that virtualenv before +If synapse was installed in a virtualenv then active that virtualenv before upgrading. If synapse is installed in a virtualenv in ``~/.synapse/`` then run: .. code:: bash source ~/.synapse/bin/activate -If you installed synapse using pip then upgrade to the latest version by +If synapse was installed using pip then upgrade to the latest version by running: .. code:: bash pip install --upgrade --process-dependency-links https://github.com/matrix-org/synapse/tarball/master -If you installed synapse using git then upgrade to the latest version by +If synapse was installed using git then upgrade to the latest version by running: .. code:: bash # Pull the latest version of the master branch. git pull - # Update the versions of synapses python dependencies. + # Update the versions of synapse's python dependencies. python synapse/python_dependencies.py | xargs -n1 pip install From 482648123f608fb6e77dc47960ddda501c8f5c70 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Thu, 20 Aug 2015 15:20:07 +0100 Subject: [PATCH 217/266] Clean up some of restructured text formatting in the README.rst --- README.rst | 62 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 32 insertions(+), 30 deletions(-) diff --git a/README.rst b/README.rst index 5ff53f2df..01d8cb1a7 100644 --- a/README.rst +++ b/README.rst @@ -7,7 +7,7 @@ Matrix is an ambitious new ecosystem for open federated Instant Messaging and VoIP. The basics you need to know to get up and running are: - Everything in Matrix happens in a room. Rooms are distributed and do not - exist on any single server. Rooms can be located using convenience aliases + exist on any single server. Rooms can be located using convenience aliases like ``#matrix:matrix.org`` or ``#test:localhost:8448``. - Matrix user IDs look like ``@matthew:matrix.org`` (although in the future @@ -23,7 +23,7 @@ The overall architecture is:: accessed by the web client at http://matrix.org/beta or via an IRC bridge at irc://irc.freenode.net/matrix. -Synapse is currently in rapid development, but as of version 0.5 we believe it +Synapse is currently in rapid development, but as of version 0.5 we believe it is sufficiently stable to be run as an internet-facing service for real usage! 
About Matrix @@ -104,7 +104,7 @@ Installing prerequisites on Ubuntu or Debian:: sudo apt-get install build-essential python2.7-dev libffi-dev \ python-pip python-setuptools sqlite3 \ libssl-dev python-virtualenv libjpeg-dev - + Installing prerequisites on ArchLinux:: sudo pacman -S base-devel python2 python-pip \ @@ -115,7 +115,7 @@ Installing prerequisites on Mac OS X:: xcode-select --install sudo easy_install pip sudo pip install virtualenv - + To install the synapse homeserver run:: virtualenv -p python2.7 ~/.synapse @@ -180,7 +180,7 @@ The advantages of Postgres include: * allowing basic active/backup high-availability with a "hot spare" synapse pointing at the same DB master, as well as enabling DB replication in synapse itself. - + The only disadvantage is that the code is relatively new as of April 2015 and may have a few regressions relative to SQLite. @@ -190,8 +190,8 @@ For information on how to install and use PostgreSQL, please see Running Synapse =============== -To actually run your new homeserver, pick a working directory for Synapse to run -(e.g. ``~/.synapse``), and:: +To actually run your new homeserver, pick a working directory for Synapse to +run (e.g. ``~/.synapse``), and:: cd ~/.synapse source ./bin/activate @@ -214,13 +214,13 @@ defaults to python 3, but synapse currently assumes python 2.7 by default: pip may be outdated (6.0.7-1 and needs to be upgraded to 6.0.8-1 ):: sudo pip2.7 install --upgrade pip - + You also may need to explicitly specify python 2.7 again during the install request:: pip2.7 install --process-dependency-links \ https://github.com/matrix-org/synapse/tarball/master - + If you encounter an error with lib bcrypt causing an Wrong ELF Class: ELFCLASS32 (x64 Systems), you may need to reinstall py-bcrypt to correctly compile it under the right architecture. (This should not be needed if @@ -228,7 +228,7 @@ installing under virtualenv):: sudo pip2.7 uninstall py-bcrypt sudo pip2.7 install py-bcrypt - + During setup of Synapse you need to call python2.7 directly again:: cd ~/.synapse @@ -236,25 +236,27 @@ During setup of Synapse you need to call python2.7 directly again:: --server-name machine.my.domain.name \ --config-path homeserver.yaml \ --generate-config - + ...substituting your host and domain name as appropriate. Windows Install --------------- Synapse can be installed on Cygwin. It requires the following Cygwin packages: - - gcc - - git - - libffi-devel - - openssl (and openssl-devel, python-openssl) - - python - - python-setuptools +- gcc +- git +- libffi-devel +- openssl (and openssl-devel, python-openssl) +- python +- python-setuptools The content repository requires additional packages and will be unable to process uploads without them: - - libjpeg8 - - libjpeg8-devel - - zlib + +- libjpeg8 +- libjpeg8-devel +- zlib + If you choose to install Synapse without these packages, you will need to reinstall ``pillow`` for changes to be applied, e.g. ``pip uninstall pillow`` ``pip install pillow --user`` @@ -276,8 +278,8 @@ Troubleshooting Troubleshooting Installation ---------------------------- -Synapse requires pip 1.7 or later, so if your OS provides too old a version and -you get errors about ``error: no such option: --process-dependency-links`` you +Synapse requires pip 1.7 or later, so if your OS provides too old a version and +you get errors about ``error: no such option: --process-dependency-links`` you may need to manually upgrade it:: sudo pip install --upgrade pip @@ -288,9 +290,9 @@ created. 
To reset the installation:: rm -rf /tmp/pip_install_matrix -pip seems to leak *lots* of memory during installation. For instance, a Linux -host with 512MB of RAM may run out of memory whilst installing Twisted. If this -happens, you will have to individually install the dependencies which are +pip seems to leak *lots* of memory during installation. For instance, a Linux +host with 512MB of RAM may run out of memory whilst installing Twisted. If this +happens, you will have to individually install the dependencies which are failing, e.g.:: pip install twisted @@ -301,8 +303,8 @@ will need to export CFLAGS=-Qunused-arguments. Troubleshooting Running ----------------------- -If synapse fails with ``missing "sodium.h"`` crypto errors, you may need -to manually upgrade PyNaCL, as synapse uses NaCl (http://nacl.cr.yp.to/) for +If synapse fails with ``missing "sodium.h"`` crypto errors, you may need +to manually upgrade PyNaCL, as synapse uses NaCl (http://nacl.cr.yp.to/) for encryption and digital signatures. Unfortunately PyNACL currently has a few issues (https://github.com/pyca/pynacl/issues/53) and @@ -313,7 +315,7 @@ fix try re-installing from PyPI or directly from # Install from PyPI pip install --user --upgrade --force pynacl - + # Install from github pip install --user https://github.com/pyca/pynacl/tarball/master @@ -431,7 +433,7 @@ private federation (``localhost:8080``, ``localhost:8081`` and http://localhost:8080. Simply run:: demo/start.sh - + This is mainly useful just for development purposes. Running The Demo Web Client @@ -494,7 +496,7 @@ time. Where's the spec?! ================== -The source of the matrix spec lives at https://github.com/matrix-org/matrix-doc. +The source of the matrix spec lives at https://github.com/matrix-org/matrix-doc. A recent HTML snapshot of this lives at http://matrix.org/docs/spec From 8a951540f6b47c8cc33dc41f3178295c55350c74 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Thu, 20 Aug 2015 15:22:26 +0100 Subject: [PATCH 218/266] Further formatting clean ups --- README.rst | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/README.rst b/README.rst index 01d8cb1a7..e63f86b60 100644 --- a/README.rst +++ b/README.rst @@ -174,12 +174,12 @@ traditionally used for convenience and simplicity. The advantages of Postgres include: - * significant performance improvements due to the superior threading and - caching model, smarter query optimiser - * allowing the DB to be run on separate hardware - * allowing basic active/backup high-availability with a "hot spare" synapse - pointing at the same DB master, as well as enabling DB replication in - synapse itself. +* significant performance improvements due to the superior threading and + caching model, smarter query optimiser +* allowing the DB to be run on separate hardware +* allowing basic active/backup high-availability with a "hot spare" synapse + pointing at the same DB master, as well as enabling DB replication in + synapse itself. The only disadvantage is that the code is relatively new as of April 2015 and may have a few regressions relative to SQLite. @@ -326,7 +326,7 @@ If running `$ synctl start` fails with 'returned non-zero exit status 1', you will need to explicitly call Python2.7 - either running as:: python2.7 -m synapse.app.homeserver --daemonize -c homeserver.yaml - + ...or by editing synctl with the correct python executable. 
Synapse Development From ca0d28ef34022874ebc9168146df53a10bcb925e Mon Sep 17 00:00:00 2001 From: David Baker Date: Thu, 20 Aug 2015 15:35:14 +0100 Subject: [PATCH 219/266] Another use of check_password that got missed in the yield fix --- synapse/handlers/auth.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index be2baeaec..ff2c66f44 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -162,7 +162,7 @@ class AuthHandler(BaseHandler): if not user_id.startswith('@'): user_id = UserID.create(user_id, self.hs.hostname).to_string() - self._check_password(user_id, password) + yield self._check_password(user_id, password) defer.returnValue(user_id) @defer.inlineCallbacks From f764f9264734fdcf83869022fed23bdab5e4c8dc Mon Sep 17 00:00:00 2001 From: David Baker Date: Thu, 20 Aug 2015 15:35:54 +0100 Subject: [PATCH 220/266] Remove spurious extra arg to set_password --- synapse/rest/client/v2_alpha/account.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/rest/client/v2_alpha/account.py b/synapse/rest/client/v2_alpha/account.py index 897c54b53..522a312c9 100644 --- a/synapse/rest/client/v2_alpha/account.py +++ b/synapse/rest/client/v2_alpha/account.py @@ -79,7 +79,7 @@ class PasswordRestServlet(RestServlet): new_password = params['new_password'] yield self.auth_handler.set_password( - user_id, new_password, None + user_id, new_password ) defer.returnValue((200, {})) From 3e9ee62db0bfb3991254f7152b70491cbda385b1 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 21 Aug 2015 09:15:13 +0100 Subject: [PATCH 221/266] Add missing param in store.get_state_groups invocation --- synapse/handlers/federation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 1e3dccf5a..4ff20599d 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -874,7 +874,7 @@ class FederationHandler(BaseHandler): raise AuthError(403, "Host not in room.") state_groups = yield self.store.get_state_groups( - [event_id] + room_id, [event_id] ) if state_groups: From 1f7642efa9f7165706593d745aab467b7fdddd67 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 21 Aug 2015 09:36:07 +0100 Subject: [PATCH 222/266] Fix bug where we didn't correctly serialize the redacted_because key over federation --- synapse/events/__init__.py | 5 ++++- synapse/storage/events.py | 2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py index 39ce4f7c4..3fb4b5e79 100644 --- a/synapse/events/__init__.py +++ b/synapse/events/__init__.py @@ -90,7 +90,7 @@ class EventBase(object): d = dict(self._event_dict) d.update({ "signatures": self.signatures, - "unsigned": self.unsigned, + "unsigned": dict(self.unsigned), }) return d @@ -109,6 +109,9 @@ class EventBase(object): pdu_json.setdefault("unsigned", {})["age"] = int(age) del pdu_json["unsigned"]["age_ts"] + # This may be a frozen event + pdu_json["unsigned"].pop("redacted_because", None) + return pdu_json def __set__(self, instance, value): diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 5b6491802..e3eabab13 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -811,6 +811,8 @@ class EventsStore(SQLBaseStore): ) if because: + # It's fine to do add the event directly, since get_pdu_json + # will serialise this field correctly ev.unsigned["redacted_because"] = because if 
get_prev_content and "replaces_state" in ev.unsigned: From aa3c9c7bd0736bca1b3626c87535192b89431583 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 21 Aug 2015 10:57:47 +0100 Subject: [PATCH 223/266] Don't allow people to register user ids which only differ by case to an existing one --- synapse/handlers/register.py | 4 ++-- synapse/storage/registration.py | 11 +++++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 39392d9fd..86390a367 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -57,8 +57,8 @@ class RegistrationHandler(BaseHandler): yield self.check_user_id_is_valid(user_id) - u = yield self.store.get_user_by_id(user_id) - if u: + users = yield self.store.get_users_by_id_case_insensitive(user_id) + if users: raise SynapseError( 400, "User ID already taken.", diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index bf803f2c6..25adecaf6 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -98,6 +98,17 @@ class RegistrationStore(SQLBaseStore): allow_none=True, ) + def get_users_by_id_case_insensitive(self, user_id): + def f(txn): + sql = ( + "SELECT name, password_hash FROM users" + " WHERE name = lower(?)" + ) + txn.execute(sql, (user_id,)) + return self.cursor_to_dict(txn) + + return self.runInteraction("get_users_by_id_case_insensitive", f) + @defer.inlineCallbacks def user_set_password_hash(self, user_id, password_hash): """ From 42f12ad92f5bc372569f15ffc81e9cf8146d2ac6 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 21 Aug 2015 11:34:43 +0100 Subject: [PATCH 224/266] When logging in fetch user by user_id case insensitively, *unless* there are multiple case insensitive matches, in which case require the exact user_id --- synapse/handlers/auth.py | 31 +++++++++++++++++++++++-------- synapse/rest/client/v1/login.py | 5 +++-- synapse/storage/registration.py | 7 +++++-- 3 files changed, 31 insertions(+), 12 deletions(-) diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index ff2c66f44..058a0f416 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -162,7 +162,8 @@ class AuthHandler(BaseHandler): if not user_id.startswith('@'): user_id = UserID.create(user_id, self.hs.hostname).to_string() - yield self._check_password(user_id, password) + user_id, password_hash = yield self._find_user_id_and_pwd_hash(user_id) + self._check_password(user_id, password, password_hash) defer.returnValue(user_id) @defer.inlineCallbacks @@ -283,23 +284,37 @@ class AuthHandler(BaseHandler): StoreError if there was a problem storing the token. LoginError if there was an authentication problem. 
""" - yield self._check_password(user_id, password) + user_id, password_hash = yield self._find_user_id_and_pwd_hash(user_id) + self._check_password(user_id, password, password_hash) reg_handler = self.hs.get_handlers().registration_handler access_token = reg_handler.generate_token(user_id) logger.info("Logging in user %s", user_id) yield self.store.add_access_token_to_user(user_id, access_token) - defer.returnValue(access_token) + defer.returnValue((user_id, access_token)) @defer.inlineCallbacks - def _check_password(self, user_id, password): - """Checks that user_id has passed password, raises LoginError if not.""" - user_info = yield self.store.get_user_by_id(user_id=user_id) - if not user_info: + def _find_user_id_and_pwd_hash(self, user_id): + user_infos = yield self.store.get_users_by_id_case_insensitive(user_id) + if not user_infos: logger.warn("Attempted to login as %s but they do not exist", user_id) raise LoginError(403, "", errcode=Codes.FORBIDDEN) - stored_hash = user_info["password_hash"] + if len(user_infos) > 1: + if user_id not in user_infos: + logger.warn( + "Attempted to login as %s but it matches more than one user " + "inexactly: %r", + user_id, user_infos.keys() + ) + raise LoginError(403, "", errcode=Codes.FORBIDDEN) + + defer.returnValue((user_id, user_infos[user_id])) + else: + defer.returnValue(user_infos.popitem()) + + def _check_password(self, user_id, password, stored_hash): + """Checks that user_id has passed password, raises LoginError if not.""" if not bcrypt.checkpw(password, stored_hash): logger.warn("Failed password login for user %s", user_id) raise LoginError(403, "", errcode=Codes.FORBIDDEN) diff --git a/synapse/rest/client/v1/login.py b/synapse/rest/client/v1/login.py index 0d5eafd0f..2444f2736 100644 --- a/synapse/rest/client/v1/login.py +++ b/synapse/rest/client/v1/login.py @@ -83,9 +83,10 @@ class LoginRestServlet(ClientV1RestServlet): if not user_id.startswith('@'): user_id = UserID.create( - user_id, self.hs.hostname).to_string() + user_id, self.hs.hostname + ).to_string() - token = yield self.handlers.auth_handler.login_with_password( + user_id, token = yield self.handlers.auth_handler.login_with_password( user_id=user_id, password=login_submission["password"]) diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 25adecaf6..586628579 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -99,13 +99,16 @@ class RegistrationStore(SQLBaseStore): ) def get_users_by_id_case_insensitive(self, user_id): + """Gets users that match user_id case insensitively. + Returns a mapping of user_id -> password_hash. + """ def f(txn): sql = ( "SELECT name, password_hash FROM users" - " WHERE name = lower(?)" + " WHERE lower(name) = lower(?)" ) txn.execute(sql, (user_id,)) - return self.cursor_to_dict(txn) + return dict(txn.fetchall()) return self.runInteraction("get_users_by_id_case_insensitive", f) From fd5ad0f00ec963e9722d9f5bbe526dc84038e408 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 21 Aug 2015 11:45:43 +0100 Subject: [PATCH 225/266] Doc string --- synapse/handlers/auth.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index 058a0f416..602c5bcd8 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -295,6 +295,12 @@ class AuthHandler(BaseHandler): @defer.inlineCallbacks def _find_user_id_and_pwd_hash(self, user_id): + """Checks to see if a user with the given id exists. 
Will check case
+        insensitively, but will throw if there are multiple inexact matches.
+
+        Returns:
+            tuple: A 2-tuple of `(canonical_user_id, password_hash)`
+        """
         user_infos = yield self.store.get_users_by_id_case_insensitive(user_id)
         if not user_infos:
             logger.warn("Attempted to login as %s but they do not exist", user_id)
             raise LoginError(403, "", errcode=Codes.FORBIDDEN)

From b9490e8cbb384a6ec1612a26305c2700a0365046 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Fri, 21 Aug 2015 13:07:37 +0100
Subject: [PATCH 226/266] Update changelog

---
 CHANGES.rst | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/CHANGES.rst b/CHANGES.rst
index e6d1a3730..8eca8fde0 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -10,6 +10,9 @@ General:
   (PR #208)
 * Add support for logging in with email address (PR #234)
 * Add support for new ``m.room.canonical_alias`` event. (PR #233)
+* Change synapse to treat user IDs case insensitively during registration and
+  login. (If two users already exist with case insensitive matching user ids,
+  synapse will continue to require them to specify their user ids exactly.)
 * Error if a user tries to register with an email already in use. (PR #211)
 * Add extra and improve existing caches (PR #212, #219, #226, #228)
 * Batch various storage request (PR #226, #228)

From 0f6a25f670cd7a2c319910e3a5e847f1d50bae1a Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Fri, 21 Aug 2015 13:07:56 +0100
Subject: [PATCH 227/266] Update changelog

---
 CHANGES.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index 8eca8fde0..287b04c3f 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -1,4 +1,4 @@
-Changes in synapse v0.10.0-rc1 (2015-08-20)
+Changes in synapse v0.10.0-rc1 (2015-08-21)
 ===========================================
 
 Also see v0.9.4-rc1 changelog, which has been amalgamated into this release.

From 1bd1a43073f2b1f657f4c9a5bab8326c1aa19c53 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Fri, 21 Aug 2015 14:30:34 +0100
Subject: [PATCH 228/266] Actually check if event_id isn't returned by _get_state_groups

---
 synapse/storage/state.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/synapse/storage/state.py b/synapse/storage/state.py
index c9110e630..9630efcfc 100644
--- a/synapse/storage/state.py
+++ b/synapse/storage/state.py
@@ -403,8 +403,15 @@ class StateStore(SQLBaseStore):
             state_dict = results[group]
 
         for event_id in state_ids:
-            state_event = state_events[event_id]
-            state_dict[(state_event.type, state_event.state_key)] = state_event
+            try:
+                state_event = state_events[event_id]
+                state_dict[(state_event.type, state_event.state_key)] = state_event
+            except KeyError:
+                # Hmm. So we don't have that state event? Interesting.
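PATCH 223 and PATCH 224 above settle on a subtle rule for case-insensitive user IDs: match insensitively when the match is unique, but if several stored users differ only by case, demand the exact ID rather than guessing. A standalone sketch of that resolution logic, with a plain dict standing in for the store:

.. code:: python

    def resolve_user(requested_id, users_by_id):
        """users_by_id maps canonical user_id -> password_hash."""
        matches = {
            uid: pwd for uid, pwd in users_by_id.items()
            if uid.lower() == requested_id.lower()
        }
        if not matches:
            raise LookupError("no such user")
        if len(matches) > 1:
            # Several users differ only by case: require an exact match.
            if requested_id not in matches:
                raise LookupError("ambiguous user id, specify it exactly")
            return requested_id, matches[requested_id]
        return matches.popitem()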
+ logger.warn( + "Can't find state event %r for state group %r", + event_id, group, + ) self._state_group_cache.update( cache_seq_num, From 457970c724b7d31f9ec6c153a6011a57b21ef157 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Sun, 23 Aug 2015 13:44:23 +0100 Subject: [PATCH 229/266] Don't insert events into 'event_*_extremeties' tables if they're outliers --- synapse/storage/event_federation.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/synapse/storage/event_federation.py b/synapse/storage/event_federation.py index 25cc84eb9..dda3027b6 100644 --- a/synapse/storage/event_federation.py +++ b/synapse/storage/event_federation.py @@ -331,7 +331,10 @@ class EventFederationStore(SQLBaseStore): txn.executemany( query, - [(ev.event_id, ev.room_id, ev.event_id) for ev in events] + [ + (ev.event_id, ev.room_id, ev.event_id) for ev in events + if not ev.internal_metadata.is_outlier() + ] ) query = ( @@ -358,7 +361,10 @@ class EventFederationStore(SQLBaseStore): ) txn.executemany( query, - [(ev.event_id, ev.room_id) for ev in events] + [ + (ev.event_id, ev.room_id) for ev in events + if not ev.internal_metadata.is_outlier() + ] ) for room_id in events_by_room: From f8f3d72e2b6e5b38bb6ab8e057ede454d77d114f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 24 Aug 2015 16:19:43 +0100 Subject: [PATCH 230/266] Don't make pushers handle presence/typing events --- synapse/handlers/events.py | 10 ++++++++-- synapse/notifier.py | 7 ++++++- synapse/push/__init__.py | 8 +++++--- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/synapse/handlers/events.py b/synapse/handlers/events.py index f9ca2f863..891502c04 100644 --- a/synapse/handlers/events.py +++ b/synapse/handlers/events.py @@ -49,7 +49,12 @@ class EventStreamHandler(BaseHandler): @defer.inlineCallbacks @log_function def get_stream(self, auth_user_id, pagin_config, timeout=0, - as_client_event=True, affect_presence=True): + as_client_event=True, affect_presence=True, + only_room_events=False): + """Fetches the events stream for a given user. + + If `only_room_events` is `True` only room events will be returned. + """ auth_user = UserID.from_string(auth_user_id) try: @@ -89,7 +94,8 @@ class EventStreamHandler(BaseHandler): timeout = random.randint(int(timeout*0.9), int(timeout*1.1)) events, tokens = yield self.notifier.get_events_for( - auth_user, room_ids, pagin_config, timeout + auth_user, room_ids, pagin_config, timeout, + only_room_events=only_room_events ) time_now = self.clock.time_msec() diff --git a/synapse/notifier.py b/synapse/notifier.py index dbd8efe9f..f998fc83b 100644 --- a/synapse/notifier.py +++ b/synapse/notifier.py @@ -328,10 +328,13 @@ class Notifier(object): defer.returnValue(result) @defer.inlineCallbacks - def get_events_for(self, user, rooms, pagination_config, timeout): + def get_events_for(self, user, rooms, pagination_config, timeout, + only_room_events=False): """ For the given user and rooms, return any new events for them. If there are no new events wait for up to `timeout` milliseconds for any new events to happen before returning. + + If `only_room_events` is `True` only room events will be returned. 
""" from_token = pagination_config.from_token if not from_token: @@ -352,6 +355,8 @@ class Notifier(object): after_id = getattr(after_token, keyname) if before_id == after_id: continue + if only_room_events and name != "room": + continue new_events, new_key = yield source.get_new_events_for_user( user, getattr(from_token, keyname), limit, ) diff --git a/synapse/push/__init__.py b/synapse/push/__init__.py index 13002e0db..f1952b5a0 100644 --- a/synapse/push/__init__.py +++ b/synapse/push/__init__.py @@ -249,7 +249,9 @@ class Pusher(object): # we fail to dispatch the push) config = PaginationConfig(from_token=None, limit='1') chunk = yield self.evStreamHandler.get_stream( - self.user_name, config, timeout=0) + self.user_name, config, timeout=0, affect_presence=False, + only_room_events=True + ) self.last_token = chunk['end'] self.store.update_pusher_last_token( self.app_id, self.pushkey, self.user_name, self.last_token @@ -280,8 +282,8 @@ class Pusher(object): config = PaginationConfig(from_token=from_tok, limit='1') timeout = (300 + random.randint(-60, 60)) * 1000 chunk = yield self.evStreamHandler.get_stream( - self.user_name, config, - timeout=timeout, affect_presence=False + self.user_name, config, timeout=timeout, affect_presence=False, + only_room_events=True ) # limiting to 1 may get 1 event plus 1 presence event, so From 51c53369a318262ecc3adc3777be8b838509d19d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 24 Aug 2015 16:38:20 +0100 Subject: [PATCH 231/266] Do auth checks *before* persisting the event --- synapse/handlers/_base.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/synapse/handlers/_base.py b/synapse/handlers/_base.py index e91f1129d..cb992143f 100644 --- a/synapse/handlers/_base.py +++ b/synapse/handlers/_base.py @@ -107,6 +107,22 @@ class BaseHandler(object): if not suppress_auth: self.auth.check(event, auth_events=context.current_state) + if event.type == EventTypes.CanonicalAlias: + # Check the alias is acually valid (at this time at least) + room_alias_str = event.content.get("alias", None) + if room_alias_str: + room_alias = RoomAlias.from_string(room_alias_str) + directory_handler = self.hs.get_handlers().directory_handler + mapping = yield directory_handler.get_association(room_alias) + + if mapping["room_id"] != event.room_id: + raise SynapseError( + 400, + "Room alias %s does not point to the room" % ( + room_alias_str, + ) + ) + (event_stream_id, max_stream_id) = yield self.store.persist_event( event, context=context ) @@ -130,22 +146,6 @@ class BaseHandler(object): returned_invite.signatures ) - if event.type == EventTypes.CanonicalAlias: - # Check the alias is acually valid (at this time at least) - room_alias_str = event.content.get("alias", None) - if room_alias_str: - room_alias = RoomAlias.from_string(room_alias_str) - directory_handler = self.hs.get_handlers().directory_handler - mapping = yield directory_handler.get_association(room_alias) - - if mapping["room_id"] != event.room_id: - raise SynapseError( - 400, - "Room alias %s does not point to the room" % ( - room_alias_str, - ) - ) - destinations = set(extra_destinations) for k, s in context.current_state.items(): try: From 571ac105e6bf9c0de328bb22b095b97fbd09e5ae Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 24 Aug 2015 17:10:45 +0100 Subject: [PATCH 232/266] Bump version and changelog --- CHANGES.rst | 10 ++++++++++ synapse/__init__.py | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst 
b/CHANGES.rst index 287b04c3f..f4946c6df 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,3 +1,13 @@ +Changes in synapse v0.10.0-rc2 (2015-08-24) +=========================================== + +* Fix bug where we incorrectly populated the ``event_forward_extremities`` + table, resulting in problems joining large remote rooms (e.g. + ``#matrix:matrix.org``) +* Reduce the number of times we wake up pushers by not listening for presence + or typing events, reducing the CPU cost of each pusher. + + Changes in synapse v0.10.0-rc1 (2015-08-21) =========================================== diff --git a/synapse/__init__.py b/synapse/__init__.py index 5853165a2..b4eab89d4 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -16,4 +16,4 @@ """ This is a reference implementation of a Matrix home server. """ -__version__ = "0.10.0-rc1" +__version__ = "0.10.0-rc2" From 86cef6a91b330d99b86cf2eacf75a446ce2e2955 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 25 Aug 2015 12:01:23 +0100 Subject: [PATCH 233/266] Allow specifying a directory to host a web client from --- synapse/app/homeserver.py | 8 +++++--- synapse/config/server.py | 1 + 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index f04493f92..2c59457cd 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -97,9 +97,11 @@ class SynapseHomeServer(HomeServer): return JsonResource(self) def build_resource_for_web_client(self): - import syweb - syweb_path = os.path.dirname(syweb.__file__) - webclient_path = os.path.join(syweb_path, "webclient") + webclient_path = self.get_config().web_client_location + if not webclient_path: + import syweb + syweb_path = os.path.dirname(syweb.__file__) + webclient_path = os.path.join(syweb_path, "webclient") # GZip is disabled here due to # https://twistedmatrix.com/trac/ticket/7678 # (It can stay enabled for the API resources: they call diff --git a/synapse/config/server.py b/synapse/config/server.py index f9a3b5f15..a03e55c22 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -22,6 +22,7 @@ class ServerConfig(Config): self.server_name = config["server_name"] self.pid_file = self.abspath(config.get("pid_file")) self.web_client = config["web_client"] + self.web_client_location = config.get("web_client_location", None) self.soft_file_limit = config["soft_file_limit"] self.daemonize = config.get("daemonize") self.print_pidfile = config.get("print_pidfile") From d9088c923f32fe1d2b6027320bb8fd5e46e1a428 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 25 Aug 2015 13:34:50 +0100 Subject: [PATCH 234/266] Remove dependency on matrix-angular-sdk --- synapse/app/homeserver.py | 9 ++++++++- synapse/python_dependencies.py | 6 +----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 2c59457cd..6255e9676 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -99,7 +99,14 @@ class SynapseHomeServer(HomeServer): def build_resource_for_web_client(self): webclient_path = self.get_config().web_client_location if not webclient_path: - import syweb + try: + import syweb + except ImportError: + quit_with_error( + "Could not find a webclient. 
Please either install syweb\n" + "or configure the location of the source to server via\n" + "the config option `web_client_location`" + ) syweb_path = os.path.dirname(syweb.__file__) webclient_path = os.path.join(syweb_path, "webclient") # GZip is disabled here due to diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py index fa06480ad..61a362bb1 100644 --- a/synapse/python_dependencies.py +++ b/synapse/python_dependencies.py @@ -34,11 +34,7 @@ REQUIREMENTS = { "blist": ["blist"], "pysaml2": ["saml2"], } -CONDITIONAL_REQUIREMENTS = { - "web_client": { - "matrix_angular_sdk>=0.6.6": ["syweb>=0.6.6"], - } -} +CONDITIONAL_REQUIREMENTS = {} def requirements(config=None, include_conditional=False): From 8b52fe48b5f222bb444c93307c9681028693db2b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 25 Aug 2015 14:10:31 +0100 Subject: [PATCH 235/266] Revert previous commit. Instead, always download matrix-angular-sdk as a requirement, but don't complain (when we do check_requirements) if we don't have it when we start synapse. --- synapse/python_dependencies.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py index 61a362bb1..82e28133e 100644 --- a/synapse/python_dependencies.py +++ b/synapse/python_dependencies.py @@ -34,13 +34,17 @@ REQUIREMENTS = { "blist": ["blist"], "pysaml2": ["saml2"], } -CONDITIONAL_REQUIREMENTS = {} +CONDITIONAL_REQUIREMENTS = { + "web_client": { + "matrix_angular_sdk>=0.6.6": ["syweb>=0.6.6"], + } +} def requirements(config=None, include_conditional=False): reqs = REQUIREMENTS.copy() - for key, req in CONDITIONAL_REQUIREMENTS.items(): - if (config and getattr(config, key)) or include_conditional: + if include_conditional: + for _, req in CONDITIONAL_REQUIREMENTS.items(): reqs.update(req) return reqs From 37403ab06cd8c82c2910d44f1b78f2338c2dab9b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 25 Aug 2015 14:19:09 +0100 Subject: [PATCH 236/266] Update the log message --- synapse/app/homeserver.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 6255e9676..7f3e44172 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -103,9 +103,15 @@ class SynapseHomeServer(HomeServer): import syweb except ImportError: quit_with_error( - "Could not find a webclient. 
Please either install syweb\n" - "or configure the location of the source to server via\n" - "the config option `web_client_location`" + "Could not find a webclient.\n\n" + "Please either install the matrix-angular-sdk or configure\n" + "the location of the source to serve via the configuration\n" + "option `web_client_location`\n\n" + "To install the `matrix-angular-sdk` via pip, run:\n\n" + " pip install 'matrix-angular-sdk'\n" + "\n" + "You can also disable hosting of the webclient via the\n" + "configuration option `web_client`\n" ) syweb_path = os.path.dirname(syweb.__file__) webclient_path = os.path.join(syweb_path, "webclient") @@ -271,8 +277,7 @@ def quit_with_error(error_string): line_length = max([len(l) for l in message_lines]) + 2 sys.stderr.write("*" * line_length + '\n') for line in message_lines: - if line.strip(): - sys.stderr.write(" %s\n" % (line.strip(),)) + sys.stderr.write(" %s\n" % (line.rstrip(),)) sys.stderr.write("*" * line_length + '\n') sys.exit(1) From f63208a1c073a0c1b9de276e8173a3ebd94b9a75 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 25 Aug 2015 15:13:35 +0100 Subject: [PATCH 237/266] Add utility to parse config and print out a key Usage: ``` $ python -m synapse.config read server_name -c homeserver.yaml localhost ``` --- synapse/config/__main__.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 synapse/config/__main__.py diff --git a/synapse/config/__main__.py b/synapse/config/__main__.py new file mode 100644 index 000000000..725983a71 --- /dev/null +++ b/synapse/config/__main__.py @@ -0,0 +1,30 @@ +# -*- coding: utf-8 -*- +# Copyright 2015 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
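The module that follows gives scripts a supported way to pull a single value out of a parsed configuration. For callers that only have the raw YAML files and no synapse install, a rough equivalent (assuming PyYAML, and none of synapse's defaulting logic) might look like:

.. code:: python

    import sys
    import yaml

    def read_config_key(key, config_paths):
        """Return the last definition of `key` across the given files."""
        value = None
        for path in config_paths:
            with open(path) as f:
                data = yaml.safe_load(f) or {}
            if key in data:
                value = data[key]
        return value

    if __name__ == "__main__":
        print(read_config_key(sys.argv[1], sys.argv[2:]))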
+ +if __name__ == "__main__": + import sys + from homeserver import HomeServerConfig + + action = sys.argv[1] + + if action == "read": + key = sys.argv[2] + config = HomeServerConfig.load_config("", sys.argv[3:]) + + print getattr(config, key) + sys.exit(0) + else: + sys.stderr.write("Unknown command %r", action) + sys.exit(1) From d33f31d741f703758c092e311bae263ff3c3ef64 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 25 Aug 2015 15:33:23 +0100 Subject: [PATCH 238/266] Print the correct pip install line when failing due to lack of matrix-angular-sdk --- synapse/app/homeserver.py | 7 ++++--- synapse/python_dependencies.py | 10 +++++----- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 7f3e44172..ff7807c2e 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -16,7 +16,7 @@ import sys sys.dont_write_bytecode = True -from synapse.python_dependencies import check_requirements +from synapse.python_dependencies import check_requirements, DEPENDENCY_LINKS if __name__ == '__main__': check_requirements() @@ -108,10 +108,11 @@ class SynapseHomeServer(HomeServer): "the location of the source to serve via the configuration\n" "option `web_client_location`\n\n" "To install the `matrix-angular-sdk` via pip, run:\n\n" - " pip install 'matrix-angular-sdk'\n" + " pip install '%(dep)s'\n" "\n" "You can also disable hosting of the webclient via the\n" "configuration option `web_client`\n" + % {"dep": DEPENDENCY_LINKS["matrix-angular-sdk"]} ) syweb_path = os.path.dirname(syweb.__file__) webclient_path = os.path.join(syweb_path, "webclient") @@ -274,7 +275,7 @@ class SynapseHomeServer(HomeServer): def quit_with_error(error_string): message_lines = error_string.split("\n") - line_length = max([len(l) for l in message_lines]) + 2 + line_length = max([len(l) for l in message_lines if len(l) < 80]) + 2 sys.stderr.write("*" * line_length + '\n') for line in message_lines: sys.stderr.write(" %s\n" % (line.rstrip(),)) diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py index 82e28133e..d7e3a686f 100644 --- a/synapse/python_dependencies.py +++ b/synapse/python_dependencies.py @@ -52,18 +52,18 @@ def requirements(config=None, include_conditional=False): def github_link(project, version, egg): return "https://github.com/%s/tarball/%s/#egg=%s" % (project, version, egg) -DEPENDENCY_LINKS = [ - github_link( +DEPENDENCY_LINKS = { + "syutil": github_link( project="matrix-org/syutil", version="v0.0.7", egg="syutil-0.0.7", ), - github_link( + "matrix-angular-sdk": github_link( project="matrix-org/matrix-angular-sdk", version="v0.6.6", egg="matrix_angular_sdk-0.6.6", ), -] +} class MissingRequirementError(Exception): @@ -131,7 +131,7 @@ def check_requirements(config=None): def list_requirements(): result = [] linked = [] - for link in DEPENDENCY_LINKS: + for link in DEPENDENCY_LINKS.values(): egg = link.split("#egg=")[1] linked.append(egg.split('-')[0]) result.append(link) From 1d1c303b9b3790256d5ebf2d2e93528a23e8270a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 25 Aug 2015 15:39:16 +0100 Subject: [PATCH 239/266] Fix typo when using sys.stderr.write --- synapse/config/__main__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/config/__main__.py b/synapse/config/__main__.py index 725983a71..f822d1203 100644 --- a/synapse/config/__main__.py +++ b/synapse/config/__main__.py @@ -26,5 +26,5 @@ if __name__ == "__main__": print getattr(config, key) sys.exit(0) else: - 
sys.stderr.write("Unknown command %r", action) + sys.stderr.write("Unknown command %r\n" % (action,)) sys.exit(1) From bfb66773a438da2f8fb7192b7383f6efe65d87ec Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 25 Aug 2015 16:25:54 +0100 Subject: [PATCH 240/266] Allow specifying directories as config files --- synapse/config/_base.py | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/synapse/config/_base.py b/synapse/config/_base.py index 73f695995..fd5080a3c 100644 --- a/synapse/config/_base.py +++ b/synapse/config/_base.py @@ -131,7 +131,8 @@ class Config(object): "-c", "--config-path", action="append", metavar="CONFIG_FILE", - help="Specify config file" + help="Specify config file. Can be given multiple times and" + " may specify directories containing *.yaml files." ) config_parser.add_argument( "--generate-config", @@ -151,14 +152,31 @@ class Config(object): generate_keys = config_args.generate_keys + config_files = [] + if config_args.config_path: + for config_path in config_args.config_path: + if os.path.isdir(config_path): + # We accept specifying directories as config paths, we search + # inside that directory for all files matching *.yaml, and then + # we apply them in *sorted* order. + config_files.extend(sorted( + os.path.join(config_path, entry) + for entry in os.listdir(config_path) + if entry.endswith(".yaml") and os.path.isfile( + os.path.join(config_path, entry) + ) + )) + else: + config_files.append(config_path) + if config_args.generate_config: - if not config_args.config_path: + if not config_files: config_parser.error( "Must supply a config file.\nA config file can be automatically" " generated using \"--generate-config -H SERVER_NAME" " -c CONFIG-FILE\"" ) - (config_path,) = config_args.config_path + (config_path,) = config_files if not os.path.exists(config_path): config_dir_path = os.path.dirname(config_path) config_dir_path = os.path.abspath(config_dir_path) @@ -202,7 +220,7 @@ class Config(object): obj.invoke_all("add_arguments", parser) args = parser.parse_args(remaining_args) - if not config_args.config_path: + if not config_files: config_parser.error( "Must supply a config file.\nA config file can be automatically" " generated using \"--generate-config -H SERVER_NAME" @@ -213,8 +231,8 @@ class Config(object): config_dir_path = os.path.abspath(config_dir_path) specified_config = {} - for config_path in config_args.config_path: - yaml_config = cls.read_config_file(config_path) + for config_file in config_files: + yaml_config = cls.read_config_file(config_file) specified_config.update(yaml_config) server_name = specified_config["server_name"] From af7c1397d1ba402bb58482c2204484d23d33a403 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 25 Aug 2015 16:58:01 +0100 Subject: [PATCH 241/266] Add config option to specify where generated files should be dumped --- synapse/config/_base.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/synapse/config/_base.py b/synapse/config/_base.py index fd5080a3c..1bc2e61ee 100644 --- a/synapse/config/_base.py +++ b/synapse/config/_base.py @@ -144,6 +144,12 @@ class Config(object): action="store_true", help="Generate any missing key files then exit" ) + config_parser.add_argument( + "--generated-directory", + metavar="DIRECTORY", + help="Used with 'generate-*' options to specify where generated" + " files (such as certs and signing keys) should be stored." 
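PATCH 239 above is worth a second look: ``file.write()`` takes a single string and does no %-formatting, so the original two-argument call raised ``TypeError`` instead of printing anything. A short sketch of the pitfall and the fix:

.. code:: python

    import sys

    action = "frobnicate"

    # write() accepts exactly one string and, unlike print, neither
    # formats nor appends a newline; the two-argument form raises a
    # TypeError:
    #     sys.stderr.write("Unknown command %r", action)

    # Format first and supply the newline yourself:
    sys.stderr.write("Unknown command %r\n" % (action,))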
+ ) config_parser.add_argument( "-H", "--server-name", help="The server name to generate a config file for" @@ -178,7 +184,10 @@ class Config(object): ) (config_path,) = config_files if not os.path.exists(config_path): - config_dir_path = os.path.dirname(config_path) + if config_args.generated_directory: + config_dir_path = config_args.generated_directory + else: + config_dir_path = os.path.dirname(config_path) config_dir_path = os.path.abspath(config_dir_path) server_name = config_args.server_name @@ -227,7 +236,10 @@ class Config(object): " -c CONFIG-FILE\"" ) - config_dir_path = os.path.dirname(config_args.config_path[-1]) + if config_args.generated_directory: + config_dir_path = config_args.generated_directory + else: + config_dir_path = os.path.dirname(config_args.config_path[-1]) config_dir_path = os.path.abspath(config_dir_path) specified_config = {} From 3e1029fe8050edd8f6aed57bdf6507a12ffb0a0c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 25 Aug 2015 17:08:23 +0100 Subject: [PATCH 242/266] Warn if we encounter unexpected files in config directories --- synapse/config/_base.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/synapse/config/_base.py b/synapse/config/_base.py index fd5080a3c..2f218d4a7 100644 --- a/synapse/config/_base.py +++ b/synapse/config/_base.py @@ -159,13 +159,23 @@ class Config(object): # We accept specifying directories as config paths, we search # inside that directory for all files matching *.yaml, and then # we apply them in *sorted* order. - config_files.extend(sorted( - os.path.join(config_path, entry) - for entry in os.listdir(config_path) - if entry.endswith(".yaml") and os.path.isfile( - os.path.join(config_path, entry) - ) - )) + files = [] + for entry in os.listdir(config_path): + entry_path = os.path.join(config_path, entry) + if not os.path.isfile(entry_path): + print ( + "Found subdirectory in config directory: %r. IGNORING." + ) % (entry_path, ) + continue + + if not entry.endswith(".yaml"): + print ( + "Found file in config directory that does not" + " end in '.yaml': %r. IGNORING." + ) % (entry_path, ) + continue + + config_files.extend(sorted(files)) else: config_files.append(config_path) From 82145912c330528ec85276467f4ea38362e796b8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 25 Aug 2015 17:31:22 +0100 Subject: [PATCH 243/266] s/--generated-directory/--keys-directory/ --- synapse/config/_base.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/synapse/config/_base.py b/synapse/config/_base.py index 1bc2e61ee..93433c075 100644 --- a/synapse/config/_base.py +++ b/synapse/config/_base.py @@ -145,10 +145,10 @@ class Config(object): help="Generate any missing key files then exit" ) config_parser.add_argument( - "--generated-directory", + "--keys-directory", metavar="DIRECTORY", - help="Used with 'generate-*' options to specify where generated" - " files (such as certs and signing keys) should be stored." + help="Used with 'generate-*' options to specify where files such as" + " certs and signing keys should be stored in." 
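
An editorial aside rather than a patch: as committed above, the loop builds ``files`` but never appends the matching entries, so directories silently contribute nothing; PATCH 258 and PATCH 264 later in this series supply the missing append. The loop in its eventual working form, as a standalone helper::

    import os

    def scan_config_directory(config_path):
        files = []
        for entry in os.listdir(config_path):
            entry_path = os.path.join(config_path, entry)
            if not os.path.isfile(entry_path):
                print("Found subdirectory in config directory: "
                      "%r. IGNORING." % (entry_path,))
                continue

            if not entry.endswith(".yaml"):
                print("Found file in config directory that does not"
                      " end in '.yaml': %r. IGNORING." % (entry_path,))
                continue

            # The line PATCHES 258/264 add: actually record the match.
            files.append(entry_path)
        return sorted(files)
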
) config_parser.add_argument( "-H", "--server-name", @@ -184,8 +184,8 @@ class Config(object): ) (config_path,) = config_files if not os.path.exists(config_path): - if config_args.generated_directory: - config_dir_path = config_args.generated_directory + if config_args.keys_directory: + config_dir_path = config_args.keys_directory else: config_dir_path = os.path.dirname(config_path) config_dir_path = os.path.abspath(config_dir_path) @@ -236,8 +236,8 @@ class Config(object): " -c CONFIG-FILE\"" ) - if config_args.generated_directory: - config_dir_path = config_args.generated_directory + if config_args.keys_directory: + config_dir_path = config_args.keys_directory else: config_dir_path = os.path.dirname(config_args.config_path[-1]) config_dir_path = os.path.abspath(config_dir_path) From 3f6f74686a564face040bb0b4daf57a3112161e2 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 25 Aug 2015 17:37:21 +0100 Subject: [PATCH 244/266] Update config doc --- synapse/config/_base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/synapse/config/_base.py b/synapse/config/_base.py index 93433c075..e92a6c779 100644 --- a/synapse/config/_base.py +++ b/synapse/config/_base.py @@ -148,7 +148,8 @@ class Config(object): "--keys-directory", metavar="DIRECTORY", help="Used with 'generate-*' options to specify where files such as" - " certs and signing keys should be stored in." + " certs and signing keys should be stored in, unless explicitly" + " specified in the config." ) config_parser.add_argument( "-H", "--server-name", From 90fde4b8d7f2f74822df1008dadb4c31c2cde56c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 25 Aug 2015 17:49:58 +0100 Subject: [PATCH 245/266] Bump changelog and version --- CHANGES.rst | 17 +++++++++++++++++ synapse/__init__.py | 2 +- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index f4946c6df..5f16e4485 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,3 +1,20 @@ +Changes in synapse v0.10.0-rc3 (2015-08-25) +=========================================== + +* Add ``--keys-directory`` config option to specify where files such as + certs and signing keys should be stored in, when using ``--generate-config`` + or ``--generate-keys``. +* Allow ``--config-path`` to specify a directory, causing synapse to use all + \*.yaml files in the directory as config files. +* Add ``web_client_location`` config option to specify static files to be + hosted by synapse under ``/_matrix/client``. +* Add helper utility to synapse to read and parse the config files and extract + the value of a given key. For example:: + + $ python -m synapse.config read server_name -c homeserver.yaml + localhost + + Changes in synapse v0.10.0-rc2 (2015-08-24) =========================================== diff --git a/synapse/__init__.py b/synapse/__init__.py index b4eab89d4..41e3283f0 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -16,4 +16,4 @@ """ This is a reference implementation of a Matrix home server. 
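
A standalone sketch, not from the series, of the directory-selection rule that PATCHES 241 and 243 converge on for generated files::

    import os

    def generated_files_dir(keys_directory, config_path):
        # An explicit --keys-directory wins; otherwise generated files
        # (certs, signing keys) go next to the config file itself.
        if keys_directory:
            return os.path.abspath(keys_directory)
        return os.path.abspath(os.path.dirname(config_path))
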
""" -__version__ = "0.10.0-rc2" +__version__ = "0.10.0-rc3" From f4d552589e3cb815144dea646140db66d845a237 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 26 Aug 2015 10:51:08 +0100 Subject: [PATCH 246/266] Don't loop over all rooms ever in typing.get_new_events_for_user --- synapse/handlers/typing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/typing.py b/synapse/handlers/typing.py index 026bd2b9d..1ed220d87 100644 --- a/synapse/handlers/typing.py +++ b/synapse/handlers/typing.py @@ -260,8 +260,8 @@ class TypingNotificationEventSource(object): ) events = [] - for room_id in handler._room_serials: - if room_id not in joined_room_ids: + for room_id in joined_room_ids: + if room_id not in handler._room_serials: continue if handler._room_serials[room_id] <= from_key: continue From da51acf0e752badb73c036f4b1cf0ec943b6dcb1 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 26 Aug 2015 11:08:23 +0100 Subject: [PATCH 247/266] Remove needless existence checks --- synapse/handlers/typing.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/synapse/handlers/typing.py b/synapse/handlers/typing.py index 1ed220d87..d7096aab8 100644 --- a/synapse/handlers/typing.py +++ b/synapse/handlers/typing.py @@ -204,15 +204,11 @@ class TypingNotificationHandler(BaseHandler): ) def _push_update_local(self, room_id, user, typing): - if room_id not in self._room_serials: - self._room_serials[room_id] = 0 - self._room_typing[room_id] = set() - - room_set = self._room_typing[room_id] + room_set = self._room_typing.setdefault(room_id, set()) if typing: room_set.add(user) - elif user in room_set: - room_set.remove(user) + else: + room_set.discard(user) self._latest_room_serial += 1 self._room_serials[room_id] = self._latest_room_serial From e85c7873dc885c18705c2a77d8487517379d64fb Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Wed, 26 Aug 2015 16:26:37 +0100 Subject: [PATCH 248/266] Allow non-ascii filenames for attachments --- synapse/rest/media/v1/base_resource.py | 17 +++++++++++++---- synapse/rest/media/v1/upload_resource.py | 6 ++---- synapse/util/stringutils.py | 2 ++ 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/synapse/rest/media/v1/base_resource.py b/synapse/rest/media/v1/base_resource.py index 4e21527c3..24297b20f 100644 --- a/synapse/rest/media/v1/base_resource.py +++ b/synapse/rest/media/v1/base_resource.py @@ -33,6 +33,7 @@ import os import cgi import logging +import urllib logger = logging.getLogger(__name__) @@ -181,10 +182,18 @@ class BaseMediaResource(Resource): if os.path.isfile(file_path): request.setHeader(b"Content-Type", media_type.encode("UTF-8")) if upload_name: - request.setHeader( - b"Content-Disposition", - b"inline; filename=%s" % (upload_name.encode("utf-8"),), - ) + if is_ascii(upload_name): + request.setHeader( + b"Content-Disposition", + b"inline; filename=%s" % (upload_name.encode("utf-8"),), + ) + else: + request.setHeader( + b"Content-Disposition", + b"inline; filename*=utf-8''%s" % ( + urllib.quote(upload_name.encode("utf-8")), + ), + ) # cache for at least a day. 
# XXX: we might want to turn this off for data we don't want to diff --git a/synapse/rest/media/v1/upload_resource.py b/synapse/rest/media/v1/upload_resource.py index cdd1d44e0..21d8fb9ce 100644 --- a/synapse/rest/media/v1/upload_resource.py +++ b/synapse/rest/media/v1/upload_resource.py @@ -15,7 +15,7 @@ from synapse.http.server import respond_with_json, request_handler -from synapse.util.stringutils import random_string, is_ascii +from synapse.util.stringutils import random_string from synapse.api.errors import SynapseError from twisted.web.server import NOT_DONE_YET @@ -86,9 +86,7 @@ class UploadResource(BaseMediaResource): upload_name = request.args.get("filename", None) if upload_name: - upload_name = upload_name[0] - if upload_name and not is_ascii(upload_name): - raise SynapseError(400, "filename must be ascii") + upload_name = upload_name[0].decode('UTF-8') headers = request.requestHeaders diff --git a/synapse/util/stringutils.py b/synapse/util/stringutils.py index 7a1e96af3..f3a36340e 100644 --- a/synapse/util/stringutils.py +++ b/synapse/util/stringutils.py @@ -38,6 +38,8 @@ def random_string_with_symbols(length): def is_ascii(s): try: s.encode("ascii") + except UnicodeEncodeError: + return False except UnicodeDecodeError: return False else: From 5a9e0c36824ffc8bb365cdb30a273d427f997bd9 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Wed, 26 Aug 2015 17:08:47 +0100 Subject: [PATCH 249/266] Handle unicode filenames given when downloading or received over federation --- synapse/rest/media/v1/base_resource.py | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/synapse/rest/media/v1/base_resource.py b/synapse/rest/media/v1/base_resource.py index 24297b20f..ad2c9d4e7 100644 --- a/synapse/rest/media/v1/base_resource.py +++ b/synapse/rest/media/v1/base_resource.py @@ -34,6 +34,7 @@ import os import cgi import logging import urllib +import urlparse logger = logging.getLogger(__name__) @@ -43,10 +44,13 @@ def parse_media_id(request): # This allows users to append e.g. /test.png to the URL. Useful for # clients that parse the URL to see content type. 
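
A standalone sketch, not part of the series, of the header construction from PATCHES 248 and 249; the ``filename*=utf-8''`` form is RFC 5987's encoding for non-ASCII parameter values, and ``upload_name`` is assumed to be a Python 2 unicode string::

    import urllib

    def content_disposition_header(upload_name):
        # Both branches percent-encode, as the series ends up doing.
        quoted = urllib.quote(upload_name.encode("utf-8"))
        try:
            upload_name.encode("ascii")
            return b"inline; filename=%s" % (quoted,)
        except UnicodeEncodeError:
            # Non-ASCII names need the extended parameter syntax.
            return b"inline; filename*=utf-8''%s" % (quoted,)
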
server_name, media_id = request.postpath[:2] - if len(request.postpath) > 2 and is_ascii(request.postpath[-1]): - return server_name, media_id, request.postpath[-1] - else: - return server_name, media_id, None + file_name = None + if len(request.postpath) > 2: + try: + file_name = urlparse.unquote(request.postpath[-1]).decode("utf-8") + except UnicodeDecodeError: + pass + return server_name, media_id, file_name except: raise SynapseError( 404, @@ -144,6 +148,16 @@ class BaseMediaResource(Resource): upload_name = params.get("filename", None) if upload_name and not is_ascii(upload_name): upload_name = None + else: + upload_name_utf8 = params.get("filename*", None) + if upload_name_utf8.lower().startswith("utf-8''"): + upload_name = upload_name_utf8[7:] + if upload_name: + upload_name = urlparse.unquote(upload_name) + try: + upload_name = upload_name.decode("utf-8"); + except UnicodeDecodeError: + upload_name = None else: upload_name = None @@ -185,7 +199,9 @@ class BaseMediaResource(Resource): if is_ascii(upload_name): request.setHeader( b"Content-Disposition", - b"inline; filename=%s" % (upload_name.encode("utf-8"),), + b"inline; filename=%s" % ( + urllib.quote(upload_name.encode("utf-8")), + ), ) else: request.setHeader( From c9cb354b58972b9e0e91cd6d6398e9bb02f7b967 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Wed, 26 Aug 2015 17:27:23 +0100 Subject: [PATCH 250/266] Give a sensible error message if the filename is invalid UTF-8 --- synapse/rest/media/v1/base_resource.py | 2 +- synapse/rest/media/v1/upload_resource.py | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/synapse/rest/media/v1/base_resource.py b/synapse/rest/media/v1/base_resource.py index ad2c9d4e7..60751da1d 100644 --- a/synapse/rest/media/v1/base_resource.py +++ b/synapse/rest/media/v1/base_resource.py @@ -155,7 +155,7 @@ class BaseMediaResource(Resource): if upload_name: upload_name = urlparse.unquote(upload_name) try: - upload_name = upload_name.decode("utf-8"); + upload_name = upload_name.decode("utf-8") except UnicodeDecodeError: upload_name = None else: diff --git a/synapse/rest/media/v1/upload_resource.py b/synapse/rest/media/v1/upload_resource.py index 21d8fb9ce..031bfa80f 100644 --- a/synapse/rest/media/v1/upload_resource.py +++ b/synapse/rest/media/v1/upload_resource.py @@ -86,7 +86,13 @@ class UploadResource(BaseMediaResource): upload_name = request.args.get("filename", None) if upload_name: - upload_name = upload_name[0].decode('UTF-8') + try: + upload_name = upload_name[0].decode('UTF-8') + except UnicodeDecodeError: + raise SynapseError( + msg="Invalid UTF-8 filename parameter: %r" % (upload_name), + code=400, + ) headers = request.requestHeaders From 25b32b63aee6cc2feb4ffdd4c247475f63a241dc Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 27 Aug 2015 10:09:32 +0100 Subject: [PATCH 251/266] Bump changelog and version --- CHANGES.rst | 13 ++++++++++--- synapse/__init__.py | 2 +- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 5f16e4485..22921668e 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,19 +1,26 @@ +Changes in synapse v0.10.0-rc4 (2015-08-27) +=========================================== + +* Allow UTF-8 filenames for upload. (PR #259) + Changes in synapse v0.10.0-rc3 (2015-08-25) =========================================== * Add ``--keys-directory`` config option to specify where files such as certs and signing keys should be stored in, when using ``--generate-config`` - or ``--generate-keys``. + or ``--generate-keys``. 
(PR #250) * Allow ``--config-path`` to specify a directory, causing synapse to use all - \*.yaml files in the directory as config files. + \*.yaml files in the directory as config files. (PR #249) * Add ``web_client_location`` config option to specify static files to be - hosted by synapse under ``/_matrix/client``. + hosted by synapse under ``/_matrix/client``. (PR #245) * Add helper utility to synapse to read and parse the config files and extract the value of a given key. For example:: $ python -m synapse.config read server_name -c homeserver.yaml localhost + (PR #246) + Changes in synapse v0.10.0-rc2 (2015-08-24) =========================================== diff --git a/synapse/__init__.py b/synapse/__init__.py index 41e3283f0..635f53af1 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -16,4 +16,4 @@ """ This is a reference implementation of a Matrix home server. """ -__version__ = "0.10.0-rc3" +__version__ = "0.10.0-rc4" From f02532baadc4fbd95bec6cb7f45019d2c46c1324 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 27 Aug 2015 10:37:02 +0100 Subject: [PATCH 252/266] Check for None --- synapse/rest/media/v1/base_resource.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/rest/media/v1/base_resource.py b/synapse/rest/media/v1/base_resource.py index 60751da1d..b0e997b47 100644 --- a/synapse/rest/media/v1/base_resource.py +++ b/synapse/rest/media/v1/base_resource.py @@ -150,7 +150,7 @@ class BaseMediaResource(Resource): upload_name = None else: upload_name_utf8 = params.get("filename*", None) - if upload_name_utf8.lower().startswith("utf-8''"): + if upload_name and upload_name_utf8.lower().startswith("utf-8''"): upload_name = upload_name_utf8[7:] if upload_name: upload_name = urlparse.unquote(upload_name) From 53c2eed862c2c2fc90ee4b51bed624be5fcec9f3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 27 Aug 2015 10:38:22 +0100 Subject: [PATCH 253/266] None check the correct variable --- synapse/rest/media/v1/base_resource.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/synapse/rest/media/v1/base_resource.py b/synapse/rest/media/v1/base_resource.py index b0e997b47..610cb3ef8 100644 --- a/synapse/rest/media/v1/base_resource.py +++ b/synapse/rest/media/v1/base_resource.py @@ -150,8 +150,9 @@ class BaseMediaResource(Resource): upload_name = None else: upload_name_utf8 = params.get("filename*", None) - if upload_name and upload_name_utf8.lower().startswith("utf-8''"): - upload_name = upload_name_utf8[7:] + if upload_name_utf8: + if upload_name_utf8.lower().startswith("utf-8''"): + upload_name = upload_name_utf8[7:] if upload_name: upload_name = urlparse.unquote(upload_name) try: From 66ec6cf9b892cd22dd75d9b66f10b120ebe233ed Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 27 Aug 2015 10:48:58 +0100 Subject: [PATCH 254/266] Check for an internationalised filename first --- synapse/rest/media/v1/base_resource.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/synapse/rest/media/v1/base_resource.py b/synapse/rest/media/v1/base_resource.py index 610cb3ef8..03ebbbefe 100644 --- a/synapse/rest/media/v1/base_resource.py +++ b/synapse/rest/media/v1/base_resource.py @@ -145,14 +145,20 @@ class BaseMediaResource(Resource): content_disposition = headers.get("Content-Disposition", None) if content_disposition: _, params = cgi.parse_header(content_disposition[0],) - upload_name = params.get("filename", None) - if upload_name and not is_ascii(upload_name): - upload_name = None - else: 
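
A standalone consolidation, not a patch, of where PATCHES 252 through 255 leave the filename parsing, with the series' ``is_ascii()`` helper inlined as a decode check::

    import cgi
    import urlparse

    def upload_name_from_headers(content_disposition):
        _, params = cgi.parse_header(content_disposition)
        upload_name = None

        # Prefer the RFC 5987 style "filename*" parameter if present.
        upload_name_utf8 = params.get("filename*", None)
        if upload_name_utf8 and upload_name_utf8.lower().startswith("utf-8''"):
            upload_name = upload_name_utf8[7:]

        # Otherwise fall back to a plain ASCII "filename".
        if not upload_name:
            upload_name_ascii = params.get("filename", None)
            if upload_name_ascii:
                try:
                    upload_name_ascii.decode("ascii")
                    upload_name = upload_name_ascii
                except UnicodeDecodeError:
                    pass

        if upload_name:
            upload_name = urlparse.unquote(upload_name)
            try:
                upload_name = upload_name.decode("utf-8")
            except UnicodeDecodeError:
                upload_name = None
        return upload_name
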
- upload_name_utf8 = params.get("filename*", None) - if upload_name_utf8: - if upload_name_utf8.lower().startswith("utf-8''"): - upload_name = upload_name_utf8[7:] + upload_name = None + + # First check if there is a valid UTF-8 filename + upload_name_utf8 = params.get("filename*", None) + if upload_name_utf8: + if upload_name_utf8.lower().startswith("utf-8''"): + upload_name = upload_name_utf8[7:] + + # If there isn't check for an ascii name. + if not upload_name: + upload_name = params.get("filename", None) + if upload_name and not is_ascii(upload_name): + upload_name = None + if upload_name: upload_name = urlparse.unquote(upload_name) try: From ddf4d2bd981cbc4079b2bff0a2bba500b1aad208 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 27 Aug 2015 10:50:49 +0100 Subject: [PATCH 255/266] Consistency --- synapse/rest/media/v1/base_resource.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/rest/media/v1/base_resource.py b/synapse/rest/media/v1/base_resource.py index 03ebbbefe..b2aeb8c90 100644 --- a/synapse/rest/media/v1/base_resource.py +++ b/synapse/rest/media/v1/base_resource.py @@ -155,9 +155,9 @@ class BaseMediaResource(Resource): # If there isn't check for an ascii name. if not upload_name: - upload_name = params.get("filename", None) - if upload_name and not is_ascii(upload_name): - upload_name = None + upload_name_ascii = params.get("filename", None) + if upload_name_ascii and is_ascii(upload_name_ascii): + upload_name = upload_name_ascii if upload_name: upload_name = urlparse.unquote(upload_name) From 5371c2a1f74dcd9c79c2cd5166e5b33f7e02afef Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 27 Aug 2015 11:20:36 +0100 Subject: [PATCH 256/266] Bump version and changelog --- CHANGES.rst | 5 +++++ synapse/__init__.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 22921668e..8b9916c96 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,3 +1,8 @@ +Changes in synapse v0.10.0-rc5 (2015-08-27) +=========================================== + +* Fix bug that broke downloading files with ascii filenames across federation. + Changes in synapse v0.10.0-rc4 (2015-08-27) =========================================== diff --git a/synapse/__init__.py b/synapse/__init__.py index 635f53af1..57b8304d3 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -16,4 +16,4 @@ """ This is a reference implementation of a Matrix home server. """ -__version__ = "0.10.0-rc4" +__version__ = "0.10.0-rc5" From c961cd7736050ace8dfb18663ef678c64f2156a5 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 27 Aug 2015 13:01:38 +0100 Subject: [PATCH 257/266] Clean up scripts/ --- docs/postgres.rst | 11 +- scripts/database-prepare-for-0.0.1.sh | 21 -- scripts/database-prepare-for-0.5.0.sh | 21 -- ..._sqlite_to_postgres.py => synapse_port_db} | 0 scripts/upgrade_db_to_v0.6.0.py | 331 ------------------ 5 files changed, 4 insertions(+), 380 deletions(-) delete mode 100755 scripts/database-prepare-for-0.0.1.sh delete mode 100755 scripts/database-prepare-for-0.5.0.sh rename scripts/{port_from_sqlite_to_postgres.py => synapse_port_db} (100%) delete mode 100755 scripts/upgrade_db_to_v0.6.0.py diff --git a/docs/postgres.rst b/docs/postgres.rst index 19d839111..017b3c47e 100644 --- a/docs/postgres.rst +++ b/docs/postgres.rst @@ -55,9 +55,8 @@ Porting from SQLite Overview ~~~~~~~~ -The script ``port_from_sqlite_to_postgres.py`` allows porting an existing -synapse server backed by SQLite to using PostgreSQL. 
This is done in as a two -phase process: +The script ``synapse_port_db`` allows porting an existing synapse server +backed by SQLite to using PostgreSQL. This is done in as a two phase process: 1. Copy the existing SQLite database to a separate location (while the server is down) and running the port script against that offline database. @@ -86,8 +85,7 @@ Assuming your new config file (as described in the section *Synapse config*) is named ``homeserver-postgres.yaml`` and the SQLite snapshot is at ``homeserver.db.snapshot`` then simply run:: - python scripts/port_from_sqlite_to_postgres.py \ - --sqlite-database homeserver.db.snapshot \ + synapse_port_db --sqlite-database homeserver.db.snapshot \ --postgres-config homeserver-postgres.yaml The flag ``--curses`` displays a coloured curses progress UI. @@ -100,8 +98,7 @@ To complete the conversion shut down the synapse server and run the port script one last time, e.g. if the SQLite database is at ``homeserver.db`` run:: - python scripts/port_from_sqlite_to_postgres.py \ - --sqlite-database homeserver.db \ + synapse_port_db.py --sqlite-database homeserver.db \ --postgres-config database_config.yaml Once that has completed, change the synapse config to point at the PostgreSQL diff --git a/scripts/database-prepare-for-0.0.1.sh b/scripts/database-prepare-for-0.0.1.sh deleted file mode 100755 index 43d759a5c..000000000 --- a/scripts/database-prepare-for-0.0.1.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash - -# This is will prepare a synapse database for running with v0.0.1 of synapse. -# It will store all the user information, but will *delete* all messages and -# room data. - -set -e - -cp "$1" "$1.bak" - -DUMP=$(sqlite3 "$1" << 'EOF' -.dump users -.dump access_tokens -.dump presence -.dump profiles -EOF -) - -rm "$1" - -sqlite3 "$1" <<< "$DUMP" diff --git a/scripts/database-prepare-for-0.5.0.sh b/scripts/database-prepare-for-0.5.0.sh deleted file mode 100755 index e824cb583..000000000 --- a/scripts/database-prepare-for-0.5.0.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash - -# This is will prepare a synapse database for running with v0.5.0 of synapse. -# It will store all the user information, but will *delete* all messages and -# room data. 
- -set -e - -cp "$1" "$1.bak" - -DUMP=$(sqlite3 "$1" << 'EOF' -.dump users -.dump access_tokens -.dump presence -.dump profiles -EOF -) - -rm "$1" - -sqlite3 "$1" <<< "$DUMP" diff --git a/scripts/port_from_sqlite_to_postgres.py b/scripts/synapse_port_db similarity index 100% rename from scripts/port_from_sqlite_to_postgres.py rename to scripts/synapse_port_db diff --git a/scripts/upgrade_db_to_v0.6.0.py b/scripts/upgrade_db_to_v0.6.0.py deleted file mode 100755 index f466ca510..000000000 --- a/scripts/upgrade_db_to_v0.6.0.py +++ /dev/null @@ -1,331 +0,0 @@ -#!/usr/bin/env python -from synapse.storage import SCHEMA_VERSION, read_schema -from synapse.storage._base import SQLBaseStore -from synapse.storage.signatures import SignatureStore -from synapse.storage.event_federation import EventFederationStore - -from syutil.base64util import encode_base64, decode_base64 - -from synapse.crypto.event_signing import compute_event_signature - -from synapse.events.builder import EventBuilder -from synapse.events.utils import prune_event - -from synapse.crypto.event_signing import check_event_content_hash - -from syutil.crypto.jsonsign import ( - verify_signed_json, SignatureVerifyException, -) -from syutil.crypto.signing_key import decode_verify_key_bytes - -from syutil.jsonutil import encode_canonical_json - -import argparse -# import dns.resolver -import hashlib -import httplib -import json -import sqlite3 -import syutil -import urllib2 - - -delta_sql = """ -CREATE TABLE IF NOT EXISTS event_json( - event_id TEXT NOT NULL, - room_id TEXT NOT NULL, - internal_metadata NOT NULL, - json BLOB NOT NULL, - CONSTRAINT ev_j_uniq UNIQUE (event_id) -); - -CREATE INDEX IF NOT EXISTS event_json_id ON event_json(event_id); -CREATE INDEX IF NOT EXISTS event_json_room_id ON event_json(room_id); - -PRAGMA user_version = 10; -""" - - -class Store(object): - _get_event_signatures_txn = SignatureStore.__dict__["_get_event_signatures_txn"] - _get_event_content_hashes_txn = SignatureStore.__dict__["_get_event_content_hashes_txn"] - _get_event_reference_hashes_txn = SignatureStore.__dict__["_get_event_reference_hashes_txn"] - _get_prev_event_hashes_txn = SignatureStore.__dict__["_get_prev_event_hashes_txn"] - _get_prev_events_and_state = EventFederationStore.__dict__["_get_prev_events_and_state"] - _get_auth_events = EventFederationStore.__dict__["_get_auth_events"] - cursor_to_dict = SQLBaseStore.__dict__["cursor_to_dict"] - _simple_select_onecol_txn = SQLBaseStore.__dict__["_simple_select_onecol_txn"] - _simple_select_list_txn = SQLBaseStore.__dict__["_simple_select_list_txn"] - _simple_insert_txn = SQLBaseStore.__dict__["_simple_insert_txn"] - - def _generate_event_json(self, txn, rows): - events = [] - for row in rows: - d = dict(row) - - d.pop("stream_ordering", None) - d.pop("topological_ordering", None) - d.pop("processed", None) - - if "origin_server_ts" not in d: - d["origin_server_ts"] = d.pop("ts", 0) - else: - d.pop("ts", 0) - - d.pop("prev_state", None) - d.update(json.loads(d.pop("unrecognized_keys"))) - - d["sender"] = d.pop("user_id") - - d["content"] = json.loads(d["content"]) - - if "age_ts" not in d: - # For compatibility - d["age_ts"] = d.get("origin_server_ts", 0) - - d.setdefault("unsigned", {})["age_ts"] = d.pop("age_ts") - - outlier = d.pop("outlier", False) - - # d.pop("membership", None) - - d.pop("state_hash", None) - - d.pop("replaces_state", None) - - b = EventBuilder(d) - b.internal_metadata.outlier = outlier - - events.append(b) - - for i, ev in enumerate(events): - signatures = 
self._get_event_signatures_txn( - txn, ev.event_id, - ) - - ev.signatures = { - n: { - k: encode_base64(v) for k, v in s.items() - } - for n, s in signatures.items() - } - - hashes = self._get_event_content_hashes_txn( - txn, ev.event_id, - ) - - ev.hashes = { - k: encode_base64(v) for k, v in hashes.items() - } - - prevs = self._get_prev_events_and_state(txn, ev.event_id) - - ev.prev_events = [ - (e_id, h) - for e_id, h, is_state in prevs - if is_state == 0 - ] - - # ev.auth_events = self._get_auth_events(txn, ev.event_id) - - hashes = dict(ev.auth_events) - - for e_id, hash in ev.prev_events: - if e_id in hashes and not hash: - hash.update(hashes[e_id]) - # - # if hasattr(ev, "state_key"): - # ev.prev_state = [ - # (e_id, h) - # for e_id, h, is_state in prevs - # if is_state == 1 - # ] - - return [e.build() for e in events] - - -store = Store() - - -# def get_key(server_name): -# print "Getting keys for: %s" % (server_name,) -# targets = [] -# if ":" in server_name: -# target, port = server_name.split(":") -# targets.append((target, int(port))) -# try: -# answers = dns.resolver.query("_matrix._tcp." + server_name, "SRV") -# for srv in answers: -# targets.append((srv.target, srv.port)) -# except dns.resolver.NXDOMAIN: -# targets.append((server_name, 8448)) -# except: -# print "Failed to lookup keys for %s" % (server_name,) -# return {} -# -# for target, port in targets: -# url = "https://%s:%i/_matrix/key/v1" % (target, port) -# try: -# keys = json.load(urllib2.urlopen(url, timeout=2)) -# verify_keys = {} -# for key_id, key_base64 in keys["verify_keys"].items(): -# verify_key = decode_verify_key_bytes( -# key_id, decode_base64(key_base64) -# ) -# verify_signed_json(keys, server_name, verify_key) -# verify_keys[key_id] = verify_key -# print "Got keys for: %s" % (server_name,) -# return verify_keys -# except urllib2.URLError: -# pass -# except urllib2.HTTPError: -# pass -# except httplib.HTTPException: -# pass -# -# print "Failed to get keys for %s" % (server_name,) -# return {} - - -def reinsert_events(cursor, server_name, signing_key): - print "Running delta: v10" - - cursor.executescript(delta_sql) - - cursor.execute( - "SELECT * FROM events ORDER BY rowid ASC" - ) - - print "Getting events..." - - rows = store.cursor_to_dict(cursor) - - events = store._generate_event_json(cursor, rows) - - print "Got events from DB." 
- - algorithms = { - "sha256": hashlib.sha256, - } - - key_id = "%s:%s" % (signing_key.alg, signing_key.version) - verify_key = signing_key.verify_key - verify_key.alg = signing_key.alg - verify_key.version = signing_key.version - - server_keys = { - server_name: { - key_id: verify_key - } - } - - i = 0 - N = len(events) - - for event in events: - if i % 100 == 0: - print "Processed: %d/%d events" % (i,N,) - i += 1 - - # for alg_name in event.hashes: - # if check_event_content_hash(event, algorithms[alg_name]): - # pass - # else: - # pass - # print "FAIL content hash %s %s" % (alg_name, event.event_id, ) - - have_own_correctly_signed = False - for host, sigs in event.signatures.items(): - pruned = prune_event(event) - - for key_id in sigs: - if host not in server_keys: - server_keys[host] = {} # get_key(host) - if key_id in server_keys[host]: - try: - verify_signed_json( - pruned.get_pdu_json(), - host, - server_keys[host][key_id] - ) - - if host == server_name: - have_own_correctly_signed = True - except SignatureVerifyException: - print "FAIL signature check %s %s" % ( - key_id, event.event_id - ) - - # TODO: Re sign with our own server key - if not have_own_correctly_signed: - sigs = compute_event_signature(event, server_name, signing_key) - event.signatures.update(sigs) - - pruned = prune_event(event) - - for key_id in event.signatures[server_name]: - verify_signed_json( - pruned.get_pdu_json(), - server_name, - server_keys[server_name][key_id] - ) - - event_json = encode_canonical_json( - event.get_dict() - ).decode("UTF-8") - - metadata_json = encode_canonical_json( - event.internal_metadata.get_dict() - ).decode("UTF-8") - - store._simple_insert_txn( - cursor, - table="event_json", - values={ - "event_id": event.event_id, - "room_id": event.room_id, - "internal_metadata": metadata_json, - "json": event_json, - }, - or_replace=True, - ) - - -def main(database, server_name, signing_key): - conn = sqlite3.connect(database) - cursor = conn.cursor() - - # Do other deltas: - cursor.execute("PRAGMA user_version") - row = cursor.fetchone() - - if row and row[0]: - user_version = row[0] - # Run every version since after the current version. - for v in range(user_version + 1, 10): - print "Running delta: %d" % (v,) - sql_script = read_schema("delta/v%d" % (v,)) - cursor.executescript(sql_script) - - reinsert_events(cursor, server_name, signing_key) - - conn.commit() - - print "Success!" 
- - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - - parser.add_argument("database") - parser.add_argument("server_name") - parser.add_argument( - "signing_key", type=argparse.FileType('r'), - ) - args = parser.parse_args() - - signing_key = syutil.crypto.signing_key.read_signing_keys( - args.signing_key - ) - - main(args.database, args.server_name, signing_key[0]) From b442217d918a50ae57a932a955fd577240607e92 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 28 Aug 2015 10:37:17 +0100 Subject: [PATCH 258/266] Actually add config path --- synapse/config/_base.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/synapse/config/_base.py b/synapse/config/_base.py index d01235d31..1a6784a71 100644 --- a/synapse/config/_base.py +++ b/synapse/config/_base.py @@ -182,6 +182,8 @@ class Config(object): ) % (entry_path, ) continue + files.add(config_path) + config_files.extend(sorted(files)) else: config_files.append(config_path) From 3e4de64bc9bc5ff1645f917e7c6deacf29fb868b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 1 Sep 2015 09:46:42 +0100 Subject: [PATCH 259/266] Remove spurious .py from docs --- docs/postgres.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/postgres.rst b/docs/postgres.rst index 017b3c47e..b5027fefb 100644 --- a/docs/postgres.rst +++ b/docs/postgres.rst @@ -98,7 +98,7 @@ To complete the conversion shut down the synapse server and run the port script one last time, e.g. if the SQLite database is at ``homeserver.db`` run:: - synapse_port_db.py --sqlite-database homeserver.db \ + synapse_port_db --sqlite-database homeserver.db \ --postgres-config database_config.yaml Once that has completed, change the synapse config to point at the PostgreSQL From cd800ad99afa5c689f10fd5e10843b74e4589336 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 1 Sep 2015 09:54:51 +0100 Subject: [PATCH 260/266] Lower size of 'stateGroupCache' now that we have data from matrix.org to support doing so --- synapse/storage/_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 1444767a5..d976e1778 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -167,7 +167,7 @@ class SQLBaseStore(object): self._get_event_cache = Cache("*getEvent*", keylen=3, lru=True, max_entries=hs.config.event_cache_size) - self._state_group_cache = DictionaryCache("*stateGroupCache*", 100000) + self._state_group_cache = DictionaryCache("*stateGroupCache*", 2000) self._event_fetch_lock = threading.Condition() self._event_fetch_list = [] From a9ad647fb27aaa5085a828157f30627edfbe76aa Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 2 Sep 2015 11:11:11 +0100 Subject: [PATCH 261/266] Make port script handle empty sent_transactions table --- scripts/synapse_port_db | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/scripts/synapse_port_db b/scripts/synapse_port_db index e7ed4c309..c02dff5ba 100755 --- a/scripts/synapse_port_db +++ b/scripts/synapse_port_db @@ -412,14 +412,17 @@ class Porter(object): self._convert_rows("sent_transactions", headers, rows) inserted_rows = len(rows) - max_inserted_rowid = max(r[0] for r in rows) + if inserted_rows: + max_inserted_rowid = max(r[0] for r in rows) - def insert(txn): - self.postgres_store.insert_many_txn( - txn, "sent_transactions", headers[1:], rows - ) + def insert(txn): + self.postgres_store.insert_many_txn( + txn, "sent_transactions", headers[1:], rows + ) - yield 
self.postgres_store.execute(insert) + yield self.postgres_store.execute(insert) + else: + max_inserted_rowid = 0 def get_start_id(txn): txn.execute( From aaf319820a8b59936a210c8998ed8feef58257cd Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 2 Sep 2015 15:29:03 +0100 Subject: [PATCH 262/266] Update README to include RAM requirements --- README.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/README.rst b/README.rst index d8d179135..5b30ee356 100644 --- a/README.rst +++ b/README.rst @@ -94,6 +94,7 @@ Synapse is the reference python/twisted Matrix homeserver implementation. System requirements: - POSIX-compliant system (tested on Linux & OS X) - Python 2.7 +- At least 512 MB RAM. Synapse is written in python but some of the libraries is uses are written in C. So before we can install synapse itself we need a working C compiler and the From e90f32646f2f4a51079a1e64405cadce616e54c3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 2 Sep 2015 17:16:14 +0100 Subject: [PATCH 263/266] Bump version and changelog --- CHANGES.rst | 6 ++++++ synapse/__init__.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 8b9916c96..a964ffca9 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,3 +1,9 @@ +Changes in synapse v0.10.0-rc6 (2015-09-02) +=========================================== + +* Remove some of the old database upgrade scripts. +* Fix database port script to work with newly created sqlite databases. + Changes in synapse v0.10.0-rc5 (2015-08-27) =========================================== diff --git a/synapse/__init__.py b/synapse/__init__.py index 57b8304d3..f8cb48ff1 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -16,4 +16,4 @@ """ This is a reference implementation of a Matrix home server. """ -__version__ = "0.10.0-rc5" +__version__ = "0.10.0-rc6" From fd0a919af3fa9cd200b6af8c9f185545ffe331ef Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 2 Sep 2015 17:27:59 +0100 Subject: [PATCH 264/266] Lists use 'append' --- synapse/config/_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/config/_base.py b/synapse/config/_base.py index 1a6784a71..8a75c4873 100644 --- a/synapse/config/_base.py +++ b/synapse/config/_base.py @@ -182,7 +182,7 @@ class Config(object): ) % (entry_path, ) continue - files.add(config_path) + files.append(entry_path) config_files.extend(sorted(files)) else: From 9ad38c9807a215925d002869844b479ffc47fa0e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 3 Sep 2015 09:49:54 +0100 Subject: [PATCH 265/266] Bump version and changelog --- CHANGES.rst | 5 +++++ synapse/__init__.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index a964ffca9..2ec10516f 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,3 +1,8 @@ +Changes in synapse v0.10.0 (2015-09-03) +======================================= + +No change from release candidate. + Changes in synapse v0.10.0-rc6 (2015-09-02) =========================================== diff --git a/synapse/__init__.py b/synapse/__init__.py index f8cb48ff1..d85bb3dce 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -16,4 +16,4 @@ """ This is a reference implementation of a Matrix home server. 
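
A standalone sketch, not part of the series, of the guard PATCH 261 adds to the port script: ``max()`` on an empty sequence raises ``ValueError``, and a freshly created sqlite database has no ``sent_transactions`` rows at all::

    def max_inserted_rowid(rows):
        # Fall back to 0 when the table was empty, as the patch does.
        if rows:
            return max(r[0] for r in rows)
        return 0
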
""" -__version__ = "0.10.0-rc6" +__version__ = "0.10.0" From 1002bbd7322e48c9ecbb1ea21b3fb8a44e4a4360 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 3 Sep 2015 09:51:01 +0100 Subject: [PATCH 266/266] Change log level to info --- synapse/app/homeserver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index ff7807c2e..fefefffb8 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -341,7 +341,7 @@ def get_version_string(): ) ).encode("ascii") except Exception as e: - logger.warn("Failed to check for git repository: %s", e) + logger.info("Failed to check for git repository: %s", e) return ("Synapse/%s" % (synapse.__version__,)).encode("ascii")