diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py index abed6b5e6..23f8b612a 100644 --- a/synapse/events/__init__.py +++ b/synapse/events/__init__.py @@ -14,6 +14,7 @@ # limitations under the License. from synapse.util.frozenutils import freeze +from synapse.util.caches import intern_dict # Whether we should use frozen_dict in FrozenEvent. Using frozen_dicts prevents @@ -140,6 +141,10 @@ class FrozenEvent(EventBase): unsigned = dict(event_dict.pop("unsigned", {})) + # We intern these strings because they turn up a lot (especially when + # caching). + event_dict = intern_dict(event_dict) + if USE_FROZEN_DICTS: frozen_dict = freeze(event_dict) else: diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 83c1f4658..37ee469fa 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -418,6 +418,7 @@ class FederationClient(FederationBase): "Failed to make_%s via %s: %s", membership, destination, e.message ) + raise raise RuntimeError("Failed to send to any server.") diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index 208bff8d4..d65a7893d 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -175,7 +175,7 @@ class BaseFederationServlet(object): class FederationSendServlet(BaseFederationServlet): - PATH = "/send/([^/]*)/" + PATH = "/send/(?P[^/]*)/" def __init__(self, handler, server_name, **kwargs): super(FederationSendServlet, self).__init__( @@ -250,7 +250,7 @@ class FederationPullServlet(BaseFederationServlet): class FederationEventServlet(BaseFederationServlet): - PATH = "/event/([^/]*)/" + PATH = "/event/(?P[^/]*)/" # This is when someone asks for a data item for a given server data_id pair. def on_GET(self, origin, content, query, event_id): @@ -258,7 +258,7 @@ class FederationEventServlet(BaseFederationServlet): class FederationStateServlet(BaseFederationServlet): - PATH = "/state/([^/]*)/" + PATH = "/state/(?P[^/]*)/" # This is when someone asks for all data for a given context. def on_GET(self, origin, content, query, context): @@ -270,7 +270,7 @@ class FederationStateServlet(BaseFederationServlet): class FederationBackfillServlet(BaseFederationServlet): - PATH = "/backfill/([^/]*)/" + PATH = "/backfill/(?P[^/]*)/" def on_GET(self, origin, content, query, context): versions = query["v"] @@ -285,7 +285,7 @@ class FederationBackfillServlet(BaseFederationServlet): class FederationQueryServlet(BaseFederationServlet): - PATH = "/query/([^/]*)" + PATH = "/query/(?P[^/]*)" # This is when we receive a server-server Query def on_GET(self, origin, content, query, query_type): @@ -296,7 +296,7 @@ class FederationQueryServlet(BaseFederationServlet): class FederationMakeJoinServlet(BaseFederationServlet): - PATH = "/make_join/([^/]*)/([^/]*)" + PATH = "/make_join/(?P[^/]*)/(?P[^/]*)" @defer.inlineCallbacks def on_GET(self, origin, content, query, context, user_id): @@ -305,7 +305,7 @@ class FederationMakeJoinServlet(BaseFederationServlet): class FederationMakeLeaveServlet(BaseFederationServlet): - PATH = "/make_leave/([^/]*)/([^/]*)" + PATH = "/make_leave/(?P[^/]*)/(?P[^/]*)" @defer.inlineCallbacks def on_GET(self, origin, content, query, context, user_id): @@ -314,7 +314,7 @@ class FederationMakeLeaveServlet(BaseFederationServlet): class FederationSendLeaveServlet(BaseFederationServlet): - PATH = "/send_leave/([^/]*)/([^/]*)" + PATH = "/send_leave/(?P[^/]*)/(?P[^/]*)" @defer.inlineCallbacks def on_PUT(self, origin, content, query, room_id, txid): @@ -323,14 +323,14 @@ class FederationSendLeaveServlet(BaseFederationServlet): class FederationEventAuthServlet(BaseFederationServlet): - PATH = "/event_auth/([^/]*)/([^/]*)" + PATH = "/event_auth(?P[^/]*)/(?P[^/]*)" def on_GET(self, origin, content, query, context, event_id): return self.handler.on_event_auth(origin, context, event_id) class FederationSendJoinServlet(BaseFederationServlet): - PATH = "/send_join/([^/]*)/([^/]*)" + PATH = "/send_join/(?P[^/]*)/(?P[^/]*)" @defer.inlineCallbacks def on_PUT(self, origin, content, query, context, event_id): @@ -341,7 +341,7 @@ class FederationSendJoinServlet(BaseFederationServlet): class FederationInviteServlet(BaseFederationServlet): - PATH = "/invite/([^/]*)/([^/]*)" + PATH = "/invite/(?P[^/]*)/(?P[^/]*)" @defer.inlineCallbacks def on_PUT(self, origin, content, query, context, event_id): @@ -352,7 +352,7 @@ class FederationInviteServlet(BaseFederationServlet): class FederationThirdPartyInviteExchangeServlet(BaseFederationServlet): - PATH = "/exchange_third_party_invite/([^/]*)" + PATH = "/exchange_third_party_invite/(?P[^/]*)" @defer.inlineCallbacks def on_PUT(self, origin, content, query, room_id): @@ -381,7 +381,7 @@ class FederationClientKeysClaimServlet(BaseFederationServlet): class FederationQueryAuthServlet(BaseFederationServlet): - PATH = "/query_auth/([^/]*)/([^/]*)" + PATH = "/query_auth/(?P[^/]*)/(?P[^/]*)" @defer.inlineCallbacks def on_POST(self, origin, content, query, context, event_id): @@ -394,7 +394,7 @@ class FederationQueryAuthServlet(BaseFederationServlet): class FederationGetMissingEventsServlet(BaseFederationServlet): # TODO(paul): Why does this path alone end with "/?" optional? - PATH = "/get_missing_events/([^/]*)/?" + PATH = "/get_missing_events/(?P[^/]*)/?" @defer.inlineCallbacks def on_POST(self, origin, content, query, room_id): diff --git a/synapse/http/server.py b/synapse/http/server.py index b17b190ee..b82196fd5 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -18,6 +18,7 @@ from synapse.api.errors import ( cs_exception, SynapseError, CodeMessageException, UnrecognizedRequestError, Codes ) from synapse.util.logcontext import LoggingContext, PreserveLoggingContext +from synapse.util.caches import intern_dict import synapse.metrics import synapse.events @@ -229,11 +230,12 @@ class JsonResource(HttpServer, resource.Resource): else: servlet_classname = "%r" % callback - args = [ - urllib.unquote(u).decode("UTF-8") if u else u for u in m.groups() - ] + kwargs = intern_dict({ + name: urllib.unquote(value).decode("UTF-8") if value else value + for name, value in m.groupdict().items() + }) - callback_return = yield callback(request, *args) + callback_return = yield callback(request, **kwargs) if callback_return is not None: code, response = callback_return self._send_response(request, code, response) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 583b77a83..b75b79df3 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -18,6 +18,7 @@ from synapse.api.errors import StoreError from synapse.util.logcontext import LoggingContext, PreserveLoggingContext from synapse.util.caches.dictionary_cache import DictionaryCache from synapse.util.caches.descriptors import Cache +from synapse.util.caches import intern_dict import synapse.metrics @@ -350,7 +351,7 @@ class SQLBaseStore(object): """ col_headers = list(column[0] for column in cursor.description) results = list( - dict(zip(col_headers, row)) for row in cursor.fetchall() + intern_dict(dict(zip(col_headers, row))) for row in cursor.fetchall() ) return results diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py index dbc074d6b..6b9d848ea 100644 --- a/synapse/storage/receipts.py +++ b/synapse/storage/receipts.py @@ -62,18 +62,17 @@ class ReceiptsStore(SQLBaseStore): @cachedInlineCallbacks(num_args=2) def get_receipts_for_user(self, user_id, receipt_type): - def f(txn): - sql = ( - "SELECT room_id,event_id " - "FROM receipts_linearized " - "WHERE user_id = ? AND receipt_type = ? " - ) - txn.execute(sql, (user_id, receipt_type)) - return txn.fetchall() + rows = yield self._simple_select_list( + table="receipts_linearized", + keyvalues={ + "user_id": user_id, + "receipt_type": receipt_type, + }, + retcols=("room_id", "event_id"), + desc="get_receipts_for_user", + ) - defer.returnValue(dict( - (yield self.runInteraction("get_receipts_for_user", f)) - )) + defer.returnValue({row["room_id"]: row["event_id"] for row in rows}) @defer.inlineCallbacks def get_linearized_receipts_for_rooms(self, room_ids, to_key, from_key=None): diff --git a/synapse/storage/state.py b/synapse/storage/state.py index eab2c5a8c..02cefdff2 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -15,6 +15,7 @@ from ._base import SQLBaseStore from synapse.util.caches.descriptors import cached, cachedList +from synapse.util.caches import intern_string from twisted.internet import defer @@ -393,7 +394,10 @@ class StateStore(SQLBaseStore): # cache absence of the key, on the assumption that if we've # explicitly asked for some types then we will probably ask # for them again. - state_dict = {key: None for key in types} + state_dict = { + (intern_string(etype), intern_string(state_key)): None + for (etype, state_key) in types + } state_dict.update(results[group]) results[group] = state_dict else: diff --git a/synapse/util/caches/__init__.py b/synapse/util/caches/__init__.py index 1a1490419..d53569ca4 100644 --- a/synapse/util/caches/__init__.py +++ b/synapse/util/caches/__init__.py @@ -14,6 +14,10 @@ # limitations under the License. import synapse.metrics +from lrucache import LruCache +import os + +CACHE_SIZE_FACTOR = float(os.environ.get("SYNAPSE_CACHE_FACTOR", 0.1)) DEBUG_CACHES = False @@ -25,3 +29,56 @@ cache_counter = metrics.register_cache( lambda: {(name,): len(caches_by_name[name]) for name in caches_by_name.keys()}, labels=["name"], ) + +_string_cache = LruCache(int(5000 * CACHE_SIZE_FACTOR)) +caches_by_name["string_cache"] = _string_cache + + +KNOWN_KEYS = { + key: key for key in + ( + "auth_events", + "content", + "depth", + "event_id", + "hashes", + "origin", + "origin_server_ts", + "prev_events", + "room_id", + "sender", + "signatures", + "state_key", + "type", + "unsigned", + "user_id", + ) +} + + +def intern_string(string): + """Takes a (potentially) unicode string and interns using custom cache + """ + return _string_cache.setdefault(string, string) + + +def intern_dict(dictionary): + """Takes a dictionary and interns well known keys and their values + """ + return { + KNOWN_KEYS.get(key, key): _intern_known_values(key, value) + for key, value in dictionary.items() + } + + +def _intern_known_values(key, value): + intern_str_keys = ("event_id", "room_id") + intern_unicode_keys = ("sender", "user_id", "type", "state_key") + + if key in intern_str_keys: + return intern(value.encode('ascii')) + + if key in intern_unicode_keys: + return intern_string(value) + + return value