Only intern ascii strings

This commit is contained in:
Erik Johnston 2017-04-24 14:07:27 +01:00
parent cea7839911
commit d134d0935e

View File

@ -14,7 +14,6 @@
# limitations under the License. # limitations under the License.
import synapse.metrics import synapse.metrics
from lrucache import LruCache
import os import os
CACHE_SIZE_FACTOR = float(os.environ.get("SYNAPSE_CACHE_FACTOR", 0.1)) CACHE_SIZE_FACTOR = float(os.environ.get("SYNAPSE_CACHE_FACTOR", 0.1))
@ -40,10 +39,6 @@ def register_cache(name, cache):
) )
_string_cache = LruCache(int(100000 * CACHE_SIZE_FACTOR))
_stirng_cache_metrics = register_cache("string_cache", _string_cache)
KNOWN_KEYS = { KNOWN_KEYS = {
key: key for key in key: key for key in
( (
@ -67,14 +62,16 @@ KNOWN_KEYS = {
def intern_string(string): def intern_string(string):
"""Takes a (potentially) unicode string and interns using custom cache """Takes a (potentially) unicode string and interns it if it's ascii
""" """
new_str = _string_cache.setdefault(string, string) if string is None:
if new_str is string: return None
_stirng_cache_metrics.inc_hits()
else: try:
_stirng_cache_metrics.inc_misses() string = string.encode("ascii")
return new_str return intern(string)
except UnicodeEncodeError:
return string
def intern_dict(dictionary): def intern_dict(dictionary):
@ -87,13 +84,9 @@ def intern_dict(dictionary):
def _intern_known_values(key, value): def _intern_known_values(key, value):
intern_str_keys = ("event_id", "room_id") intern_keys = ("event_id", "room_id", "sender", "user_id", "type", "state_key",)
intern_unicode_keys = ("sender", "user_id", "type", "state_key")
if key in intern_str_keys: if key in intern_keys:
return intern(value.encode('ascii'))
if key in intern_unicode_keys:
return intern_string(value) return intern_string(value)
return value return value