diff --git a/changelog.d/4253.bugfix b/changelog.d/4253.bugfix new file mode 100644 index 000000000..1796e95b8 --- /dev/null +++ b/changelog.d/4253.bugfix @@ -0,0 +1 @@ +Fix UnicodeDecodeError when postgres is configured to give non-English errors diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index d9d0255d0..38e7d2636 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -29,6 +29,7 @@ from synapse.api.errors import StoreError from synapse.storage.engines import PostgresEngine from synapse.util.caches.descriptors import Cache from synapse.util.logcontext import LoggingContext, PreserveLoggingContext +from synapse.util.stringutils import exception_to_unicode logger = logging.getLogger(__name__) @@ -249,32 +250,32 @@ class SQLBaseStore(object): except self.database_engine.module.OperationalError as e: # This can happen if the database disappears mid # transaction. - logger.warn( + logger.warning( "[TXN OPERROR] {%s} %s %d/%d", - name, e, i, N + name, exception_to_unicode(e), i, N ) if i < N: i += 1 try: conn.rollback() except self.database_engine.module.Error as e1: - logger.warn( + logger.warning( "[TXN EROLL] {%s} %s", - name, e1, + name, exception_to_unicode(e1), ) continue raise except self.database_engine.module.DatabaseError as e: if self.database_engine.is_deadlock(e): - logger.warn("[TXN DEADLOCK] {%s} %d/%d", name, i, N) + logger.warning("[TXN DEADLOCK] {%s} %d/%d", name, i, N) if i < N: i += 1 try: conn.rollback() except self.database_engine.module.Error as e1: - logger.warn( + logger.warning( "[TXN EROLL] {%s} %s", - name, e1, + name, exception_to_unicode(e1), ) continue raise diff --git a/synapse/util/stringutils.py b/synapse/util/stringutils.py index 6f318c6a2..fdcb375f9 100644 --- a/synapse/util/stringutils.py +++ b/synapse/util/stringutils.py @@ -16,7 +16,8 @@ import random import string -from six import PY3 +import six +from six import PY2, PY3 from six.moves import range _string_with_symbols = ( @@ -71,3 +72,39 @@ def to_ascii(s): return s.encode("ascii") except UnicodeEncodeError: return s + + +def exception_to_unicode(e): + """Helper function to extract the text of an exception as a unicode string + + Args: + e (Exception): exception to be stringified + + Returns: + unicode + """ + # urgh, this is a mess. The basic problem here is that psycopg2 constructs its + # exceptions with PyErr_SetString, with a (possibly non-ascii) argument. str() will + # then produce the raw byte sequence. Under Python 2, this will then cause another + # error if it gets mixed with a `unicode` object, as per + # https://github.com/matrix-org/synapse/issues/4252 + + # First of all, if we're under python3, everything is fine because it will sort this + # nonsense out for us. + if not PY2: + return str(e) + + # otherwise let's have a stab at decoding the exception message. We'll circumvent + # Exception.__str__(), which would explode if someone raised Exception(u'non-ascii') + # and instead look at what is in the args member. + + if len(e.args) == 0: + return u"" + elif len(e.args) > 1: + return six.text_type(repr(e.args)) + + msg = e.args[0] + if isinstance(msg, bytes): + return msg.decode('utf-8', errors='replace') + else: + return msg