From a4bf72c30c5953b721a64eae89db186fa8735bb3 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 29 Aug 2019 17:38:51 +0100
Subject: [PATCH 001/276] Censor redactions in DB after a month

---
 synapse/storage/events.py                     | 88 ++++++++++++++++++-
 .../schema/delta/56/redaction_censor.sql      | 17 ++++
 tests/storage/test_redaction.py               | 71 +++++++++++++++
 3 files changed, 175 insertions(+), 1 deletion(-)
 create mode 100644 synapse/storage/schema/delta/56/redaction_censor.sql

diff --git a/synapse/storage/events.py b/synapse/storage/events.py
index 5a95c36a8..2970da682 100644
--- a/synapse/storage/events.py
+++ b/synapse/storage/events.py
@@ -23,7 +23,7 @@ from functools import wraps
 from six import iteritems, text_type
 from six.moves import range
 
-from canonicaljson import json
+from canonicaljson import encode_canonical_json, json
 from prometheus_client import Counter, Histogram
 
 from twisted.internet import defer
@@ -33,6 +33,7 @@ from synapse.api.constants import EventTypes
 from synapse.api.errors import SynapseError
 from synapse.events import EventBase  # noqa: F401
 from synapse.events.snapshot import EventContext  # noqa: F401
+from synapse.events.utils import prune_event_dict
 from synapse.logging.context import PreserveLoggingContext, make_deferred_yieldable
 from synapse.logging.utils import log_function
 from synapse.metrics import BucketCollector
@@ -262,6 +263,13 @@ class EventsStore(
 
         hs.get_clock().looping_call(read_forward_extremities, 60 * 60 * 1000)
 
+        def _censor_redactions():
+            return run_as_background_process(
+                "_censor_redactions", self._censor_redactions
+            )
+
+        hs.get_clock().looping_call(_censor_redactions, 10 * 60 * 1000)
+
     @defer.inlineCallbacks
     def _read_forward_extremities(self):
         def fetch(txn):
@@ -1548,6 +1556,84 @@ class EventsStore(
             (event.event_id, event.redacts),
         )
 
+    @defer.inlineCallbacks
+    def _censor_redactions(self):
+        """Censors all redactions older than a month that haven't been censored.
+
+        By censor we mean update the event_json table with the redacted event.
+
+        Returns:
+            Deferred
+        """
+
+        if self.stream_ordering_month_ago is None:
+            return
+
+        max_pos = self.stream_ordering_month_ago
+
+        # We fetch all redactions that point to an event that we have that has
+        # a stream ordering from over a month ago, that we haven't yet censored
+        # in the DB.
+        sql = """
+            SELECT er.event_id, redacts FROM redactions
+            INNER JOIN events AS er USING (event_id)
+            INNER JOIN events AS eb ON (er.room_id = eb.room_id AND redacts = eb.event_id)
+            WHERE NOT have_censored
+            AND ? <= er.stream_ordering AND er.stream_ordering <= ?
+            ORDER BY er.stream_ordering ASC
+            LIMIT ?
+        """
+
+        rows = yield self._execute(
+            "_censor_redactions_fetch", None, sql, -max_pos, max_pos, 100
+        )
+
+        updates = []
+
+        for redaction_id, event_id in rows:
+            redaction_event = yield self.get_event(redaction_id, allow_none=True)
+            original_event = yield self.get_event(
+                event_id, allow_rejected=True, allow_none=True
+            )
+
+            # The SQL above ensures that we have both the redaction and
+            # original event, so if the `get_event` calls return None it
+            # means that the redaction wasn't allowed. Either way we know that
+            # the result won't change so we mark the fact that we've checked.
+            if (
+                redaction_event
+                and original_event
+                and original_event.internal_metadata.is_redacted()
+            ):
+                # Redaction was allowed
+                pruned_json = encode_canonical_json(
+                    prune_event_dict(original_event.get_dict())
+                )
+            else:
+                # Redaction wasn't allowed
+                pruned_json = None
+
+            updates.append((redaction_id, event_id, pruned_json))
+
+        def _update_censor_txn(txn):
+            for redaction_id, event_id, pruned_json in updates:
+                if pruned_json:
+                    self._simple_update_one_txn(
+                        txn,
+                        table="event_json",
+                        keyvalues={"event_id": event_id},
+                        updatevalues={"json": pruned_json},
+                    )
+
+                self._simple_update_one_txn(
+                    txn,
+                    table="redactions",
+                    keyvalues={"event_id": redaction_id},
+                    updatevalues={"have_censored": True},
+                )
+
+        yield self.runInteraction("_update_censor_txn", _update_censor_txn)
+
     @defer.inlineCallbacks
     def count_daily_messages(self):
         """
diff --git a/synapse/storage/schema/delta/56/redaction_censor.sql b/synapse/storage/schema/delta/56/redaction_censor.sql
new file mode 100644
index 000000000..fe51b0230
--- /dev/null
+++ b/synapse/storage/schema/delta/56/redaction_censor.sql
@@ -0,0 +1,17 @@
+/* Copyright 2019 The Matrix.org Foundation C.I.C.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ALTER TABLE redactions ADD COLUMN have_censored BOOL NOT NULL DEFAULT false;
+CREATE INDEX redactions_have_censored ON redactions(event_id) WHERE not have_censored;
diff --git a/tests/storage/test_redaction.py b/tests/storage/test_redaction.py
index d961b81d4..0c9f3c707 100644
--- a/tests/storage/test_redaction.py
+++ b/tests/storage/test_redaction.py
@@ -17,6 +17,8 @@
 
 from mock import Mock
 
+from canonicaljson import json
+
 from twisted.internet import defer
 
 from synapse.api.constants import EventTypes, Membership
@@ -286,3 +288,72 @@ class RedactionTestCase(unittest.HomeserverTestCase):
         self.assertEqual(
             fetched.unsigned["redacted_because"].event_id, redaction_event_id2
         )
+
+    def test_redact_censor(self):
+        """Test that a redacted event gets censored in the DB after a month
+        """
+
+        self.get_success(
+            self.inject_room_member(self.room1, self.u_alice, Membership.JOIN)
+        )
+
+        msg_event = self.get_success(self.inject_message(self.room1, self.u_alice, "t"))
+
+        # Check event has not been redacted:
+        event = self.get_success(self.store.get_event(msg_event.event_id))
+
+        self.assertObjectHasAttributes(
+            {
+                "type": EventTypes.Message,
+                "user_id": self.u_alice.to_string(),
+                "content": {"body": "t", "msgtype": "message"},
+            },
+            event,
+        )
+
+        self.assertFalse("redacted_because" in event.unsigned)
+
+        # Redact event
+        reason = "Because I said so"
+        self.get_success(
+            self.inject_redaction(self.room1, msg_event.event_id, self.u_alice, reason)
+        )
+
+        event = self.get_success(self.store.get_event(msg_event.event_id))
+
+        self.assertTrue("redacted_because" in event.unsigned)
+
+        self.assertObjectHasAttributes(
+            {
+                "type": EventTypes.Message,
+                "user_id": self.u_alice.to_string(),
+                "content": {},
+            },
+            event,
+        )
+
+        event_json = self.get_success(
+            self.store._simple_select_one_onecol(
+                table="event_json",
+                keyvalues={"event_id": msg_event.event_id},
+                retcol="json",
+            )
+        )
+
+        self.assert_dict(
+            {"content": {"body": "t", "msgtype": "message"}}, json.loads(event_json)
+        )
+
+        # Advance by 30 days
+        self.reactor.advance(60 * 60 * 24 * 31)
+        self.reactor.advance(60 * 60 * 2)
+
+        event_json = self.get_success(
+            self.store._simple_select_one_onecol(
+                table="event_json",
+                keyvalues={"event_id": msg_event.event_id},
+                retcol="json",
+            )
+        )
+
+        self.assert_dict({"content": {}}, json.loads(event_json))

From 549f974897ddf2fb0e5dc571c3da8034a1eb6510 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Fri, 30 Aug 2019 09:51:33 +0100
Subject: [PATCH 002/276] Newsfile

---
 changelog.d/5934.feature | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/5934.feature

diff --git a/changelog.d/5934.feature b/changelog.d/5934.feature
new file mode 100644
index 000000000..eae969a52
--- /dev/null
+++ b/changelog.d/5934.feature
@@ -0,0 +1 @@
+Redact events in the database that have been redacted for a month.

From 3ff0422d2dbfa668df365da99a4b7caeea85528d Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 5 Sep 2019 17:16:03 +0100
Subject: [PATCH 003/276] Make redaction retention period configurable

---
 docs/sample_config.yaml         |  5 +++++
 synapse/config/server.py        | 15 +++++++++++++++
 synapse/storage/events.py       |  6 ++++--
 tests/storage/test_redaction.py |  4 +++-
 4 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml
index 43969bbb7..e23b80d2b 100644
--- a/docs/sample_config.yaml
+++ b/docs/sample_config.yaml
@@ -306,6 +306,11 @@ listeners:
 #
 #allow_per_room_profiles: false
 
+# How long to keep redacted events in unredacted form in the database.
+# By default redactions are kept indefinitely.
+#
+#redaction_retention_period: 30d
+
 
 ## TLS ##
 
diff --git a/synapse/config/server.py b/synapse/config/server.py
index 2abdef097..8efab924d 100644
--- a/synapse/config/server.py
+++ b/synapse/config/server.py
@@ -162,6 +162,16 @@ class ServerConfig(Config):
 
         self.mau_trial_days = config.get("mau_trial_days", 0)
 
+        # How long to keep redacted events in the database in unredacted form
+        # before redacting them.
+        redaction_retention_period = config.get("redaction_retention_period")
+        if redaction_retention_period:
+            self.redaction_retention_period = self.parse_duration(
+                redaction_retention_period
+            )
+        else:
+            self.redaction_retention_period = None
+
         # Options to disable HS
         self.hs_disabled = config.get("hs_disabled", False)
         self.hs_disabled_message = config.get("hs_disabled_message", "")
@@ -718,6 +728,11 @@ class ServerConfig(Config):
         # Defaults to 'true'.
         #
         #allow_per_room_profiles: false
+
+        # How long to keep redacted events in unredacted form in the database.
+        # By default redactions are kept indefinitely.
+        #
+        #redaction_retention_period: 30d
         """
             % locals()
         )
diff --git a/synapse/storage/events.py b/synapse/storage/events.py
index 2970da682..d0d1781c9 100644
--- a/synapse/storage/events.py
+++ b/synapse/storage/events.py
@@ -1566,10 +1566,12 @@ class EventsStore(
             Deferred
         """
 
-        if self.stream_ordering_month_ago is None:
+        if not self.hs.config.redaction_retention_period:
             return
 
-        max_pos = self.stream_ordering_month_ago
+        max_pos = yield self.find_first_stream_ordering_after_ts(
+            self._clock.time_msec() - self.hs.config.redaction_retention_period
+        )
 
         # We fetch all redactions that point to an event that we have that has
         # a stream ordering from over a month ago, that we haven't yet censored
diff --git a/tests/storage/test_redaction.py b/tests/storage/test_redaction.py
index 0c9f3c707..f0e86d41a 100644
--- a/tests/storage/test_redaction.py
+++ b/tests/storage/test_redaction.py
@@ -344,7 +344,9 @@ class RedactionTestCase(unittest.HomeserverTestCase):
             {"content": {"body": "t", "msgtype": "message"}}, json.loads(event_json)
         )
 
-        # Advance by 30 days
+        # Advance by 31 days, then advance again to ensure that the looping call
+        # for updating the stream position gets called and then the looping call
+        # for the censoring gets called.
         self.reactor.advance(60 * 60 * 24 * 31)
         self.reactor.advance(60 * 60 * 2)
 

From ad9b64b4969537ac339469152eaa437bcf4b6609 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 5 Sep 2019 17:17:47 +0100
Subject: [PATCH 004/276] Fix test

---
 tests/storage/test_redaction.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/storage/test_redaction.py b/tests/storage/test_redaction.py
index f0e86d41a..deecfad9f 100644
--- a/tests/storage/test_redaction.py
+++ b/tests/storage/test_redaction.py
@@ -31,8 +31,10 @@ from tests.utils import create_room
 
 class RedactionTestCase(unittest.HomeserverTestCase):
     def make_homeserver(self, reactor, clock):
+        config = self.default_config()
+        config["redaction_retention_period"] = "30d"
         return self.setup_test_homeserver(
-            resource_for_federation=Mock(), http_client=None
+            resource_for_federation=Mock(), http_client=None, config=config
         )
 
     def prepare(self, reactor, clock, hs):

From 05bae6b4fc97943b3738bac3175da1bc49f13512 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Mon, 9 Sep 2019 10:13:14 +0100
Subject: [PATCH 005/276] Add opentracing span for HTTP push

---
 synapse/push/httppusher.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py
index bd5d53af9..629958780 100644
--- a/synapse/push/httppusher.py
+++ b/synapse/push/httppusher.py
@@ -22,6 +22,7 @@ from prometheus_client import Counter
 from twisted.internet import defer
 from twisted.internet.error import AlreadyCalled, AlreadyCancelled
 
+from synapse.logging import opentracing
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.push import PusherConfigException
 
@@ -194,7 +195,17 @@ class HttpPusher(object):
         )
 
         for push_action in unprocessed:
-            processed = yield self._process_one(push_action)
+            with opentracing.start_active_span(
+                "http-push",
+                tags={
+                    "authenticated_entity": self.user_id,
+                    "event_id": push_action["event_id"],
+                    "app_id": self.app_id,
+                    "app_display_name": self.app_display_name,
+                },
+            ):
+                processed = yield self._process_one(push_action)
+
             if processed:
                 http_push_processed_counter.inc()
                 self.backoff_delay = HttpPusher.INITIAL_BACKOFF_SEC

From a852e93408bf86a5acd939c58954621f653b56b6 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Mon, 9 Sep 2019 10:14:51 +0100
Subject: [PATCH 006/276] Newsfile

---
 changelog.d/6003.misc | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/6003.misc

diff --git a/changelog.d/6003.misc b/changelog.d/6003.misc
new file mode 100644
index 000000000..4152d05f8
--- /dev/null
+++ b/changelog.d/6003.misc
@@ -0,0 +1 @@
+Add opentracing span over HTTP push processing.

From be618e055178f4aa9865ab426182218312bed07f Mon Sep 17 00:00:00 2001
From: Jason Robinson <jasonr@matrix.org>
Date: Mon, 9 Sep 2019 14:43:51 +0300
Subject: [PATCH 007/276] Only count real users when checking for auto-creation
 of auto-join room

Previously, if the first registered user was a "support" or "bot" user,
the auto-join rooms were not created when the first real user
registered.

Fix this by excluding non-real users (i.e. users with a special user
type) when counting how many users there are to determine whether we
should auto-create a room.

Signed-off-by: Jason Robinson <jasonr@matrix.org>
---
 changelog.d/6004.bugfix         |  1 +
 synapse/handlers/register.py    | 12 ++++------
 synapse/storage/registration.py | 39 +++++++++++++++++++++++++++++++++
 tests/handlers/test_register.py | 29 ++++++++++++++++++++++--
 4 files changed, 71 insertions(+), 10 deletions(-)
 create mode 100644 changelog.d/6004.bugfix

diff --git a/changelog.d/6004.bugfix b/changelog.d/6004.bugfix
new file mode 100644
index 000000000..45c179c8f
--- /dev/null
+++ b/changelog.d/6004.bugfix
@@ -0,0 +1 @@
+Only count real users when checking for auto-creation of auto-join room.
diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py
index 975da57ff..06bd03b77 100644
--- a/synapse/handlers/register.py
+++ b/synapse/handlers/register.py
@@ -275,16 +275,12 @@ class RegistrationHandler(BaseHandler):
         fake_requester = create_requester(user_id)
 
         # try to create the room if we're the first real user on the server. Note
-        # that an auto-generated support user is not a real user and will never be
+        # that an auto-generated support or bot user is not a real user and will never be
         # the user to create the room
         should_auto_create_rooms = False
-        is_support = yield self.store.is_support_user(user_id)
-        # There is an edge case where the first user is the support user, then
-        # the room is never created, though this seems unlikely and
-        # recoverable from given the support user being involved in the first
-        # place.
-        if self.hs.config.autocreate_auto_join_rooms and not is_support:
-            count = yield self.store.count_all_users()
+        is_real_user = yield self.store.is_real_user(user_id)
+        if self.hs.config.autocreate_auto_join_rooms and is_real_user:
+            count = yield self.store.count_real_users()
             should_auto_create_rooms = count == 1
         for r in self.hs.config.auto_join_rooms:
             logger.info("Auto-joining %s to %s", user_id, r)
diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py
index 5138792a5..b054d86ae 100644
--- a/synapse/storage/registration.py
+++ b/synapse/storage/registration.py
@@ -322,6 +322,21 @@ class RegistrationWorkerStore(SQLBaseStore):
 
         return None
 
+    @cachedInlineCallbacks()
+    def is_real_user(self, user_id):
+        """Determines if the user is a real user, ie does not have a 'user_type'.
+
+        Args:
+            user_id (str): user id to test
+
+        Returns:
+            Deferred[bool]: True if user 'user_type' is null or empty string
+        """
+        res = yield self.runInteraction(
+            "is_real_user", self.is_real_user_txn, user_id
+        )
+        return res
+
     @cachedInlineCallbacks()
     def is_support_user(self, user_id):
         """Determines if the user is of type UserTypes.SUPPORT
@@ -337,6 +352,16 @@ class RegistrationWorkerStore(SQLBaseStore):
         )
         return res
 
+    def is_real_user_txn(self, txn, user_id):
+        res = self._simple_select_one_onecol_txn(
+            txn=txn,
+            table="users",
+            keyvalues={"name": user_id},
+            retcol="user_type",
+            allow_none=True,
+        )
+        return True if res is None or res == "" else False
+
     def is_support_user_txn(self, txn, user_id):
         res = self._simple_select_one_onecol_txn(
             txn=txn,
@@ -421,6 +446,20 @@ class RegistrationWorkerStore(SQLBaseStore):
         ret = yield self.runInteraction("count_users", _count_users)
         return ret
 
+    @defer.inlineCallbacks
+    def count_real_users(self):
+        """Counts all users without a special user_type registered on the homeserver."""
+
+        def _count_users(txn):
+            txn.execute("SELECT COUNT(*) AS users FROM users where user_type is null or user_type = ''")
+            rows = self.cursor_to_dict(txn)
+            if rows:
+                return rows[0]["users"]
+            return 0
+
+        ret = yield self.runInteraction("count_real_users", _count_users)
+        return ret
+
     @defer.inlineCallbacks
     def find_next_generated_user_id_localpart(self):
         """
diff --git a/tests/handlers/test_register.py b/tests/handlers/test_register.py
index e10296a5e..1e9ba3a20 100644
--- a/tests/handlers/test_register.py
+++ b/tests/handlers/test_register.py
@@ -171,11 +171,11 @@ class RegistrationTestCase(unittest.HomeserverTestCase):
         rooms = self.get_success(self.store.get_rooms_for_user(user_id))
         self.assertEqual(len(rooms), 0)
 
-    def test_auto_create_auto_join_rooms_when_support_user_exists(self):
+    def test_auto_create_auto_join_rooms_when_user_is_not_a_real_user(self):
         room_alias_str = "#room:test"
         self.hs.config.auto_join_rooms = [room_alias_str]
 
-        self.store.is_support_user = Mock(return_value=True)
+        self.store.is_real_user = Mock(return_value=False)
         user_id = self.get_success(self.handler.register_user(localpart="support"))
         rooms = self.get_success(self.store.get_rooms_for_user(user_id))
         self.assertEqual(len(rooms), 0)
@@ -183,6 +183,31 @@ class RegistrationTestCase(unittest.HomeserverTestCase):
         room_alias = RoomAlias.from_string(room_alias_str)
         self.get_failure(directory_handler.get_association(room_alias), SynapseError)
 
+    def test_auto_create_auto_join_rooms_when_user_is_the_first_real_user(self):
+        room_alias_str = "#room:test"
+        self.hs.config.auto_join_rooms = [room_alias_str]
+
+        self.store.count_real_users = Mock(return_value=1)
+        self.store.is_real_user = Mock(return_value=True)
+        user_id = self.get_success(self.handler.register_user(localpart="real"))
+        rooms = self.get_success(self.store.get_rooms_for_user(user_id))
+        directory_handler = self.hs.get_handlers().directory_handler
+        room_alias = RoomAlias.from_string(room_alias_str)
+        room_id = self.get_success(directory_handler.get_association(room_alias))
+
+        self.assertTrue(room_id["room_id"] in rooms)
+        self.assertEqual(len(rooms), 1)
+
+    def test_auto_create_auto_join_rooms_when_user_is_not_the_first_real_user(self):
+        room_alias_str = "#room:test"
+        self.hs.config.auto_join_rooms = [room_alias_str]
+
+        self.store.count_real_users = Mock(return_value=2)
+        self.store.is_real_user = Mock(return_value=True)
+        user_id = self.get_success(self.handler.register_user(localpart="real"))
+        rooms = self.get_success(self.store.get_rooms_for_user(user_id))
+        self.assertEqual(len(rooms), 0)
+
     def test_auto_create_auto_join_where_no_consent(self):
         """Test to ensure that the first user is not auto-joined to a room if
         they have not given general consent.

From 62fac9d969cea98694093a5f80bed6bdd4848968 Mon Sep 17 00:00:00 2001
From: Jason Robinson <jasonr@matrix.org>
Date: Mon, 9 Sep 2019 14:59:35 +0300
Subject: [PATCH 008/276] Auto-fix a few code style issues

Signed-off-by: Jason Robinson <jasonr@matrix.org>
---
 synapse/storage/registration.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py
index b054d86ae..9387b2950 100644
--- a/synapse/storage/registration.py
+++ b/synapse/storage/registration.py
@@ -332,9 +332,7 @@ class RegistrationWorkerStore(SQLBaseStore):
         Returns:
             Deferred[bool]: True if user 'user_type' is null or empty string
         """
-        res = yield self.runInteraction(
-            "is_real_user", self.is_real_user_txn, user_id
-        )
+        res = yield self.runInteraction("is_real_user", self.is_real_user_txn, user_id)
         return res
 
     @cachedInlineCallbacks()
@@ -451,7 +449,9 @@ class RegistrationWorkerStore(SQLBaseStore):
         """Counts all users without a special user_type registered on the homeserver."""
 
         def _count_users(txn):
-            txn.execute("SELECT COUNT(*) AS users FROM users where user_type is null or user_type = ''")
+            txn.execute(
+                "SELECT COUNT(*) AS users FROM users where user_type is null or user_type = ''"
+            )
             rows = self.cursor_to_dict(txn)
             if rows:
                 return rows[0]["users"]

From 80e14a8546efb9e2f9edec3b1de0a8b943351252 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Mon, 9 Sep 2019 13:23:41 +0100
Subject: [PATCH 009/276] Handle setting retention period to 0

---
 synapse/config/server.py  | 2 +-
 synapse/storage/events.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/synapse/config/server.py b/synapse/config/server.py
index 8efab924d..aa71835dc 100644
--- a/synapse/config/server.py
+++ b/synapse/config/server.py
@@ -165,7 +165,7 @@ class ServerConfig(Config):
         # How long to keep redacted events in the database in unredacted form
         # before redacting them.
         redaction_retention_period = config.get("redaction_retention_period")
-        if redaction_retention_period:
+        if redaction_retention_period is not None:
             self.redaction_retention_period = self.parse_duration(
                 redaction_retention_period
             )
diff --git a/synapse/storage/events.py b/synapse/storage/events.py
index a5d13ddc4..77ba7eb2a 100644
--- a/synapse/storage/events.py
+++ b/synapse/storage/events.py
@@ -1566,7 +1566,7 @@ class EventsStore(
             Deferred
         """
 
-        if not self.hs.config.redaction_retention_period:
+        if self.hs.config.redaction_retention_period is None:
             return
 
         max_pos = yield self.find_first_stream_ordering_after_ts(

From fffe17b77d06927aaf64fa80be5b765c870a4ef5 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Mon, 9 Sep 2019 13:24:24 +0100
Subject: [PATCH 010/276] Don't start looping call unless enabled

---
 synapse/storage/events.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/synapse/storage/events.py b/synapse/storage/events.py
index 77ba7eb2a..9ef7aefd9 100644
--- a/synapse/storage/events.py
+++ b/synapse/storage/events.py
@@ -268,7 +268,8 @@ class EventsStore(
                 "_censor_redactions", self._censor_redactions
             )
 
-        hs.get_clock().looping_call(_censor_redactions, 10 * 60 * 1000)
+        if self.hs.config.redaction_retention_period is not None:
+            hs.get_clock().looping_call(_censor_redactions, 10 * 60 * 1000)
 
     @defer.inlineCallbacks
     def _read_forward_extremities(self):

From 916c69722833dd94c53d0fedeec8cc42d2085e73 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Mon, 9 Sep 2019 13:31:00 +0100
Subject: [PATCH 011/276] Fixup comment

---
 synapse/storage/events.py | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/synapse/storage/events.py b/synapse/storage/events.py
index 9ef7aefd9..4484ae7ce 100644
--- a/synapse/storage/events.py
+++ b/synapse/storage/events.py
@@ -269,7 +269,7 @@ class EventsStore(
             )
 
         if self.hs.config.redaction_retention_period is not None:
-            hs.get_clock().looping_call(_censor_redactions, 10 * 60 * 1000)
+            hs.get_clock().looping_call(_censor_redactions, 5 * 60 * 1000)
 
     @defer.inlineCallbacks
     def _read_forward_extremities(self):
@@ -1574,9 +1574,17 @@ class EventsStore(
             self._clock.time_msec() - self.hs.config.redaction_retention_period
         )
 
-        # We fetch all redactions that point to an event that we have that has
-        # a stream ordering from over a month ago, that we haven't yet censored
-        # in the DB.
+        # We fetch all redactions that:
+        #   1. point to an event we have,
+        #   2. have a stream ordering from before the cut off, and
+        #   3. we haven't yet censored.
+        #
+        # This is limited to 100 events to ensure that we don't try and do too
+        # much at once. We'll get called again so this should eventually catch
+        # up.
+        #
+        # We use the range [-max_pos, max_pos] to handle backfilled events,
+        # which are given negative stream ordering.
         sql = """
             SELECT er.event_id, redacts FROM redactions
             INNER JOIN events AS er USING (event_id)

From e7184a437062ae21846b8e071ded73526209e90c Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Mon, 9 Sep 2019 13:33:38 +0100
Subject: [PATCH 012/276] Use better names in SQL

---
 synapse/storage/events.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/synapse/storage/events.py b/synapse/storage/events.py
index 4484ae7ce..0da6e0b1a 100644
--- a/synapse/storage/events.py
+++ b/synapse/storage/events.py
@@ -1586,12 +1586,15 @@ class EventsStore(
         # We use the range [-max_pos, max_pos] to handle backfilled events,
         # which are given negative stream ordering.
         sql = """
-            SELECT er.event_id, redacts FROM redactions
-            INNER JOIN events AS er USING (event_id)
-            INNER JOIN events AS eb ON (er.room_id = eb.room_id AND redacts = eb.event_id)
+            SELECT redact_event.event_id, redacts FROM redactions
+            INNER JOIN events AS redact_event USING (event_id)
+            INNER JOIN events AS original_event ON (
+                redact_event.room_id = original_event.room_id
+                AND redacts = original_event.event_id
+            )
             WHERE NOT have_censored
-            AND ? <= er.stream_ordering AND er.stream_ordering <= ?
-            ORDER BY er.stream_ordering ASC
+            AND ? <= redact_event.stream_ordering AND redact_event.stream_ordering <= ?
+            ORDER BY redact_event.stream_ordering ASC
             LIMIT ?
         """
 

From 8b9ade8c7871c862cf2122a156f00e411cd7a276 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Mon, 9 Sep 2019 13:40:05 +0100
Subject: [PATCH 013/276] Default to censoring redactions after seven days

---
 docs/sample_config.yaml  |  8 +++++---
 synapse/config/server.py | 10 ++++++----
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml
index e23b80d2b..24adc3da2 100644
--- a/docs/sample_config.yaml
+++ b/docs/sample_config.yaml
@@ -306,10 +306,12 @@ listeners:
 #
 #allow_per_room_profiles: false
 
-# How long to keep redacted events in unredacted form in the database.
-# By default redactions are kept indefinitely.
+# How long to keep redacted events in unredacted form in the database. After
+# this period redacted events get replaced with their redacted form in the DB.
 #
-#redaction_retention_period: 30d
+# Defaults to `7d`. Set to `null` to disable.
+#
+redaction_retention_period: 7d
 
 
 ## TLS ##
diff --git a/synapse/config/server.py b/synapse/config/server.py
index aa71835dc..c8b9fe2d0 100644
--- a/synapse/config/server.py
+++ b/synapse/config/server.py
@@ -164,7 +164,7 @@ class ServerConfig(Config):
 
         # How long to keep redacted events in the database in unredacted form
         # before redacting them.
-        redaction_retention_period = config.get("redaction_retention_period")
+        redaction_retention_period = config.get("redaction_retention_period", "7d")
         if redaction_retention_period is not None:
             self.redaction_retention_period = self.parse_duration(
                 redaction_retention_period
@@ -729,10 +729,12 @@ class ServerConfig(Config):
         #
         #allow_per_room_profiles: false
 
-        # How long to keep redacted events in unredacted form in the database.
-        # By default redactions are kept indefinitely.
+        # How long to keep redacted events in unredacted form in the database. After
+        # this period redacted events get replaced with their redacted form in the DB.
         #
-        #redaction_retention_period: 30d
+        # Defaults to `7d`. Set to `null` to disable.
+        #
+        redaction_retention_period: 7d
         """
             % locals()
         )

From 8c03cd0e5f73fb59ee773dc6cce77f2dc4dab827 Mon Sep 17 00:00:00 2001
From: Jason Robinson <jasonr@matrix.org>
Date: Mon, 9 Sep 2019 16:40:40 +0300
Subject: [PATCH 014/276] Simplify is_real_user_txn check to trust user_type is
 null if real user

Signed-off-by: Jason Robinson <jasonr@matrix.org>
---
 synapse/storage/registration.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py
index 9387b2950..54b0846c5 100644
--- a/synapse/storage/registration.py
+++ b/synapse/storage/registration.py
@@ -358,7 +358,7 @@ class RegistrationWorkerStore(SQLBaseStore):
             retcol="user_type",
             allow_none=True,
         )
-        return True if res is None or res == "" else False
+        return res is None
 
     def is_support_user_txn(self, txn, user_id):
         res = self._simple_select_one_onecol_txn(

From e89fea4f04c6fc7df41c5cade63609b513a98073 Mon Sep 17 00:00:00 2001
From: Jason Robinson <jasonr@matrix.org>
Date: Mon, 9 Sep 2019 16:43:32 +0300
Subject: [PATCH 015/276] Simplify count_real_users SQL to only count user_type
 is null rows

Signed-off-by: Jason Robinson <jasonr@matrix.org>
---
 synapse/storage/registration.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py
index 54b0846c5..c0ca25733 100644
--- a/synapse/storage/registration.py
+++ b/synapse/storage/registration.py
@@ -450,7 +450,7 @@ class RegistrationWorkerStore(SQLBaseStore):
 
         def _count_users(txn):
             txn.execute(
-                "SELECT COUNT(*) AS users FROM users where user_type is null or user_type = ''"
+                "SELECT COUNT(*) AS users FROM users where user_type is null"
             )
             rows = self.cursor_to_dict(txn)
             if rows:

From 580f3df9b2573c0278dd952d1478689e5cd23a7b Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Mon, 9 Sep 2019 15:08:24 +0100
Subject: [PATCH 016/276] Fix comments

---
 synapse/storage/events.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/synapse/storage/events.py b/synapse/storage/events.py
index 0da6e0b1a..ddf7ab647 100644
--- a/synapse/storage/events.py
+++ b/synapse/storage/events.py
@@ -1559,7 +1559,8 @@ class EventsStore(
 
     @defer.inlineCallbacks
     def _censor_redactions(self):
-        """Censors all redactions older than a month that haven't been censored.
+        """Censors all redactions older than the configured period that haven't
+        been censored yet.
 
         By censor we mean update the event_json table with the redacted event.
 
@@ -1575,7 +1576,7 @@ class EventsStore(
         )
 
         # We fetch all redactions that:
-        #   1. point to an event we have that has,
+        #   1. point to an event we have,
         #   2. has a stream ordering from before the cut off, and
         #   3. we haven't yet censored.
         #

From aaed6b39e140195a0f2b48e4de0519e08f16a119 Mon Sep 17 00:00:00 2001
From: Jason Robinson <jasonr@matrix.org>
Date: Mon, 9 Sep 2019 17:10:02 +0300
Subject: [PATCH 017/276] Fix code style, again

Signed-off-by: Jason Robinson <jasonr@matrix.org>
---
 synapse/storage/registration.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py
index c0ca25733..109052fa4 100644
--- a/synapse/storage/registration.py
+++ b/synapse/storage/registration.py
@@ -449,9 +449,7 @@ class RegistrationWorkerStore(SQLBaseStore):
         """Counts all users without a special user_type registered on the homeserver."""
 
         def _count_users(txn):
-            txn.execute(
-                "SELECT COUNT(*) AS users FROM users where user_type is null"
-            )
+            txn.execute("SELECT COUNT(*) AS users FROM users where user_type is null")
             rows = self.cursor_to_dict(txn)
             if rows:
                 return rows[0]["users"]

From aeb9b2179eaa4b468bec937570d3ac7de7ccaaea Mon Sep 17 00:00:00 2001
From: Amber Brown <hawkowl@atleastfornow.net>
Date: Tue, 10 Sep 2019 00:14:58 +1000
Subject: [PATCH 018/276] Add a build info metric to Prometheus (#6005)

---
 changelog.d/6005.feature    |  1 +
 synapse/metrics/__init__.py | 12 ++++++++++++
 tests/test_metrics.py       | 22 ++++++++++++++++++++--
 3 files changed, 33 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/6005.feature

diff --git a/changelog.d/6005.feature b/changelog.d/6005.feature
new file mode 100644
index 000000000..ed6491d3e
--- /dev/null
+++ b/changelog.d/6005.feature
@@ -0,0 +1 @@
+The new Prometheus metric `synapse_build_info` exposes the Python version, OS version, and Synapse version of the running server.
diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py
index 488280b4a..b5c9595cb 100644
--- a/synapse/metrics/__init__.py
+++ b/synapse/metrics/__init__.py
@@ -29,11 +29,13 @@ from prometheus_client.core import REGISTRY, GaugeMetricFamily, HistogramMetricF
 
 from twisted.internet import reactor
 
+import synapse
 from synapse.metrics._exposition import (
     MetricsResource,
     generate_latest,
     start_http_server,
 )
+from synapse.util.versionstring import get_version_string
 
 logger = logging.getLogger(__name__)
 
@@ -385,6 +387,16 @@ event_processing_last_ts = Gauge("synapse_event_processing_last_ts", "", ["name"
 # finished being processed.
 event_processing_lag = Gauge("synapse_event_processing_lag", "", ["name"])
 
+# Build info of the running server.
+build_info = Gauge(
+    "synapse_build_info", "Build information", ["pythonversion", "version", "osversion"]
+)
+build_info.labels(
+    " ".join([platform.python_implementation(), platform.python_version()]),
+    get_version_string(synapse),
+    " ".join([platform.system(), platform.release()]),
+).set(1)
+
 last_ticked = time.time()
 
 
diff --git a/tests/test_metrics.py b/tests/test_metrics.py
index 2edbae5c6..270f853d6 100644
--- a/tests/test_metrics.py
+++ b/tests/test_metrics.py
@@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
 # Copyright 2018 New Vector Ltd
+# Copyright 2019 Matrix.org Foundation C.I.C.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -13,8 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
-from synapse.metrics import InFlightGauge
+from synapse.metrics import REGISTRY, InFlightGauge, generate_latest
 
 from tests import unittest
 
@@ -111,3 +111,21 @@ class TestMauLimit(unittest.TestCase):
             }
 
         return results
+
+
+class BuildInfoTests(unittest.TestCase):
+    def test_get_build(self):
+        """
+        The synapse_build_info metric reports the OS version, Python version,
+        and Synapse version.
+        """
+        items = list(
+            filter(
+                lambda x: b"synapse_build_info{" in x,
+                generate_latest(REGISTRY).split(b"\n"),
+            )
+        )
+        self.assertEqual(len(items), 1)
+        self.assertTrue(b"osversion=" in items[0])
+        self.assertTrue(b"pythonversion=" in items[0])
+        self.assertTrue(b"version=" in items[0])

From 60d3c57bd0c977cbe6b7585a2c1517cc4e2c16dd Mon Sep 17 00:00:00 2001
From: Andrew Morgan <andrew@amorgan.xyz>
Date: Tue, 10 Sep 2019 15:57:32 +0100
Subject: [PATCH 019/276] Use account_threepid_delegate for 3pid validation

---
 synapse/handlers/auth.py                | 11 +++-
 synapse/handlers/identity.py            | 75 +++++++++++--------------
 synapse/rest/client/v2_alpha/account.py |  3 +-
 3 files changed, 46 insertions(+), 43 deletions(-)

diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py
index d0c014274..374372b69 100644
--- a/synapse/handlers/auth.py
+++ b/synapse/handlers/auth.py
@@ -444,7 +444,16 @@ class AuthHandler(BaseHandler):
 
         logger.info("Getting validated threepid. threepidcreds: %r", (threepid_creds,))
         if self.hs.config.threepid_behaviour_email == ThreepidBehaviour.REMOTE:
-            threepid = yield identity_handler.threepid_from_creds(threepid_creds)
+            if medium == "email":
+                threepid = yield identity_handler.threepid_from_creds(
+                    self.hs.config.account_threepid_delegate_email, threepid_creds
+                )
+            elif medium == "msisdn":
+                threepid = yield identity_handler.threepid_from_creds(
+                    self.hs.config.account_threepid_delegate_msisdn, threepid_creds
+                )
+            else:
+                raise SynapseError(400, "Unrecognized threepid medium: %s" % (medium,))
         elif self.hs.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL:
             row = yield self.store.get_threepid_validation_session(
                 medium,
diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py
index 71b5a8739..2dfb79fde 100644
--- a/synapse/handlers/identity.py
+++ b/synapse/handlers/identity.py
@@ -75,59 +75,52 @@ class IdentityHandler(BaseHandler):
         return client_secret, id_server, id_access_token
 
     @defer.inlineCallbacks
-    def threepid_from_creds(self, creds, use_v2=True):
+    def threepid_from_creds(self, id_server, creds):
         """
-        Retrieve and validate a threepid identitier from a "credentials" dictionary
+        Retrieve and validate a threepid identifier from a "credentials" dictionary against a
+        given identity server
 
         Args:
-            creds (dict[str, str]): Dictionary of credentials that contain the following keys:
+            id_server (str|None): The identity server to validate 3PIDs against. If None,
+                we will attempt to extract id_server creds
+
+            creds (dict[str, str]): Dictionary containing the following key:
+                * id_server: An optional domain name of an identity server
                 * client_secret|clientSecret: A unique secret str provided by the client
-                * id_server|idServer: the domain of the identity server to query
-                * id_access_token: The access token to authenticate to the identity
-                    server with. Required if use_v2 is true
-            use_v2 (bool): Whether to use v2 Identity Service API endpoints
+                * sid: The ID of the validation session
 
         Returns:
             Deferred[dict[str,str|int]|None]: A dictionary consisting of response params to
                 the /getValidated3pid endpoint of the Identity Service API, or None if the
                 threepid was not found
         """
-        client_secret, id_server, id_access_token = self._extract_items_from_creds_dict(
-            creds
+        client_secret = creds.get("client_secret") or creds.get("clientSecret")
+        if not client_secret:
+            raise SynapseError(
+                400, "Missing param client_secret in creds", errcode=Codes.MISSING_PARAM
+            )
+        session_id = creds.get("sid")
+        if not session_id:
+            raise SynapseError(
+                400, "Missing param session_id in creds", errcode=Codes.MISSING_PARAM
+            )
+        if not id_server:
+            # Attempt to get the id_server from the creds dict
+            id_server = creds.get("id_server")
+            if not id_server:
+                raise SynapseError(
+                    400, "Missing param id_server in creds", errcode=Codes.MISSING_PARAM
+                )
+
+        query_params = {"sid": session_id, "client_secret": client_secret}
+
+        url = "https://%s%s" % (
+            id_server,
+            "/_matrix/identity/api/v1/3pid/getValidated3pid",
         )
 
-        # If an id_access_token is not supplied, force usage of v1
-        if id_access_token is None:
-            use_v2 = False
-
-        query_params = {"sid": creds["sid"], "client_secret": client_secret}
-
-        # Decide which API endpoint URLs and query parameters to use
-        if use_v2:
-            url = "https://%s%s" % (
-                id_server,
-                "/_matrix/identity/v2/3pid/getValidated3pid",
-            )
-            query_params["id_access_token"] = id_access_token
-        else:
-            url = "https://%s%s" % (
-                id_server,
-                "/_matrix/identity/api/v1/3pid/getValidated3pid",
-            )
-
-        try:
-            data = yield self.http_client.get_json(url, query_params)
-            return data if "medium" in data else None
-        except HttpResponseException as e:
-            if e.code != 404 or not use_v2:
-                # Generic failure
-                logger.info("getValidated3pid failed with Matrix error: %r", e)
-                raise e.to_synapse_error()
-
-        # This identity server is too old to understand Identity Service API v2
-        # Attempt v1 endpoint
-        logger.info("Got 404 when POSTing JSON %s, falling back to v1 URL", url)
-        return (yield self.threepid_from_creds(creds, use_v2=False))
+        data = yield self.http_client.get_json(url, query_params)
+        return data if "medium" in data else None
 
     @defer.inlineCallbacks
     def bind_threepid(self, creds, mxid, use_v2=True):
diff --git a/synapse/rest/client/v2_alpha/account.py b/synapse/rest/client/v2_alpha/account.py
index 785d01ea5..94a8fec8f 100644
--- a/synapse/rest/client/v2_alpha/account.py
+++ b/synapse/rest/client/v2_alpha/account.py
@@ -523,7 +523,8 @@ class ThreepidRestServlet(RestServlet):
         requester = yield self.auth.get_user_by_req(request)
         user_id = requester.user.to_string()
 
-        threepid = yield self.identity_handler.threepid_from_creds(threepid_creds)
+        # Retrieve the identity server from the request
+        threepid = yield self.identity_handler.threepid_from_creds(None, threepid_creds)
 
         if not threepid:
             raise SynapseError(400, "Failed to auth 3pid", Codes.THREEPID_AUTH_FAILED)

From b5833a2abf788a4144602c3e0de15d371608094b Mon Sep 17 00:00:00 2001
From: Andrew Morgan <andrew@amorgan.xyz>
Date: Tue, 10 Sep 2019 17:43:57 +0100
Subject: [PATCH 020/276] Add changelog

---
 changelog.d/6011.feature                | 1 +
 synapse/handlers/identity.py            | 6 +++---
 synapse/rest/client/v2_alpha/account.py | 2 +-
 3 files changed, 5 insertions(+), 4 deletions(-)
 create mode 100644 changelog.d/6011.feature

diff --git a/changelog.d/6011.feature b/changelog.d/6011.feature
new file mode 100644
index 000000000..ad16acb12
--- /dev/null
+++ b/changelog.d/6011.feature
@@ -0,0 +1 @@
+Use account_threepid_delegate.email and account_threepid_delegate.msisdn for validating threepid sessions.
\ No newline at end of file
diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py
index 2dfb79fde..f6d1d1717 100644
--- a/synapse/handlers/identity.py
+++ b/synapse/handlers/identity.py
@@ -84,8 +84,8 @@ class IdentityHandler(BaseHandler):
             id_server (str|None): The identity server to validate 3PIDs against. If None,
                 we will attempt to extract id_server creds
 
-            creds (dict[str, str]): Dictionary containing the following key:
-                * id_server: An optional domain name of an identity server
+            creds (dict[str, str]): Dictionary containing the following keys:
+                * id_server|idServer: An optional domain name of an identity server
                 * client_secret|clientSecret: A unique secret str provided by the client
                 * sid: The ID of the validation session
 
@@ -106,7 +106,7 @@ class IdentityHandler(BaseHandler):
             )
         if not id_server:
             # Attempt to get the id_server from the creds dict
-            id_server = creds.get("id_server")
+            id_server = creds.get("id_server") or creds.get("idServer")
             if not id_server:
                 raise SynapseError(
                     400, "Missing param id_server in creds", errcode=Codes.MISSING_PARAM
diff --git a/synapse/rest/client/v2_alpha/account.py b/synapse/rest/client/v2_alpha/account.py
index 94a8fec8f..2ea515d2f 100644
--- a/synapse/rest/client/v2_alpha/account.py
+++ b/synapse/rest/client/v2_alpha/account.py
@@ -523,7 +523,7 @@ class ThreepidRestServlet(RestServlet):
         requester = yield self.auth.get_user_by_req(request)
         user_id = requester.user.to_string()
 
-        # Retrieve the identity server from the request
+        # Specify None as the identity server to retrieve it from the request body instead
         threepid = yield self.identity_handler.threepid_from_creds(None, threepid_creds)
 
         if not threepid:

From cd17a2085eb517d24c68e33cd3906375a8baeb3b Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Wed, 11 Sep 2019 10:37:17 +0100
Subject: [PATCH 021/276] Remove origin parameter from
 add_display_name_to_third_party_invite and add params to docstring (#6010)

Another small fixup noticed during work on a larger PR. The `origin` field of `add_display_name_to_third_party_invite` is not used and likely was just carried over from the `on_PUT` method of `FederationThirdPartyInviteExchangeServlet` which, like all other servlets, provides an `origin` argument.

Since it's not used anywhere in the handler function though, we should remove it from the function arguments.
---
 changelog.d/6010.misc                   | 1 +
 synapse/federation/federation_server.py | 4 ++--
 synapse/federation/transport/server.py  | 2 +-
 synapse/handlers/federation.py          | 7 ++++++-
 4 files changed, 10 insertions(+), 4 deletions(-)
 create mode 100644 changelog.d/6010.misc

diff --git a/changelog.d/6010.misc b/changelog.d/6010.misc
new file mode 100644
index 000000000..0659f12eb
--- /dev/null
+++ b/changelog.d/6010.misc
@@ -0,0 +1 @@
+Remove unused `origin` argument on FederationHandler.add_display_name_to_third_party_invite.
\ No newline at end of file
diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py
index e5f0b90ae..da06ab379 100644
--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@@ -669,9 +669,9 @@ class FederationServer(FederationBase):
         return ret
 
     @defer.inlineCallbacks
-    def on_exchange_third_party_invite_request(self, origin, room_id, event_dict):
+    def on_exchange_third_party_invite_request(self, room_id, event_dict):
         ret = yield self.handler.on_exchange_third_party_invite_request(
-            origin, room_id, event_dict
+            room_id, event_dict
         )
         return ret
 
diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py
index 132a8fb5e..7dc696c7a 100644
--- a/synapse/federation/transport/server.py
+++ b/synapse/federation/transport/server.py
@@ -575,7 +575,7 @@ class FederationThirdPartyInviteExchangeServlet(BaseFederationServlet):
 
     async def on_PUT(self, origin, content, query, room_id):
         content = await self.handler.on_exchange_third_party_invite_request(
-            origin, room_id, content
+            room_id, content
         )
         return 200, content
 
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index 538b16efd..f72b81d41 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -2530,12 +2530,17 @@ class FederationHandler(BaseHandler):
 
     @defer.inlineCallbacks
     @log_function
-    def on_exchange_third_party_invite_request(self, origin, room_id, event_dict):
+    def on_exchange_third_party_invite_request(self, room_id, event_dict):
         """Handle an exchange_third_party_invite request from a remote server
 
         The remote server will call this when it wants to turn a 3pid invite
         into a normal m.room.member invite.
 
+        Args:
+            room_id (str): The ID of the room.
+
+            event_dict (dict[str, Any]): Dictionary containing the event body.
+
         Returns:
             Deferred: resolves (to None)
         """

From 54ce81c86d163b883df67b97540426759a9f6363 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 11 Sep 2019 10:46:38 +0100
Subject: [PATCH 022/276] Allow use of different ratelimits for admin
 redactions.

This is useful to allow room admins to quickly deal with a large number
of abusive messages.
---
 synapse/config/ratelimiting.py | 13 +++++++++++
 synapse/handlers/_base.py      | 41 +++++++++++++++++++++++++---------
 synapse/handlers/message.py    |  8 ++++++-
 synapse/server.py              |  4 ++++
 4 files changed, 55 insertions(+), 11 deletions(-)

diff --git a/synapse/config/ratelimiting.py b/synapse/config/ratelimiting.py
index 33f31cf21..b4df6612d 100644
--- a/synapse/config/ratelimiting.py
+++ b/synapse/config/ratelimiting.py
@@ -80,6 +80,12 @@ class RatelimitConfig(Config):
             "federation_rr_transactions_per_room_per_second", 50
         )
 
+        rc_admin_redaction = config.get("rc_admin_redaction")
+        if rc_admin_redaction:
+            self.rc_admin_redaction = RateLimitConfig(rc_admin_redaction)
+        else:
+            self.rc_admin_redaction = None
+
     def generate_config_section(self, **kwargs):
         return """\
         ## Ratelimiting ##
@@ -102,6 +108,9 @@ class RatelimitConfig(Config):
         #   - one for login that ratelimits login requests based on the account the
         #     client is attempting to log into, based on the amount of failed login
         #     attempts for this account.
+        #   - one for ratelimiting redactions by room admins. If this is not explicitly
+        #     set then it uses the same ratelimiting as per rc_message. This is useful
+        #     to allow room admins to quickly deal with abuse quickly.
         #
         # The defaults are as shown below.
         #
@@ -123,6 +132,10 @@ class RatelimitConfig(Config):
         #  failed_attempts:
         #    per_second: 0.17
         #    burst_count: 3
+        #
+        #rc_admin_redaction:
+        #  per_second: 1
+        #  burst_count: 50
 
 
         # Ratelimiting settings for incoming federation
diff --git a/synapse/handlers/_base.py b/synapse/handlers/_base.py
index c29c78bd6..853b72d8e 100644
--- a/synapse/handlers/_base.py
+++ b/synapse/handlers/_base.py
@@ -45,6 +45,7 @@ class BaseHandler(object):
         self.state_handler = hs.get_state_handler()
         self.distributor = hs.get_distributor()
         self.ratelimiter = hs.get_ratelimiter()
+        self.admin_redaction_ratelimiter = hs.get_admin_redaction_ratelimiter()
         self.clock = hs.get_clock()
         self.hs = hs
 
@@ -53,7 +54,7 @@ class BaseHandler(object):
         self.event_builder_factory = hs.get_event_builder_factory()
 
     @defer.inlineCallbacks
-    def ratelimit(self, requester, update=True):
+    def ratelimit(self, requester, update=True, is_admin_redaction=False):
         """Ratelimits requests.
 
         Args:
@@ -62,6 +63,9 @@ class BaseHandler(object):
                 Set to False when doing multiple checks for one request (e.g.
                 to check up front if we would reject the request), and set to
                 True for the last call for a given request.
+            is_admin_redaction (bool): Whether this is a room admin/moderator
+                redacting an event. If so then we may apply different
+                ratelimits depending on config.
 
         Raises:
             LimitExceededError if the request should be ratelimited
@@ -90,16 +94,33 @@ class BaseHandler(object):
             messages_per_second = override.messages_per_second
             burst_count = override.burst_count
         else:
-            messages_per_second = self.hs.config.rc_message.per_second
-            burst_count = self.hs.config.rc_message.burst_count
+            # We default to different values if this is an admin redaction and
+            # the config is set
+            if is_admin_redaction and self.hs.config.rc_admin_redaction:
+                messages_per_second = self.hs.config.rc_admin_redaction.per_second
+                burst_count = self.hs.config.rc_admin_redaction.burst_count
+            else:
+                messages_per_second = self.hs.config.rc_message.per_second
+                burst_count = self.hs.config.rc_message.burst_count
 
-        allowed, time_allowed = self.ratelimiter.can_do_action(
-            user_id,
-            time_now,
-            rate_hz=messages_per_second,
-            burst_count=burst_count,
-            update=update,
-        )
+        if is_admin_redaction and self.hs.config.rc_admin_redaction:
+            # If we have separate config for admin redactions we use a separate
+            # ratelimiter.
+            allowed, time_allowed = self.admin_redaction_ratelimiter.can_do_action(
+                user_id,
+                time_now,
+                rate_hz=messages_per_second,
+                burst_count=burst_count,
+                update=update,
+            )
+        else:
+            allowed, time_allowed = self.ratelimiter.can_do_action(
+                user_id,
+                time_now,
+                rate_hz=messages_per_second,
+                burst_count=burst_count,
+                update=update,
+            )
         if not allowed:
             raise LimitExceededError(
                 retry_after_ms=int(1000 * (time_allowed - time_now))
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index 111f7c7e2..184170ef8 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -729,7 +729,13 @@ class EventCreationHandler(object):
         assert not self.config.worker_app
 
         if ratelimit:
-            yield self.base_handler.ratelimit(requester)
+            is_admin_redaction = (
+                event.type == EventTypes.Redaction
+                and event.sender != requester.user.to_string()
+            )
+            yield self.base_handler.ratelimit(
+                requester, is_admin_redaction=is_admin_redaction
+            )
 
         yield self.base_handler.maybe_kick_guest_users(event, context)
 
diff --git a/synapse/server.py b/synapse/server.py
index 9e28dba2b..1fcc7375d 100644
--- a/synapse/server.py
+++ b/synapse/server.py
@@ -221,6 +221,7 @@ class HomeServer(object):
         self.clock = Clock(reactor)
         self.distributor = Distributor()
         self.ratelimiter = Ratelimiter()
+        self.admin_redaction_ratelimiter = Ratelimiter()
         self.registration_ratelimiter = Ratelimiter()
 
         self.datastore = None
@@ -279,6 +280,9 @@ class HomeServer(object):
     def get_registration_ratelimiter(self):
         return self.registration_ratelimiter
 
+    def get_admin_redaction_ratelimiter(self):
+        return self.admin_redaction_ratelimiter
+
     def build_federation_client(self):
         return FederationClient(self)
 

From 2434c0084b670f54eb530b3ab6fad26aea6a075d Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 11 Sep 2019 10:48:52 +0100
Subject: [PATCH 023/276] Newsfile

---
 changelog.d/6015.feature | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/6015.feature

diff --git a/changelog.d/6015.feature b/changelog.d/6015.feature
new file mode 100644
index 000000000..42aaffced
--- /dev/null
+++ b/changelog.d/6015.feature
@@ -0,0 +1 @@
+Add config option to increase ratelimits for room admins redacting messages.

From 8df88b5ff37641a07a1e75f0a08d3744c5140452 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 11 Sep 2019 10:58:26 +0100
Subject: [PATCH 024/276] Update sample config

---
 docs/sample_config.yaml | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml
index 9b1ae58a2..ce657921b 100644
--- a/docs/sample_config.yaml
+++ b/docs/sample_config.yaml
@@ -518,6 +518,9 @@ log_config: "CONFDIR/SERVERNAME.log.config"
 #   - one for login that ratelimits login requests based on the account the
 #     client is attempting to log into, based on the amount of failed login
 #     attempts for this account.
+#   - one for ratelimiting redactions by room admins. If this is not explicitly
+#     set then it uses the same ratelimiting as per rc_message. This is useful
+#     to allow room admins to quickly deal with abuse quickly.
 #
 # The defaults are as shown below.
 #
@@ -539,6 +542,10 @@ log_config: "CONFDIR/SERVERNAME.log.config"
 #  failed_attempts:
 #    per_second: 0.17
 #    burst_count: 3
+#
+#rc_admin_redaction:
+#  per_second: 1
+#  burst_count: 50
 
 
 # Ratelimiting settings for incoming federation

From c64c3bb4c5b740e3f505708bc5dde0b5b29de6b8 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 11 Sep 2019 11:16:17 +0100
Subject: [PATCH 025/276] Fix how we check for self redaction

---
 synapse/handlers/message.py | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index 184170ef8..f97590941 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -729,10 +729,24 @@ class EventCreationHandler(object):
         assert not self.config.worker_app
 
         if ratelimit:
-            is_admin_redaction = (
-                event.type == EventTypes.Redaction
-                and event.sender != requester.user.to_string()
-            )
+            # We check if this is a room admin redacting an event so that we
+            # can apply different ratelimiting. We do this by simply checking
+            # its not a self-redaction (to avoid having to look up whether the
+            # user is actually admin or not).
+            is_admin_redaction = False
+            if event.type == EventTypes.Redaction:
+                original_event = yield self.store.get_event(
+                    event.redacts,
+                    check_redacted=False,
+                    get_prev_content=False,
+                    allow_rejected=False,
+                    allow_none=True,
+                )
+
+                is_admin_redaction = (
+                    original_event and event.sender != original_event.sender
+                )
+
             yield self.base_handler.ratelimit(
                 requester, is_admin_redaction=is_admin_redaction
             )

From caa9d6fed719a8a80eb4a998d32f09577d04f927 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 11 Sep 2019 11:16:23 +0100
Subject: [PATCH 026/276] Add test for admin redaction ratelimiting.

---
 tests/rest/client/test_redactions.py | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/tests/rest/client/test_redactions.py b/tests/rest/client/test_redactions.py
index fe66e397c..1b1e991c4 100644
--- a/tests/rest/client/test_redactions.py
+++ b/tests/rest/client/test_redactions.py
@@ -30,6 +30,14 @@ class RedactionsTestCase(HomeserverTestCase):
         sync.register_servlets,
     ]
 
+    def make_homeserver(self, reactor, clock):
+        config = self.default_config()
+
+        config["rc_message"] = {"per_second": 0.2, "burst_count": 10}
+        config["rc_admin_redaction"] = {"per_second": 1, "burst_count": 100}
+
+        return self.setup_test_homeserver(config=config)
+
     def prepare(self, reactor, clock, hs):
         # register a couple of users
         self.mod_user_id = self.register_user("user1", "pass")
@@ -177,3 +185,20 @@ class RedactionsTestCase(HomeserverTestCase):
         self._redact_event(
             self.other_access_token, self.room_id, create_event_id, expect_code=403
         )
+
+    def test_redact_event_as_moderator_ratelimit(self):
+        """Tests that the correct ratelimiting is applied to redactions
+        """
+
+        message_ids = []
+        # as a regular user, send messages to redact
+        for _ in range(20):
+            b = self.helper.send(room_id=self.room_id, tok=self.other_access_token)
+            message_ids.append(b["event_id"])
+            self.reactor.advance(10)  # To get around ratelimits
+
+        # as the moderator, send a bunch of redactions redaction
+        for msg_id in message_ids:
+            # These should all succeed, even though this would be denied by
+            # standard message ratelimiter
+            self._redact_event(self.mod_access_token, self.room_id, msg_id)

From 3505ffcda7d04a9c0100ff423a2239d1e6340fd0 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Wed, 11 Sep 2019 11:59:45 +0100
Subject: [PATCH 027/276] Fix existing v2 identity server calls (MSC2140)
 (#6013)

Two things I missed while implementing [MSC2140](https://github.com/matrix-org/matrix-doc/pull/2140/files#diff-c03a26de5ac40fb532de19cb7fc2aaf7R80).

1. Access tokens should be provided to the identity server as `access_token`, not `id_access_token`, even though the homeserver may accept the tokens as `id_access_token`.
2. Access tokens must be sent to the identity server in a query parameter; the JSON body is not allowed.

We now send the access token as part of an `Authorization: ...` header, which fixes both things.

The breaking code was added in https://github.com/matrix-org/synapse/pull/5892

Sytest PR: https://github.com/matrix-org/sytest/pull/697
---
 changelog.d/6013.misc        |  1 +
 synapse/handlers/identity.py | 28 ++++++++++++++++++++++++++--
 2 files changed, 27 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/6013.misc

diff --git a/changelog.d/6013.misc b/changelog.d/6013.misc
new file mode 100644
index 000000000..939fe8c65
--- /dev/null
+++ b/changelog.d/6013.misc
@@ -0,0 +1 @@
+Compatibility with v2 Identity Service APIs other than /lookup.
\ No newline at end of file
diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py
index f0549666c..f690fd04a 100644
--- a/synapse/handlers/identity.py
+++ b/synapse/handlers/identity.py
@@ -74,6 +74,25 @@ class IdentityHandler(BaseHandler):
         id_access_token = creds.get("id_access_token")
         return client_secret, id_server, id_access_token
 
+    def create_id_access_token_header(self, id_access_token):
+        """Create an Authorization header for passing to SimpleHttpClient as the header value
+        of an HTTP request.
+
+        Args:
+            id_access_token (str): An identity server access token.
+
+        Returns:
+            list[str]: The ascii-encoded bearer token encased in a list.
+        """
+        # Prefix with Bearer
+        bearer_token = "Bearer %s" % id_access_token
+
+        # Encode headers to standard ascii
+        bearer_token.encode("ascii")
+
+        # Return as a list as that's how SimpleHttpClient takes header values
+        return [bearer_token]
+
     @defer.inlineCallbacks
     def threepid_from_creds(self, id_server, creds):
         """
@@ -155,15 +174,20 @@ class IdentityHandler(BaseHandler):
             use_v2 = False
 
         # Decide which API endpoint URLs to use
+        headers = {}
         bind_data = {"sid": sid, "client_secret": client_secret, "mxid": mxid}
         if use_v2:
             bind_url = "https://%s/_matrix/identity/v2/3pid/bind" % (id_server,)
-            bind_data["id_access_token"] = id_access_token
+            headers["Authorization"] = self.create_id_access_token_header(
+                id_access_token
+            )
         else:
             bind_url = "https://%s/_matrix/identity/api/v1/3pid/bind" % (id_server,)
 
         try:
-            data = yield self.http_client.post_json_get_json(bind_url, bind_data)
+            data = yield self.http_client.post_json_get_json(
+                bind_url, bind_data, headers=headers
+            )
             logger.debug("bound threepid %r to %s", creds, mxid)
 
             # Remember where we bound the threepid

From 57dd41a45b4df5d736e2f30d40926b60f367b500 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 11 Sep 2019 13:54:50 +0100
Subject: [PATCH 028/276] Fix comments

Co-Authored-By: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
---
 synapse/config/ratelimiting.py       | 2 +-
 synapse/handlers/_base.py            | 2 +-
 synapse/handlers/message.py          | 2 +-
 tests/rest/client/test_redactions.py | 4 ++--
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/synapse/config/ratelimiting.py b/synapse/config/ratelimiting.py
index b4df6612d..587e2862b 100644
--- a/synapse/config/ratelimiting.py
+++ b/synapse/config/ratelimiting.py
@@ -110,7 +110,7 @@ class RatelimitConfig(Config):
         #     attempts for this account.
         #   - one for ratelimiting redactions by room admins. If this is not explicitly
         #     set then it uses the same ratelimiting as per rc_message. This is useful
-        #     to allow room admins to quickly deal with abuse quickly.
+        #     to allow room admins to deal with abuse quickly.
         #
         # The defaults are as shown below.
         #
diff --git a/synapse/handlers/_base.py b/synapse/handlers/_base.py
index 853b72d8e..d15c6282f 100644
--- a/synapse/handlers/_base.py
+++ b/synapse/handlers/_base.py
@@ -105,7 +105,7 @@ class BaseHandler(object):
 
         if is_admin_redaction and self.hs.config.rc_admin_redaction:
             # If we have separate config for admin redactions we use a separate
-            # ratelimiter.
+            # ratelimiter
             allowed, time_allowed = self.admin_redaction_ratelimiter.can_do_action(
                 user_id,
                 time_now,
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index f97590941..1f8272784 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -731,7 +731,7 @@ class EventCreationHandler(object):
         if ratelimit:
             # We check if this is a room admin redacting an event so that we
             # can apply different ratelimiting. We do this by simply checking
-            # its not a self-redaction (to avoid having to look up whether the
+            # it's not a self-redaction (to avoid having to look up whether the
             # user is actually admin or not).
             is_admin_redaction = False
             if event.type == EventTypes.Redaction:
diff --git a/tests/rest/client/test_redactions.py b/tests/rest/client/test_redactions.py
index 1b1e991c4..d2bcf256f 100644
--- a/tests/rest/client/test_redactions.py
+++ b/tests/rest/client/test_redactions.py
@@ -197,8 +197,8 @@ class RedactionsTestCase(HomeserverTestCase):
             message_ids.append(b["event_id"])
             self.reactor.advance(10)  # To get around ratelimits
 
-        # as the moderator, send a bunch of redactions redaction
+        # as the moderator, send a bunch of redactions
         for msg_id in message_ids:
             # These should all succeed, even though this would be denied by
-            # standard message ratelimiter
+            # the standard message ratelimiter
             self._redact_event(self.mod_access_token, self.room_id, msg_id)

From 6604b64fae970f534d3e2a61f2fbbe51599fa26d Mon Sep 17 00:00:00 2001
From: Jorik Schellekens <joriksch@gmail.com>
Date: Wed, 11 Sep 2019 14:00:37 +0100
Subject: [PATCH 029/276] Check dependencies on setup in the nicer way. (#5989)

---
 changelog.d/5989.misc          |  1 +
 synapse/config/metrics.py      | 12 +++++-------
 synapse/config/repository.py   | 27 +++++++--------------------
 synapse/python_dependencies.py |  8 +++++++-
 4 files changed, 20 insertions(+), 28 deletions(-)
 create mode 100644 changelog.d/5989.misc

diff --git a/changelog.d/5989.misc b/changelog.d/5989.misc
new file mode 100644
index 000000000..9f2525fd3
--- /dev/null
+++ b/changelog.d/5989.misc
@@ -0,0 +1 @@
+Clean up dependency checking at setup.
diff --git a/synapse/config/metrics.py b/synapse/config/metrics.py
index 653b990e6..9eb1e55dd 100644
--- a/synapse/config/metrics.py
+++ b/synapse/config/metrics.py
@@ -16,11 +16,9 @@
 
 import attr
 
-from ._base import Config, ConfigError
+from synapse.python_dependencies import DependencyException, check_requirements
 
-MISSING_SENTRY = """Missing sentry-sdk library. This is required to enable sentry
-    integration.
-    """
+from ._base import Config, ConfigError
 
 
 @attr.s
@@ -51,9 +49,9 @@ class MetricsConfig(Config):
         self.sentry_enabled = "sentry" in config
         if self.sentry_enabled:
             try:
-                import sentry_sdk  # noqa F401
-            except ImportError:
-                raise ConfigError(MISSING_SENTRY)
+                check_requirements("sentry")
+            except DependencyException as e:
+                raise ConfigError(e.message)
 
             self.sentry_dsn = config["sentry"].get("dsn")
             if not self.sentry_dsn:
diff --git a/synapse/config/repository.py b/synapse/config/repository.py
index fdb1f246d..34f1a9a92 100644
--- a/synapse/config/repository.py
+++ b/synapse/config/repository.py
@@ -16,6 +16,7 @@
 import os
 from collections import namedtuple
 
+from synapse.python_dependencies import DependencyException, check_requirements
 from synapse.util.module_loader import load_module
 
 from ._base import Config, ConfigError
@@ -34,17 +35,6 @@ THUMBNAIL_SIZE_YAML = """\
         #    method: %(method)s
 """
 
-MISSING_NETADDR = "Missing netaddr library. This is required for URL preview API."
-
-MISSING_LXML = """Missing lxml library. This is required for URL preview API.
-
-    Install by running:
-        pip install lxml
-
-    Requires libxslt1-dev system package.
-    """
-
-
 ThumbnailRequirement = namedtuple(
     "ThumbnailRequirement", ["width", "height", "method", "media_type"]
 )
@@ -171,16 +161,10 @@ class ContentRepositoryConfig(Config):
         self.url_preview_enabled = config.get("url_preview_enabled", False)
         if self.url_preview_enabled:
             try:
-                import lxml
+                check_requirements("url_preview")
 
-                lxml  # To stop unused lint.
-            except ImportError:
-                raise ConfigError(MISSING_LXML)
-
-            try:
-                from netaddr import IPSet
-            except ImportError:
-                raise ConfigError(MISSING_NETADDR)
+            except DependencyException as e:
+                raise ConfigError(e.message)
 
             if "url_preview_ip_range_blacklist" not in config:
                 raise ConfigError(
@@ -189,6 +173,9 @@ class ContentRepositoryConfig(Config):
                     "to work"
                 )
 
+            # netaddr is a dependency for url_preview
+            from netaddr import IPSet
+
             self.url_preview_ip_range_blacklist = IPSet(
                 config["url_preview_ip_range_blacklist"]
             )
diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py
index ec0ac547c..07345e916 100644
--- a/synapse/python_dependencies.py
+++ b/synapse/python_dependencies.py
@@ -147,7 +147,13 @@ def check_requirements(for_feature=None):
             )
         except DistributionNotFound:
             deps_needed.append(dependency)
-            errors.append("Needed %s but it was not installed" % (dependency,))
+            if for_feature:
+                errors.append(
+                    "Needed %s for the '%s' feature but it was not installed"
+                    % (dependency, for_feature)
+                )
+            else:
+                errors.append("Needed %s but it was not installed" % (dependency,))
 
     if not for_feature:
         # Check the optional dependencies are up to date. We allow them to not be

From 9c555f37e30f339708dfd9a66687c4cd638aa957 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Wed, 11 Sep 2019 14:23:24 +0100
Subject: [PATCH 030/276] Add note about extra arg to send_membership_event,
 remove arg in remote_reject_invite (#6009)

Some small fixes to `room_member.py` found while doing other PRs.

1. Add requester to the base `_remote_reject_invite` method.
2. `send_membership_event`'s docstring was out of date and took in a `remote_room_hosts` arg that was not used and no calling function provided.
---
 changelog.d/6009.misc           |  1 +
 synapse/handlers/room_member.py | 12 ++----------
 2 files changed, 3 insertions(+), 10 deletions(-)
 create mode 100644 changelog.d/6009.misc

diff --git a/changelog.d/6009.misc b/changelog.d/6009.misc
new file mode 100644
index 000000000..fea479e1d
--- /dev/null
+++ b/changelog.d/6009.misc
@@ -0,0 +1 @@
+Small refactor of function arguments and docstrings in RoomMemberHandler.
\ No newline at end of file
diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py
index 093f2ea36..a3a3d4d14 100644
--- a/synapse/handlers/room_member.py
+++ b/synapse/handlers/room_member.py
@@ -100,7 +100,7 @@ class RoomMemberHandler(object):
         raise NotImplementedError()
 
     @abc.abstractmethod
-    def _remote_reject_invite(self, remote_room_hosts, room_id, target):
+    def _remote_reject_invite(self, requester, remote_room_hosts, room_id, target):
         """Attempt to reject an invite for a room this server is not in. If we
         fail to do so we locally mark the invite as rejected.
 
@@ -510,9 +510,7 @@ class RoomMemberHandler(object):
         return res
 
     @defer.inlineCallbacks
-    def send_membership_event(
-        self, requester, event, context, remote_room_hosts=None, ratelimit=True
-    ):
+    def send_membership_event(self, requester, event, context, ratelimit=True):
         """
         Change the membership status of a user in a room.
 
@@ -522,16 +520,10 @@ class RoomMemberHandler(object):
                 act as the sender, will be skipped.
             event (SynapseEvent): The membership event.
             context: The context of the event.
-            is_guest (bool): Whether the sender is a guest.
-            room_hosts ([str]): Homeservers which are likely to already be in
-                the room, and could be danced with in order to join this
-                homeserver for the first time.
             ratelimit (bool): Whether to rate limit this request.
         Raises:
             SynapseError if there was a problem changing the membership.
         """
-        remote_room_hosts = remote_room_hosts or []
-
         target_user = UserID.from_string(event.state_key)
         room_id = event.room_id
 

From 66ace43546e516938fa40b38bdd0d8d3ca9f9e31 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 11 Sep 2019 14:50:40 +0100
Subject: [PATCH 031/276] Update sample config

---
 docs/sample_config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml
index ce657921b..c970a1c67 100644
--- a/docs/sample_config.yaml
+++ b/docs/sample_config.yaml
@@ -520,7 +520,7 @@ log_config: "CONFDIR/SERVERNAME.log.config"
 #     attempts for this account.
 #   - one for ratelimiting redactions by room admins. If this is not explicitly
 #     set then it uses the same ratelimiting as per rc_message. This is useful
-#     to allow room admins to quickly deal with abuse quickly.
+#     to allow room admins to deal with abuse quickly.
 #
 # The defaults are as shown below.
 #

From 7902bf1e1d6331e7964ac498988925cc26e18f79 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Wed, 11 Sep 2019 15:14:56 +0100
Subject: [PATCH 032/276] Clean up some code in the retry logic (#6017)

* remove some unused code
* make things which were constants into constants for efficiency and clarity
---
 changelog.d/6017.misc           |  1 +
 synapse/storage/transactions.py | 20 --------------------
 synapse/util/retryutils.py      | 29 +++++++++++++----------------
 3 files changed, 14 insertions(+), 36 deletions(-)
 create mode 100644 changelog.d/6017.misc

diff --git a/changelog.d/6017.misc b/changelog.d/6017.misc
new file mode 100644
index 000000000..5ccab9c6c
--- /dev/null
+++ b/changelog.d/6017.misc
@@ -0,0 +1 @@
+Clean up some code in the retry logic.
diff --git a/synapse/storage/transactions.py b/synapse/storage/transactions.py
index b3c3bf55b..d81ace0ec 100644
--- a/synapse/storage/transactions.py
+++ b/synapse/storage/transactions.py
@@ -250,26 +250,6 @@ class TransactionStore(SQLBaseStore):
                 },
             )
 
-    def get_destinations_needing_retry(self):
-        """Get all destinations which are due a retry for sending a transaction.
-
-        Returns:
-            list: A list of dicts
-        """
-
-        return self.runInteraction(
-            "get_destinations_needing_retry", self._get_destinations_needing_retry
-        )
-
-    def _get_destinations_needing_retry(self, txn):
-        query = (
-            "SELECT * FROM destinations"
-            " WHERE retry_last_ts > 0 and retry_next_ts < ?"
-        )
-
-        txn.execute(query, (self._clock.time_msec(),))
-        return self.cursor_to_dict(txn)
-
     def _start_cleanup_transactions(self):
         return run_as_background_process(
             "cleanup_transactions", self._cleanup_transactions
diff --git a/synapse/util/retryutils.py b/synapse/util/retryutils.py
index 0862b5ca5..5b16a8161 100644
--- a/synapse/util/retryutils.py
+++ b/synapse/util/retryutils.py
@@ -22,6 +22,15 @@ from synapse.api.errors import CodeMessageException
 
 logger = logging.getLogger(__name__)
 
+# the initial backoff, after the first transaction fails
+MIN_RETRY_INTERVAL = 10 * 60 * 1000
+
+# how much we multiply the backoff by after each subsequent fail
+RETRY_MULTIPLIER = 5
+
+# a cap on the backoff
+MAX_RETRY_INTERVAL = 24 * 60 * 60 * 1000
+
 
 class NotRetryingDestination(Exception):
     def __init__(self, retry_last_ts, retry_interval, destination):
@@ -112,9 +121,6 @@ class RetryDestinationLimiter(object):
         clock,
         store,
         retry_interval,
-        min_retry_interval=10 * 60 * 1000,
-        max_retry_interval=24 * 60 * 60 * 1000,
-        multiplier_retry_interval=5,
         backoff_on_404=False,
         backoff_on_failure=True,
     ):
@@ -130,12 +136,6 @@ class RetryDestinationLimiter(object):
             retry_interval (int): The next retry interval taken from the
                 database in milliseconds, or zero if the last request was
                 successful.
-            min_retry_interval (int): The minimum retry interval to use after
-                a failed request, in milliseconds.
-            max_retry_interval (int): The maximum retry interval to use after
-                a failed request, in milliseconds.
-            multiplier_retry_interval (int): The multiplier to use to increase
-                the retry interval after a failed request.
             backoff_on_404 (bool): Back off if we get a 404
 
             backoff_on_failure (bool): set to False if we should not increase the
@@ -146,9 +146,6 @@ class RetryDestinationLimiter(object):
         self.destination = destination
 
         self.retry_interval = retry_interval
-        self.min_retry_interval = min_retry_interval
-        self.max_retry_interval = max_retry_interval
-        self.multiplier_retry_interval = multiplier_retry_interval
         self.backoff_on_404 = backoff_on_404
         self.backoff_on_failure = backoff_on_failure
 
@@ -196,13 +193,13 @@ class RetryDestinationLimiter(object):
         else:
             # We couldn't connect.
             if self.retry_interval:
-                self.retry_interval *= self.multiplier_retry_interval
+                self.retry_interval *= RETRY_MULTIPLIER
                 self.retry_interval *= int(random.uniform(0.8, 1.4))
 
-                if self.retry_interval >= self.max_retry_interval:
-                    self.retry_interval = self.max_retry_interval
+                if self.retry_interval >= MAX_RETRY_INTERVAL:
+                    self.retry_interval = MAX_RETRY_INTERVAL
             else:
-                self.retry_interval = self.min_retry_interval
+                self.retry_interval = MIN_RETRY_INTERVAL
 
             logger.info(
                 "Connection to %s was unsuccessful (%s(%s)); backoff now %i",

From 9fc71dc5eed7531454a34f8fec34bd451458c7c6 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Wed, 11 Sep 2019 16:02:42 +0100
Subject: [PATCH 033/276] Use the v2 Identity Service API for lookups (MSC2134
 + MSC2140) (#5976)

This is a redo of https://github.com/matrix-org/synapse/pull/5897 but with `id_access_token` accepted.

Implements [MSC2134](https://github.com/matrix-org/matrix-doc/pull/2134) plus Identity Service v2 authentication ala [MSC2140](https://github.com/matrix-org/matrix-doc/pull/2140).

Identity lookup-related functions were also moved from `RoomMemberHandler` to `IdentityHandler`.
---
 changelog.d/5897.feature        |   1 +
 synapse/handlers/identity.py    |  56 ++++++----
 synapse/handlers/room.py        |   4 +-
 synapse/handlers/room_member.py | 178 +++++++++++++++++++++++++++++---
 synapse/rest/client/v1/room.py  |   1 +
 synapse/util/hash.py            |  33 ++++++
 6 files changed, 238 insertions(+), 35 deletions(-)
 create mode 100644 changelog.d/5897.feature
 create mode 100644 synapse/util/hash.py

diff --git a/changelog.d/5897.feature b/changelog.d/5897.feature
new file mode 100644
index 000000000..1557e559e
--- /dev/null
+++ b/changelog.d/5897.feature
@@ -0,0 +1 @@
+Switch to using the v2 Identity Service `/lookup` API where available, with fallback to v1. (Implements [MSC2134](https://github.com/matrix-org/matrix-doc/pull/2134) plus id_access_token authentication for v2 Identity Service APIs from [MSC2140](https://github.com/matrix-org/matrix-doc/pull/2140)).
diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py
index f690fd04a..512f38e5a 100644
--- a/synapse/handlers/identity.py
+++ b/synapse/handlers/identity.py
@@ -74,25 +74,6 @@ class IdentityHandler(BaseHandler):
         id_access_token = creds.get("id_access_token")
         return client_secret, id_server, id_access_token
 
-    def create_id_access_token_header(self, id_access_token):
-        """Create an Authorization header for passing to SimpleHttpClient as the header value
-        of an HTTP request.
-
-        Args:
-            id_access_token (str): An identity server access token.
-
-        Returns:
-            list[str]: The ascii-encoded bearer token encased in a list.
-        """
-        # Prefix with Bearer
-        bearer_token = "Bearer %s" % id_access_token
-
-        # Encode headers to standard ascii
-        bearer_token.encode("ascii")
-
-        # Return as a list as that's how SimpleHttpClient takes header values
-        return [bearer_token]
-
     @defer.inlineCallbacks
     def threepid_from_creds(self, id_server, creds):
         """
@@ -178,9 +159,7 @@ class IdentityHandler(BaseHandler):
         bind_data = {"sid": sid, "client_secret": client_secret, "mxid": mxid}
         if use_v2:
             bind_url = "https://%s/_matrix/identity/v2/3pid/bind" % (id_server,)
-            headers["Authorization"] = self.create_id_access_token_header(
-                id_access_token
-            )
+            headers["Authorization"] = create_id_access_token_header(id_access_token)
         else:
             bind_url = "https://%s/_matrix/identity/api/v1/3pid/bind" % (id_server,)
 
@@ -478,3 +457,36 @@ class IdentityHandler(BaseHandler):
         except HttpResponseException as e:
             logger.info("Proxied requestToken failed: %r", e)
             raise e.to_synapse_error()
+
+
+def create_id_access_token_header(id_access_token):
+    """Create an Authorization header for passing to SimpleHttpClient as the header value
+    of an HTTP request.
+
+    Args:
+        id_access_token (str): An identity server access token.
+
+    Returns:
+        list[str]: The ascii-encoded bearer token encased in a list.
+    """
+    # Prefix with Bearer
+    bearer_token = "Bearer %s" % id_access_token
+
+    # Encode headers to standard ascii
+    bearer_token.encode("ascii")
+
+    # Return as a list as that's how SimpleHttpClient takes header values
+    return [bearer_token]
+
+
+class LookupAlgorithm:
+    """
+    Supported hashing algorithms when performing a 3PID lookup.
+
+    SHA256 - Hashing an (address, medium, pepper) combo with sha256, then url-safe base64
+        encoding
+    NONE - Not performing any hashing. Simply sending an (address, medium) combo in plaintext
+    """
+
+    SHA256 = "sha256"
+    NONE = "none"
diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py
index a509e11d6..970be3c84 100644
--- a/synapse/handlers/room.py
+++ b/synapse/handlers/room.py
@@ -579,8 +579,8 @@ class RoomCreationHandler(BaseHandler):
 
         room_id = yield self._generate_room_id(creator_id=user_id, is_public=is_public)
 
+        directory_handler = self.hs.get_handlers().directory_handler
         if room_alias:
-            directory_handler = self.hs.get_handlers().directory_handler
             yield directory_handler.create_association(
                 requester=requester,
                 room_id=room_id,
@@ -665,6 +665,7 @@ class RoomCreationHandler(BaseHandler):
 
         for invite_3pid in invite_3pid_list:
             id_server = invite_3pid["id_server"]
+            id_access_token = invite_3pid.get("id_access_token")  # optional
             address = invite_3pid["address"]
             medium = invite_3pid["medium"]
             yield self.hs.get_room_member_handler().do_3pid_invite(
@@ -675,6 +676,7 @@ class RoomCreationHandler(BaseHandler):
                 id_server,
                 requester,
                 txn_id=None,
+                id_access_token=id_access_token,
             )
 
         result = {"room_id": room_id}
diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py
index a3a3d4d14..43d10a530 100644
--- a/synapse/handlers/room_member.py
+++ b/synapse/handlers/room_member.py
@@ -29,9 +29,11 @@ from twisted.internet import defer
 from synapse import types
 from synapse.api.constants import EventTypes, Membership
 from synapse.api.errors import AuthError, Codes, HttpResponseException, SynapseError
+from synapse.handlers.identity import LookupAlgorithm, create_id_access_token_header
 from synapse.types import RoomID, UserID
 from synapse.util.async_helpers import Linearizer
 from synapse.util.distributor import user_joined_room, user_left_room
+from synapse.util.hash import sha256_and_url_safe_base64
 
 from ._base import BaseHandler
 
@@ -626,7 +628,7 @@ class RoomMemberHandler(object):
             servers.remove(room_alias.domain)
         servers.insert(0, room_alias.domain)
 
-        return (RoomID.from_string(room_id), servers)
+        return RoomID.from_string(room_id), servers
 
     @defer.inlineCallbacks
     def _get_inviter(self, user_id, room_id):
@@ -638,7 +640,15 @@ class RoomMemberHandler(object):
 
     @defer.inlineCallbacks
     def do_3pid_invite(
-        self, room_id, inviter, medium, address, id_server, requester, txn_id
+        self,
+        room_id,
+        inviter,
+        medium,
+        address,
+        id_server,
+        requester,
+        txn_id,
+        id_access_token=None,
     ):
         if self.config.block_non_admin_invites:
             is_requester_admin = yield self.auth.is_server_admin(requester.user)
@@ -661,7 +671,12 @@ class RoomMemberHandler(object):
                 Codes.FORBIDDEN,
             )
 
-        invitee = yield self._lookup_3pid(id_server, medium, address)
+        if not self._enable_lookup:
+            raise SynapseError(
+                403, "Looking up third-party identifiers is denied from this server"
+            )
+
+        invitee = yield self._lookup_3pid(id_server, medium, address, id_access_token)
 
         if invitee:
             yield self.update_membership(
@@ -673,9 +688,47 @@ class RoomMemberHandler(object):
             )
 
     @defer.inlineCallbacks
-    def _lookup_3pid(self, id_server, medium, address):
+    def _lookup_3pid(self, id_server, medium, address, id_access_token=None):
         """Looks up a 3pid in the passed identity server.
 
+        Args:
+            id_server (str): The server name (including port, if required)
+                of the identity server to use.
+            medium (str): The type of the third party identifier (e.g. "email").
+            address (str): The third party identifier (e.g. "foo@example.com").
+            id_access_token (str|None): The access token to authenticate to the identity
+                server with
+
+        Returns:
+            str|None: the matrix ID of the 3pid, or None if it is not recognized.
+        """
+        if id_access_token is not None:
+            try:
+                results = yield self._lookup_3pid_v2(
+                    id_server, id_access_token, medium, address
+                )
+                return results
+
+            except Exception as e:
+                # Catch HttpResponseExcept for a non-200 response code
+                # Check if this identity server does not know about v2 lookups
+                if isinstance(e, HttpResponseException) and e.code == 404:
+                    # This is an old identity server that does not yet support v2 lookups
+                    logger.warning(
+                        "Attempted v2 lookup on v1 identity server %s. Falling "
+                        "back to v1",
+                        id_server,
+                    )
+                else:
+                    logger.warning("Error when looking up hashing details: %s", e)
+                    return None
+
+        return (yield self._lookup_3pid_v1(id_server, medium, address))
+
+    @defer.inlineCallbacks
+    def _lookup_3pid_v1(self, id_server, medium, address):
+        """Looks up a 3pid in the passed identity server using v1 lookup.
+
         Args:
             id_server (str): The server name (including port, if required)
                 of the identity server to use.
@@ -685,10 +738,6 @@ class RoomMemberHandler(object):
         Returns:
             str: the matrix ID of the 3pid, or None if it is not recognized.
         """
-        if not self._enable_lookup:
-            raise SynapseError(
-                403, "Looking up third-party identifiers is denied from this server"
-            )
         try:
             data = yield self.simple_http_client.get_json(
                 "%s%s/_matrix/identity/api/v1/lookup" % (id_server_scheme, id_server),
@@ -702,9 +751,116 @@ class RoomMemberHandler(object):
                 return data["mxid"]
 
         except IOError as e:
-            logger.warn("Error from identity server lookup: %s" % (e,))
+            logger.warning("Error from v1 identity server lookup: %s" % (e,))
+
+        return None
+
+    @defer.inlineCallbacks
+    def _lookup_3pid_v2(self, id_server, id_access_token, medium, address):
+        """Looks up a 3pid in the passed identity server using v2 lookup.
+
+        Args:
+            id_server (str): The server name (including port, if required)
+                of the identity server to use.
+            id_access_token (str): The access token to authenticate to the identity server with
+            medium (str): The type of the third party identifier (e.g. "email").
+            address (str): The third party identifier (e.g. "foo@example.com").
+
+        Returns:
+            Deferred[str|None]: the matrix ID of the 3pid, or None if it is not recognised.
+        """
+        # Check what hashing details are supported by this identity server
+        hash_details = yield self.simple_http_client.get_json(
+            "%s%s/_matrix/identity/v2/hash_details" % (id_server_scheme, id_server),
+            {"access_token": id_access_token},
+        )
+
+        if not isinstance(hash_details, dict):
+            logger.warning(
+                "Got non-dict object when checking hash details of %s%s: %s",
+                id_server_scheme,
+                id_server,
+                hash_details,
+            )
+            raise SynapseError(
+                400,
+                "Non-dict object from %s%s during v2 hash_details request: %s"
+                % (id_server_scheme, id_server, hash_details),
+            )
+
+        # Extract information from hash_details
+        supported_lookup_algorithms = hash_details.get("algorithms")
+        lookup_pepper = hash_details.get("lookup_pepper")
+        if (
+            not supported_lookup_algorithms
+            or not isinstance(supported_lookup_algorithms, list)
+            or not lookup_pepper
+            or not isinstance(lookup_pepper, str)
+        ):
+            raise SynapseError(
+                400,
+                "Invalid hash details received from identity server %s%s: %s"
+                % (id_server_scheme, id_server, hash_details),
+            )
+
+        # Check if any of the supported lookup algorithms are present
+        if LookupAlgorithm.SHA256 in supported_lookup_algorithms:
+            # Perform a hashed lookup
+            lookup_algorithm = LookupAlgorithm.SHA256
+
+            # Hash address, medium and the pepper with sha256
+            to_hash = "%s %s %s" % (address, medium, lookup_pepper)
+            lookup_value = sha256_and_url_safe_base64(to_hash)
+
+        elif LookupAlgorithm.NONE in supported_lookup_algorithms:
+            # Perform a non-hashed lookup
+            lookup_algorithm = LookupAlgorithm.NONE
+
+            # Combine together plaintext address and medium
+            lookup_value = "%s %s" % (address, medium)
+
+        else:
+            logger.warning(
+                "None of the provided lookup algorithms of %s are supported: %s",
+                id_server,
+                supported_lookup_algorithms,
+            )
+            raise SynapseError(
+                400,
+                "Provided identity server does not support any v2 lookup "
+                "algorithms that this homeserver supports.",
+            )
+
+        # Authenticate with identity server given the access token from the client
+        headers = {"Authorization": create_id_access_token_header(id_access_token)}
+
+        try:
+            lookup_results = yield self.simple_http_client.post_json_get_json(
+                "%s%s/_matrix/identity/v2/lookup" % (id_server_scheme, id_server),
+                {
+                    "addresses": [lookup_value],
+                    "algorithm": lookup_algorithm,
+                    "pepper": lookup_pepper,
+                },
+                headers=headers,
+            )
+        except Exception as e:
+            logger.warning("Error when performing a v2 3pid lookup: %s", e)
+            raise SynapseError(
+                500, "Unknown error occurred during identity server lookup"
+            )
+
+        # Check for a mapping from what we looked up to an MXID
+        if "mappings" not in lookup_results or not isinstance(
+            lookup_results["mappings"], dict
+        ):
+            logger.warning("No results from 3pid lookup")
             return None
 
+        # Return the MXID if it's available, or None otherwise
+        mxid = lookup_results["mappings"].get(lookup_value)
+        return mxid
+
     @defer.inlineCallbacks
     def _verify_any_signature(self, data, server_hostname):
         if server_hostname not in data["signatures"]:
@@ -844,7 +1000,6 @@ class RoomMemberHandler(object):
                 display_name (str): A user-friendly name to represent the invited
                     user.
         """
-
         is_url = "%s%s/_matrix/identity/api/v1/store-invite" % (
             id_server_scheme,
             id_server,
@@ -862,7 +1017,6 @@ class RoomMemberHandler(object):
             "sender_display_name": inviter_display_name,
             "sender_avatar_url": inviter_avatar_url,
         }
-
         try:
             data = yield self.simple_http_client.post_json_get_json(
                 is_url, invite_config
@@ -1049,7 +1203,7 @@ class RoomMemberMasterHandler(RoomMemberHandler):
             # The 'except' clause is very broad, but we need to
             # capture everything from DNS failures upwards
             #
-            logger.warn("Failed to reject invite: %s", e)
+            logger.warning("Failed to reject invite: %s", e)
 
             yield self.store.locally_reject_invite(target.to_string(), room_id)
             return {}
diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py
index 358225902..a6a7b3b57 100644
--- a/synapse/rest/client/v1/room.py
+++ b/synapse/rest/client/v1/room.py
@@ -701,6 +701,7 @@ class RoomMembershipRestServlet(TransactionRestServlet):
                 content["id_server"],
                 requester,
                 txn_id,
+                content.get("id_access_token"),
             )
             return 200, {}
 
diff --git a/synapse/util/hash.py b/synapse/util/hash.py
new file mode 100644
index 000000000..359168704
--- /dev/null
+++ b/synapse/util/hash.py
@@ -0,0 +1,33 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2019 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import hashlib
+
+import unpaddedbase64
+
+
+def sha256_and_url_safe_base64(input_text):
+    """SHA256 hash an input string, encode the digest as url-safe base64, and
+    return it.
+
+    :param input_text: string to hash
+    :type input_text: str
+
+    :returns: a sha256 hashed and url-safe base64 encoded digest
+    :rtype: str
+    """
+    digest = hashlib.sha256(input_text.encode()).digest()
+    return unpaddedbase64.encode_base64(digest, urlsafe=True)

From 6d847d8ce69f2cb849633265aaeb4a9df4ff713d Mon Sep 17 00:00:00 2001
From: Jason Robinson <jasonr@matrix.org>
Date: Wed, 11 Sep 2019 20:22:18 +0300
Subject: [PATCH 034/276] Ensure support users can be registered even if MAU
 limit is reached

This allows support users to be created even on MAU limits via
the admin API. Support users are excluded from MAU after creation,
so it makes sense to exclude them in creation - except if the
whole host is in disabled state.

Signed-off-by: Jason Robinson <jasonr@matrix.org>
---
 changelog.d/6020.bugfix |  1 +
 synapse/api/auth.py     | 11 +++++++++--
 tests/api/test_auth.py  | 18 ++++++++++++++++++
 3 files changed, 28 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/6020.bugfix

diff --git a/changelog.d/6020.bugfix b/changelog.d/6020.bugfix
new file mode 100644
index 000000000..58a7deba9
--- /dev/null
+++ b/changelog.d/6020.bugfix
@@ -0,0 +1 @@
+Ensure support users can be registered even if MAU limit is reached.
diff --git a/synapse/api/auth.py b/synapse/api/auth.py
index ddc195bc3..9e445cd80 100644
--- a/synapse/api/auth.py
+++ b/synapse/api/auth.py
@@ -25,7 +25,7 @@ from twisted.internet import defer
 import synapse.logging.opentracing as opentracing
 import synapse.types
 from synapse import event_auth
-from synapse.api.constants import EventTypes, JoinRules, Membership
+from synapse.api.constants import EventTypes, JoinRules, Membership, UserTypes
 from synapse.api.errors import (
     AuthError,
     Codes,
@@ -709,7 +709,7 @@ class Auth(object):
             )
 
     @defer.inlineCallbacks
-    def check_auth_blocking(self, user_id=None, threepid=None):
+    def check_auth_blocking(self, user_id=None, threepid=None, user_type=None):
         """Checks if the user should be rejected for some external reason,
         such as monthly active user limiting or global disable flag
 
@@ -722,6 +722,9 @@ class Auth(object):
                 with a MAU blocked server, normally they would be rejected but their
                 threepid is on the reserved list. user_id and
                 threepid should never be set at the same time.
+
+            user_type(str|None): If present, is used to decide whether to check against
+                certain blocking reasons like MAU.
         """
 
         # Never fail an auth check for the server notices users or support user
@@ -759,6 +762,10 @@ class Auth(object):
                     self.hs.config.mau_limits_reserved_threepids, threepid
                 ):
                     return
+            elif user_type == UserTypes.SUPPORT:
+                # If the user does not exist yet and is of type "support",
+                # allow registration. Support users are excluded from MAU checks.
+                return
             # Else if there is no room in the MAU bucket, bail
             current_mau = yield self.store.get_monthly_active_count()
             if current_mau >= self.hs.config.max_mau_value:
diff --git a/tests/api/test_auth.py b/tests/api/test_auth.py
index c0cb8ef29..6121efcfa 100644
--- a/tests/api/test_auth.py
+++ b/tests/api/test_auth.py
@@ -21,6 +21,7 @@ from twisted.internet import defer
 
 import synapse.handlers.auth
 from synapse.api.auth import Auth
+from synapse.api.constants import UserTypes
 from synapse.api.errors import (
     AuthError,
     Codes,
@@ -335,6 +336,23 @@ class AuthTestCase(unittest.TestCase):
         )
         yield self.auth.check_auth_blocking()
 
+    @defer.inlineCallbacks
+    def test_blocking_mau__depending_on_user_type(self):
+        self.hs.config.max_mau_value = 50
+        self.hs.config.limit_usage_by_mau = True
+
+        self.store.get_monthly_active_count = Mock(return_value=defer.succeed(100))
+        # Support users allowed
+        yield self.auth.check_auth_blocking(user_type=UserTypes.SUPPORT)
+        self.store.get_monthly_active_count = Mock(return_value=defer.succeed(100))
+        # Bots not allowed
+        with self.assertRaises(ResourceLimitError):
+            yield self.auth.check_auth_blocking(user_type=UserTypes.BOT)
+        self.store.get_monthly_active_count = Mock(return_value=defer.succeed(100))
+        # Real users not allowed
+        with self.assertRaises(ResourceLimitError):
+            yield self.auth.check_auth_blocking()
+
     @defer.inlineCallbacks
     def test_reserved_threepid(self):
         self.hs.config.limit_usage_by_mau = True

From a8251da10f98a251b9aa0be1f313d8d2e4ac1c3f Mon Sep 17 00:00:00 2001
From: Jorik Schellekens <joriksch@gmail.com>
Date: Thu, 12 Sep 2019 10:57:37 +0100
Subject: [PATCH 035/276] Blow up config if opentracing is missing (#5985)

* Blow up config if opentracing is missing
---
 changelog.d/5985.feature | 1 +
 synapse/config/tracer.py | 7 +++++++
 2 files changed, 8 insertions(+)
 create mode 100644 changelog.d/5985.feature

diff --git a/changelog.d/5985.feature b/changelog.d/5985.feature
new file mode 100644
index 000000000..e5e29504a
--- /dev/null
+++ b/changelog.d/5985.feature
@@ -0,0 +1 @@
+Check at setup that opentracing is installed if it's enabled in the config.
diff --git a/synapse/config/tracer.py b/synapse/config/tracer.py
index 95e7ccb3a..85d99a316 100644
--- a/synapse/config/tracer.py
+++ b/synapse/config/tracer.py
@@ -13,6 +13,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from synapse.python_dependencies import DependencyException, check_requirements
+
 from ._base import Config, ConfigError
 
 
@@ -32,6 +34,11 @@ class TracerConfig(Config):
         if not self.opentracer_enabled:
             return
 
+        try:
+            check_requirements("opentracing")
+        except DependencyException as e:
+            raise ConfigError(e.message)
+
         # The tracer is enabled so sanitize the config
 
         self.opentracer_whitelist = opentracing_config.get("homeserver_whitelist", [])

From dd2e5b0038dbe9812775e5943e5bccf550d7468a Mon Sep 17 00:00:00 2001
From: Sorunome <mail@sorunome.de>
Date: Thu, 12 Sep 2019 12:24:57 +0200
Subject: [PATCH 036/276] add report_stats_endpoint config option (#6012)

This PR adds the optional `report_stats_endpoint` to configure where stats are reported to, if enabled.
---
 changelog.d/6012.feature  | 1 +
 docs/sample_config.yaml   | 5 +++++
 synapse/app/homeserver.py | 6 ++++--
 synapse/config/metrics.py | 9 +++++++++
 4 files changed, 19 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/6012.feature

diff --git a/changelog.d/6012.feature b/changelog.d/6012.feature
new file mode 100644
index 000000000..25425510c
--- /dev/null
+++ b/changelog.d/6012.feature
@@ -0,0 +1 @@
+Add report_stats_endpoint option to configure where stats are reported to, if enabled. Contributed by @Sorunome.
diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml
index c970a1c67..dd4e2d5eb 100644
--- a/docs/sample_config.yaml
+++ b/docs/sample_config.yaml
@@ -985,6 +985,11 @@ metrics_flags:
 # Whether or not to report anonymized homeserver usage statistics.
 # report_stats: true|false
 
+# The endpoint to report the anonymized homeserver usage statistics to.
+# Defaults to https://matrix.org/report-usage-stats/push
+#
+#report_stats_endpoint: https://example.com/report-usage-stats/push
+
 
 ## API Configuration ##
 
diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py
index 04f1ed14f..774326dff 100644
--- a/synapse/app/homeserver.py
+++ b/synapse/app/homeserver.py
@@ -561,10 +561,12 @@ def run(hs):
 
         stats["database_engine"] = hs.get_datastore().database_engine_name
         stats["database_server_version"] = hs.get_datastore().get_server_version()
-        logger.info("Reporting stats to matrix.org: %s" % (stats,))
+        logger.info(
+            "Reporting stats to %s: %s" % (hs.config.report_stats_endpoint, stats)
+        )
         try:
             yield hs.get_simple_http_client().put_json(
-                "https://matrix.org/report-usage-stats/push", stats
+                hs.config.report_stats_endpoint, stats
             )
         except Exception as e:
             logger.warn("Error reporting stats: %s", e)
diff --git a/synapse/config/metrics.py b/synapse/config/metrics.py
index 9eb1e55dd..ec35a6b86 100644
--- a/synapse/config/metrics.py
+++ b/synapse/config/metrics.py
@@ -37,6 +37,9 @@ class MetricsConfig(Config):
     def read_config(self, config, **kwargs):
         self.enable_metrics = config.get("enable_metrics", False)
         self.report_stats = config.get("report_stats", None)
+        self.report_stats_endpoint = config.get(
+            "report_stats_endpoint", "https://matrix.org/report-usage-stats/push"
+        )
         self.metrics_port = config.get("metrics_port")
         self.metrics_bind_host = config.get("metrics_bind_host", "127.0.0.1")
 
@@ -95,4 +98,10 @@ class MetricsConfig(Config):
         else:
             res += "report_stats: %s\n" % ("true" if report_stats else "false")
 
+        res += """
+        # The endpoint to report the anonymized homeserver usage statistics to.
+        # Defaults to https://matrix.org/report-usage-stats/push
+        #
+        #report_stats_endpoint: https://example.com/report-usage-stats/push
+        """
         return res

From 642fad8bd47ffcb74d970de632a7316dfc15d26b Mon Sep 17 00:00:00 2001
From: David Baker <dave@matrix.org>
Date: Thu, 12 Sep 2019 11:42:47 +0100
Subject: [PATCH 037/276] Fix SSO fallback login

Well, it worked, but forgot to remove the thing saying login was
unavailable.
---
 synapse/static/client/login/js/login.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/static/client/login/js/login.js b/synapse/static/client/login/js/login.js
index e02663f50..276c271bb 100644
--- a/synapse/static/client/login/js/login.js
+++ b/synapse/static/client/login/js/login.js
@@ -62,7 +62,7 @@ var show_login = function() {
         $("#sso_flow").show();
     }
 
-    if (!matrixLogin.serverAcceptsPassword && !matrixLogin.serverAcceptsCas) {
+    if (!matrixLogin.serverAcceptsPassword && !matrixLogin.serverAcceptsCas && !matrixLogin.serverAcceptsSso) {
         $("#no_login_types").show();
     }
 };

From 6db22e4702fcaa7b4f6b814e215071d5aca6893e Mon Sep 17 00:00:00 2001
From: David Baker <dave@matrix.org>
Date: Thu, 12 Sep 2019 11:46:37 +0100
Subject: [PATCH 038/276] changelog

---
 changelog.d/6024.bugfix | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/6024.bugfix

diff --git a/changelog.d/6024.bugfix b/changelog.d/6024.bugfix
new file mode 100644
index 000000000..ddad34595
--- /dev/null
+++ b/changelog.d/6024.bugfix
@@ -0,0 +1 @@
+Fix bug where login error was shown incorrectly on SSO fallback login.

From 0388beafe48d1ae9c30565c37b8902b9aa0b8fe2 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Thu, 12 Sep 2019 12:59:43 +0100
Subject: [PATCH 039/276] Fix bug in calculating the federation retry backoff
 period (#6025)

This was intended to introduce an element of jitter; instead it gave you a
30/60 chance of resetting to zero.
---
 changelog.d/6025.bugfix    | 1 +
 synapse/util/retryutils.py | 5 +++--
 2 files changed, 4 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/6025.bugfix

diff --git a/changelog.d/6025.bugfix b/changelog.d/6025.bugfix
new file mode 100644
index 000000000..50d7f9aab
--- /dev/null
+++ b/changelog.d/6025.bugfix
@@ -0,0 +1 @@
+Fix bug in calculating the federation retry backoff period.
\ No newline at end of file
diff --git a/synapse/util/retryutils.py b/synapse/util/retryutils.py
index 5b16a8161..33263fe20 100644
--- a/synapse/util/retryutils.py
+++ b/synapse/util/retryutils.py
@@ -193,8 +193,9 @@ class RetryDestinationLimiter(object):
         else:
             # We couldn't connect.
             if self.retry_interval:
-                self.retry_interval *= RETRY_MULTIPLIER
-                self.retry_interval *= int(random.uniform(0.8, 1.4))
+                self.retry_interval = int(
+                    self.retry_interval * RETRY_MULTIPLIER * random.uniform(0.8, 1.4)
+                )
 
                 if self.retry_interval >= MAX_RETRY_INTERVAL:
                     self.retry_interval = MAX_RETRY_INTERVAL

From 3d882a7ba52114f18ec6be61c51561db203a0534 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Thu, 12 Sep 2019 13:00:13 +0100
Subject: [PATCH 040/276] Remove the cap on federation retry interval. (#6026)

Essentially the intention here is to end up blacklisting servers which never
respond to federation requests.

Fixes https://github.com/matrix-org/synapse/issues/5113.
---
 changelog.d/6026.feature   | 1 +
 synapse/util/retryutils.py | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/6026.feature

diff --git a/changelog.d/6026.feature b/changelog.d/6026.feature
new file mode 100644
index 000000000..2489ff09b
--- /dev/null
+++ b/changelog.d/6026.feature
@@ -0,0 +1 @@
+Stop sending federation transactions to servers which have been down for a long time.
diff --git a/synapse/util/retryutils.py b/synapse/util/retryutils.py
index 33263fe20..b740913b5 100644
--- a/synapse/util/retryutils.py
+++ b/synapse/util/retryutils.py
@@ -28,8 +28,8 @@ MIN_RETRY_INTERVAL = 10 * 60 * 1000
 # how much we multiply the backoff by after each subsequent fail
 RETRY_MULTIPLIER = 5
 
-# a cap on the backoff
-MAX_RETRY_INTERVAL = 24 * 60 * 60 * 1000
+# a cap on the backoff. (Essentially none)
+MAX_RETRY_INTERVAL = 2 ** 63
 
 
 class NotRetryingDestination(Exception):

From b617864cd9f81109e818bc5ae95bee317d917b72 Mon Sep 17 00:00:00 2001
From: Amber Brown <hawkowl@atleastfornow.net>
Date: Fri, 13 Sep 2019 02:29:55 +1000
Subject: [PATCH 041/276] Fix for structured logging tests stomping on logs
 (#6023)

---
 MANIFEST.in                      | 12 ++++---
 changelog.d/6023.misc            |  1 +
 mypy.ini                         | 54 ++++++++++++++++++++++++++++++++
 synapse/config/logger.py         | 33 ++++++++++++++-----
 synapse/logging/_structured.py   |  8 ++---
 synapse/logging/_terse_json.py   |  8 +++--
 synapse/logging/opentracing.py   |  4 +--
 synapse/metrics/__init__.py      |  5 ++-
 synapse/metrics/_exposition.py   |  4 ++-
 synapse/python_dependencies.py   |  7 +++--
 tests/logging/test_structured.py | 25 ++++++++++++---
 tests/logging/test_terse_json.py |  4 +--
 tox.ini                          | 30 ++++++++++++++----
 13 files changed, 154 insertions(+), 41 deletions(-)
 create mode 100644 changelog.d/6023.misc
 create mode 100644 mypy.ini

diff --git a/MANIFEST.in b/MANIFEST.in
index 919cd8a1c..9c2902b8d 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -38,14 +38,16 @@ exclude sytest-blacklist
 include pyproject.toml
 recursive-include changelog.d *
 
+prune .buildkite
+prune .circleci
+prune .codecov.yml
+prune .coveragerc
 prune .github
+prune debian
 prune demo/etc
 prune docker
-prune .circleci
-prune .coveragerc
-prune debian
-prune .codecov.yml
-prune .buildkite
+prune mypy.ini
+prune stubs
 
 exclude jenkins*
 recursive-exclude jenkins *.sh
diff --git a/changelog.d/6023.misc b/changelog.d/6023.misc
new file mode 100644
index 000000000..d80410c22
--- /dev/null
+++ b/changelog.d/6023.misc
@@ -0,0 +1 @@
+Fix the structured logging tests stomping on the global log configuration for subsequent tests.
diff --git a/mypy.ini b/mypy.ini
new file mode 100644
index 000000000..8788574ee
--- /dev/null
+++ b/mypy.ini
@@ -0,0 +1,54 @@
+[mypy]
+namespace_packages=True
+plugins=mypy_zope:plugin
+follow_imports=skip
+mypy_path=stubs
+
+[mypy-synapse.config.homeserver]
+# this is a mess because of the metaclass shenanigans
+ignore_errors = True
+
+[mypy-zope]
+ignore_missing_imports = True
+
+[mypy-constantly]
+ignore_missing_imports = True
+
+[mypy-twisted.*]
+ignore_missing_imports = True
+
+[mypy-treq.*]
+ignore_missing_imports = True
+
+[mypy-hyperlink]
+ignore_missing_imports = True
+
+[mypy-h11]
+ignore_missing_imports = True
+
+[mypy-opentracing]
+ignore_missing_imports = True
+
+[mypy-OpenSSL]
+ignore_missing_imports = True
+
+[mypy-netaddr]
+ignore_missing_imports = True
+
+[mypy-saml2.*]
+ignore_missing_imports = True
+
+[mypy-unpaddedbase64]
+ignore_missing_imports = True
+
+[mypy-canonicaljson]
+ignore_missing_imports = True
+
+[mypy-jaeger_client]
+ignore_missing_imports = True
+
+[mypy-jsonschema]
+ignore_missing_imports = True
+
+[mypy-signedjson.*]
+ignore_missing_imports = True
diff --git a/synapse/config/logger.py b/synapse/config/logger.py
index 2704c1872..767ecfdf0 100644
--- a/synapse/config/logger.py
+++ b/synapse/config/logger.py
@@ -21,7 +21,12 @@ from string import Template
 
 import yaml
 
-from twisted.logger import STDLibLogObserver, globalLogBeginner
+from twisted.logger import (
+    ILogObserver,
+    LogBeginner,
+    STDLibLogObserver,
+    globalLogBeginner,
+)
 
 import synapse
 from synapse.app import _base as appbase
@@ -124,7 +129,7 @@ class LoggingConfig(Config):
                 log_config_file.write(DEFAULT_LOG_CONFIG.substitute(log_file=log_file))
 
 
-def _setup_stdlib_logging(config, log_config):
+def _setup_stdlib_logging(config, log_config, logBeginner: LogBeginner):
     """
     Set up Python stdlib logging.
     """
@@ -165,12 +170,12 @@ def _setup_stdlib_logging(config, log_config):
 
         return observer(event)
 
-    globalLogBeginner.beginLoggingTo(
-        [_log], redirectStandardIO=not config.no_redirect_stdio
-    )
+    logBeginner.beginLoggingTo([_log], redirectStandardIO=not config.no_redirect_stdio)
     if not config.no_redirect_stdio:
         print("Redirected stdout/stderr to logs")
 
+    return observer
+
 
 def _reload_stdlib_logging(*args, log_config=None):
     logger = logging.getLogger("")
@@ -181,7 +186,9 @@ def _reload_stdlib_logging(*args, log_config=None):
     logging.config.dictConfig(log_config)
 
 
-def setup_logging(hs, config, use_worker_options=False):
+def setup_logging(
+    hs, config, use_worker_options=False, logBeginner: LogBeginner = globalLogBeginner
+) -> ILogObserver:
     """
     Set up the logging subsystem.
 
@@ -191,6 +198,12 @@ def setup_logging(hs, config, use_worker_options=False):
 
         use_worker_options (bool): True to use the 'worker_log_config' option
             instead of 'log_config'.
+
+        logBeginner: The Twisted logBeginner to use.
+
+    Returns:
+        The "root" Twisted Logger observer, suitable for sending logs to from a
+        Logger instance.
     """
     log_config = config.worker_log_config if use_worker_options else config.log_config
 
@@ -210,10 +223,12 @@ def setup_logging(hs, config, use_worker_options=False):
     log_config_body = read_config()
 
     if log_config_body and log_config_body.get("structured") is True:
-        setup_structured_logging(hs, config, log_config_body)
+        logger = setup_structured_logging(
+            hs, config, log_config_body, logBeginner=logBeginner
+        )
         appbase.register_sighup(read_config, callback=reload_structured_logging)
     else:
-        _setup_stdlib_logging(config, log_config_body)
+        logger = _setup_stdlib_logging(config, log_config_body, logBeginner=logBeginner)
         appbase.register_sighup(read_config, callback=_reload_stdlib_logging)
 
     # make sure that the first thing we log is a thing we can grep backwards
@@ -221,3 +236,5 @@ def setup_logging(hs, config, use_worker_options=False):
     logging.warn("***** STARTING SERVER *****")
     logging.warn("Server %s version %s", sys.argv[0], get_version_string(synapse))
     logging.info("Server hostname: %s", config.server_name)
+
+    return logger
diff --git a/synapse/logging/_structured.py b/synapse/logging/_structured.py
index 0367d6dfc..3220e985a 100644
--- a/synapse/logging/_structured.py
+++ b/synapse/logging/_structured.py
@@ -18,6 +18,7 @@ import os.path
 import sys
 import typing
 import warnings
+from typing import List
 
 import attr
 from constantly import NamedConstant, Names, ValueConstant, Values
@@ -33,7 +34,6 @@ from twisted.logger import (
     LogLevelFilterPredicate,
     LogPublisher,
     eventAsText,
-    globalLogBeginner,
     jsonFileLogObserver,
 )
 
@@ -134,7 +134,7 @@ class PythonStdlibToTwistedLogger(logging.Handler):
         )
 
 
-def SynapseFileLogObserver(outFile: typing.io.TextIO) -> FileLogObserver:
+def SynapseFileLogObserver(outFile: typing.IO[str]) -> FileLogObserver:
     """
     A log observer that formats events like the traditional log formatter and
     sends them to `outFile`.
@@ -265,7 +265,7 @@ def setup_structured_logging(
     hs,
     config,
     log_config: dict,
-    logBeginner: LogBeginner = globalLogBeginner,
+    logBeginner: LogBeginner,
     redirect_stdlib_logging: bool = True,
 ) -> LogPublisher:
     """
@@ -286,7 +286,7 @@ def setup_structured_logging(
     if "drains" not in log_config:
         raise ConfigError("The logging configuration requires a list of drains.")
 
-    observers = []
+    observers = []  # type: List[ILogObserver]
 
     for observer in parse_drain_configs(log_config["drains"]):
         # Pipe drains
diff --git a/synapse/logging/_terse_json.py b/synapse/logging/_terse_json.py
index 7f1e8f23f..0ebbde06f 100644
--- a/synapse/logging/_terse_json.py
+++ b/synapse/logging/_terse_json.py
@@ -21,10 +21,11 @@ import sys
 from collections import deque
 from ipaddress import IPv4Address, IPv6Address, ip_address
 from math import floor
-from typing.io import TextIO
+from typing import IO
 
 import attr
 from simplejson import dumps
+from zope.interface import implementer
 
 from twisted.application.internet import ClientService
 from twisted.internet.endpoints import (
@@ -33,7 +34,7 @@ from twisted.internet.endpoints import (
     TCP6ClientEndpoint,
 )
 from twisted.internet.protocol import Factory, Protocol
-from twisted.logger import FileLogObserver, Logger
+from twisted.logger import FileLogObserver, ILogObserver, Logger
 from twisted.python.failure import Failure
 
 
@@ -129,7 +130,7 @@ def flatten_event(event: dict, metadata: dict, include_time: bool = False):
     return new_event
 
 
-def TerseJSONToConsoleLogObserver(outFile: TextIO, metadata: dict) -> FileLogObserver:
+def TerseJSONToConsoleLogObserver(outFile: IO[str], metadata: dict) -> FileLogObserver:
     """
     A log observer that formats events to a flattened JSON representation.
 
@@ -146,6 +147,7 @@ def TerseJSONToConsoleLogObserver(outFile: TextIO, metadata: dict) -> FileLogObs
 
 
 @attr.s
+@implementer(ILogObserver)
 class TerseJSONToTCPLogObserver(object):
     """
     An IObserver that writes JSON logs to a TCP target.
diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py
index 724625301..308a27213 100644
--- a/synapse/logging/opentracing.py
+++ b/synapse/logging/opentracing.py
@@ -223,8 +223,8 @@ try:
     from jaeger_client import Config as JaegerConfig
     from synapse.logging.scopecontextmanager import LogContextScopeManager
 except ImportError:
-    JaegerConfig = None
-    LogContextScopeManager = None
+    JaegerConfig = None  # type: ignore
+    LogContextScopeManager = None  # type: ignore
 
 
 logger = logging.getLogger(__name__)
diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py
index b5c9595cb..bec3b1339 100644
--- a/synapse/metrics/__init__.py
+++ b/synapse/metrics/__init__.py
@@ -20,6 +20,7 @@ import os
 import platform
 import threading
 import time
+from typing import Dict, Union
 
 import six
 
@@ -42,9 +43,7 @@ logger = logging.getLogger(__name__)
 METRICS_PREFIX = "/_synapse/metrics"
 
 running_on_pypy = platform.python_implementation() == "PyPy"
-all_metrics = []
-all_collectors = []
-all_gauges = {}
+all_gauges = {}  # type: Dict[str, Union[LaterGauge, InFlightGauge, BucketCollector]]
 
 HAVE_PROC_SELF_STAT = os.path.exists("/proc/self/stat")
 
diff --git a/synapse/metrics/_exposition.py b/synapse/metrics/_exposition.py
index 1933ecd3e..74d9c3ecd 100644
--- a/synapse/metrics/_exposition.py
+++ b/synapse/metrics/_exposition.py
@@ -36,7 +36,9 @@ from twisted.web.resource import Resource
 try:
     from prometheus_client.samples import Sample
 except ImportError:
-    Sample = namedtuple("Sample", ["name", "labels", "value", "timestamp", "exemplar"])
+    Sample = namedtuple(
+        "Sample", ["name", "labels", "value", "timestamp", "exemplar"]
+    )  # type: ignore
 
 
 CONTENT_TYPE_LATEST = str("text/plain; version=0.0.4; charset=utf-8")
diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py
index 07345e916..0bd563edc 100644
--- a/synapse/python_dependencies.py
+++ b/synapse/python_dependencies.py
@@ -15,6 +15,7 @@
 # limitations under the License.
 
 import logging
+from typing import Set
 
 from pkg_resources import (
     DistributionNotFound,
@@ -97,7 +98,7 @@ CONDITIONAL_REQUIREMENTS = {
     "jwt": ["pyjwt>=1.6.4"],
 }
 
-ALL_OPTIONAL_REQUIREMENTS = set()
+ALL_OPTIONAL_REQUIREMENTS = set()  # type: Set[str]
 
 for name, optional_deps in CONDITIONAL_REQUIREMENTS.items():
     # Exclude systemd as it's a system-based requirement.
@@ -174,8 +175,8 @@ def check_requirements(for_feature=None):
                 pass
 
     if deps_needed:
-        for e in errors:
-            logging.error(e)
+        for err in errors:
+            logging.error(err)
 
         raise DependencyException(deps_needed)
 
diff --git a/tests/logging/test_structured.py b/tests/logging/test_structured.py
index a786de023..451d05c0f 100644
--- a/tests/logging/test_structured.py
+++ b/tests/logging/test_structured.py
@@ -13,6 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import logging
 import os
 import os.path
 import shutil
@@ -33,7 +34,20 @@ class FakeBeginner(object):
         self.observers = observers
 
 
-class StructuredLoggingTestCase(HomeserverTestCase):
+class StructuredLoggingTestBase(object):
+    """
+    Test base that registers a cleanup handler to reset the stdlib log handler
+    to 'unset'.
+    """
+
+    def prepare(self, reactor, clock, hs):
+        def _cleanup():
+            logging.getLogger("synapse").setLevel(logging.NOTSET)
+
+        self.addCleanup(_cleanup)
+
+
+class StructuredLoggingTestCase(StructuredLoggingTestBase, HomeserverTestCase):
     """
     Tests for Synapse's structured logging support.
     """
@@ -139,7 +153,9 @@ class StructuredLoggingTestCase(HomeserverTestCase):
         self.assertEqual(logs[0]["request"], "somereq")
 
 
-class StructuredLoggingConfigurationFileTestCase(HomeserverTestCase):
+class StructuredLoggingConfigurationFileTestCase(
+    StructuredLoggingTestBase, HomeserverTestCase
+):
     def make_homeserver(self, reactor, clock):
 
         tempdir = self.mktemp()
@@ -179,10 +195,11 @@ class StructuredLoggingConfigurationFileTestCase(HomeserverTestCase):
         """
         When a structured logging config is given, Synapse will use it.
         """
-        setup_logging(self.hs, self.hs.config)
+        beginner = FakeBeginner()
+        publisher = setup_logging(self.hs, self.hs.config, logBeginner=beginner)
 
         # Make a logger and send an event
-        logger = Logger(namespace="tests.logging.test_structured")
+        logger = Logger(namespace="tests.logging.test_structured", observer=publisher)
 
         with LoggingContext("testcontext", request="somereq"):
             logger.info("Hello there, {name}!", name="steve")
diff --git a/tests/logging/test_terse_json.py b/tests/logging/test_terse_json.py
index 514282591..4cf81f712 100644
--- a/tests/logging/test_terse_json.py
+++ b/tests/logging/test_terse_json.py
@@ -23,10 +23,10 @@ from synapse.logging._structured import setup_structured_logging
 from tests.server import connect_client
 from tests.unittest import HomeserverTestCase
 
-from .test_structured import FakeBeginner
+from .test_structured import FakeBeginner, StructuredLoggingTestBase
 
 
-class TerseJSONTCPTestCase(HomeserverTestCase):
+class TerseJSONTCPTestCase(StructuredLoggingTestBase, HomeserverTestCase):
     def test_log_output(self):
         """
         The Terse JSON outputter delivers simplified structured logs over TCP.
diff --git a/tox.ini b/tox.ini
index 7cb40847b..1bce10a4c 100644
--- a/tox.ini
+++ b/tox.ini
@@ -2,6 +2,7 @@
 envlist = packaging, py35, py36, py37, check_codestyle, check_isort
 
 [base]
+basepython = python3.7
 deps =
     mock
     python-subunit
@@ -137,18 +138,35 @@ commands = {toxinidir}/scripts-dev/generate_sample_config --check
 skip_install = True
 deps =
     coverage
-whitelist_externals =
-    bash
 commands=
     coverage combine
     coverage report
 
+[testenv:cov-erase]
+skip_install = True
+deps =
+    coverage
+commands=
+    coverage erase
+
+[testenv:cov-html]
+skip_install = True
+deps =
+    coverage
+commands=
+    coverage html
+
 [testenv:mypy]
-basepython = python3.5
+basepython = python3.7
+skip_install = True
 deps =
     {[base]deps}
     mypy
+    mypy-zope
+    typeshed
+env =
+    MYPYPATH = stubs/
 extras = all
-commands = mypy --ignore-missing-imports \
-            synapse/logging/_structured.py \
-            synapse/logging/_terse_json.py
+commands = mypy --show-traceback \
+            synapse/logging/ \
+            synapse/config/

From 9eaa5d6d2427a6c3edcdf18c0868c697c17fd6d4 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Thu, 12 Sep 2019 21:13:31 +0100
Subject: [PATCH 042/276] README: link to reverse_proxy.rst (#6027)

---
 README.rst | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/README.rst b/README.rst
index bbff8de5a..fbbf958d6 100644
--- a/README.rst
+++ b/README.rst
@@ -381,3 +381,16 @@ indicate that your server is also issuing far more outgoing federation
 requests than can be accounted for by your users' activity, this is a
 likely cause. The misbehavior can be worked around by setting
 ``use_presence: false`` in the Synapse config file.
+
+People can't accept room invitations from me
+--------------------------------------------
+
+The typical failure mode here is that you send an invitation to someone 
+to join a room or direct chat, but when they go to accept it, they get an
+error (typically along the lines of "Invalid signature"). They might see
+something like the following in their logs::
+
+    2019-09-11 19:32:04,271 - synapse.federation.transport.server - 288 - WARNING - GET-11752 - authenticate_request failed: 401: Invalid signature for server <server> with key ed25519:a_EqML: Unable to verify signature for <server>
+
+This is normally caused by a misconfiguration in your reverse-proxy. See
+`<docs/reverse_proxy.rst>`_ and double-check that your settings are correct.

From 1c7df13e7b26f249726380cbec5a6bc7bb3daeb6 Mon Sep 17 00:00:00 2001
From: axel simon <github@axelsimon.net>
Date: Fri, 13 Sep 2019 09:50:17 +0200
Subject: [PATCH 043/276] add explanations on how to actually include an
 access_token (#6031)

---
 docs/admin_api/README.rst | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/docs/admin_api/README.rst b/docs/admin_api/README.rst
index d4f564cfa..191806c5b 100644
--- a/docs/admin_api/README.rst
+++ b/docs/admin_api/README.rst
@@ -10,3 +10,15 @@ server admin by updating the database directly, e.g.:
 ``UPDATE users SET admin = 1 WHERE name = '@foo:bar.com'``
 
 Restarting may be required for the changes to register.
+
+Using an admin access_token
+###########################
+
+Many of the API calls listed in the documentation here require you to include an admin `access_token`.
+Finding your user's `access_token` is client-dependent, but will usually be shown in the client's settings.
+
+Once you have your `access_token`, to include it in a request, the best option is to add the token to a request header:
+
+``curl --header "Authorization: Bearer <access_token>" <the_rest_of_your_API_request>``
+
+For more details, please refer to the complete `matrix spec documentation <https://matrix.org/docs/spec/client_server/r0.5.0#using-access-tokens>`_.

From c755955f335984dc6f97a269b57ad955f257ef8f Mon Sep 17 00:00:00 2001
From: Travis Ralston <travpc@gmail.com>
Date: Fri, 13 Sep 2019 01:58:18 -0600
Subject: [PATCH 044/276] Add developer docs for using SAML without a server
 (#6032)

---
 changelog.d/6032.misc |  1 +
 docs/dev/saml.md      | 37 +++++++++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+)
 create mode 100644 changelog.d/6032.misc
 create mode 100644 docs/dev/saml.md

diff --git a/changelog.d/6032.misc b/changelog.d/6032.misc
new file mode 100644
index 000000000..ec5b5eb88
--- /dev/null
+++ b/changelog.d/6032.misc
@@ -0,0 +1 @@
+Add developer documentation for using SAML2.
diff --git a/docs/dev/saml.md b/docs/dev/saml.md
new file mode 100644
index 000000000..f41aadce4
--- /dev/null
+++ b/docs/dev/saml.md
@@ -0,0 +1,37 @@
+# How to test SAML as a developer without a server
+
+https://capriza.github.io/samling/samling.html (https://github.com/capriza/samling) is a great
+resource for being able to tinker with the SAML options within Synapse without needing to
+deploy and configure a complicated software stack.
+
+To make Synapse (and therefore Riot) use it:
+
+1. Use the samling.html URL above or deploy your own and visit the IdP Metadata tab.
+2. Copy the XML to your clipboard.
+3. On your Synapse server, create a new file `samling.xml` next to your `homeserver.yaml` with
+   the XML from step 2 as the contents.
+4. Edit your `homeserver.yaml` to include:
+   ```yaml
+   saml2_config:
+     sp_config:
+       allow_unknown_attributes: true  # Works around a bug with AVA Hashes: https://github.com/IdentityPython/pysaml2/issues/388
+       metadata:
+         local: ["samling.xml"]   
+   ```
+5. Run `apt-get install xmlsec1` and `pip install --upgrade --force 'pysaml2>=4.5.0'` to ensure
+   the dependencies are installed and ready to go.
+6. Restart Synapse.
+
+Then in Riot:
+
+1. Visit the login page with a Riot pointing at your homeserver.
+2. Click the Single Sign-On button.
+3. On the samling page, enter a Name Identifier and add a SAML Attribute for `uid=your_localpart`.
+   The response must also be signed.
+4. Click "Next".
+5. Click "Post Response" (change nothing).
+6. You should be logged in.
+
+If you try and repeat this process, you may be automatically logged in using the information you
+gave previously. To fix this, open your developer console (`F12` or `Ctrl+Shift+I`) while on the
+samling page and clear the site data. In Chrome, this will be a button on the Application tab.

From 785cbd3999ab011440b453e07992d3b0c92a4059 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Fri, 13 Sep 2019 12:07:03 +0100
Subject: [PATCH 045/276] Make the sample saml config closer to our standards

It's still not great, thanks to the nested dictionaries, but it's better.
---
 docs/sample_config.yaml        | 110 +++++++++++++++++---------------
 synapse/config/saml2_config.py | 113 ++++++++++++++++++---------------
 2 files changed, 121 insertions(+), 102 deletions(-)

diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml
index 0c6be30e5..8cfc5c312 100644
--- a/docs/sample_config.yaml
+++ b/docs/sample_config.yaml
@@ -1031,12 +1031,13 @@ signing_key_path: "CONFDIR/SERVERNAME.signing.key"
 
 # Enable SAML2 for registration and login. Uses pysaml2.
 #
-# `sp_config` is the configuration for the pysaml2 Service Provider.
-# See pysaml2 docs for format of config.
+# At least one of `sp_config` or `config_path` must be set in this section to
+# enable SAML login.
 #
-# Default values will be used for the 'entityid' and 'service' settings,
-# so it is not normally necessary to specify them unless you need to
-# override them.
+# (You will probably also want to set the following options to `false` to
+# disable the regular login/registration flows:
+#   * enable_registration
+#   * password_config.enabled
 #
 # Once SAML support is enabled, a metadata file will be exposed at
 # https://<server>:<port>/_matrix/saml2/metadata.xml, which you may be able to
@@ -1044,52 +1045,59 @@ signing_key_path: "CONFDIR/SERVERNAME.signing.key"
 # the IdP to use an ACS location of
 # https://<server>:<port>/_matrix/saml2/authn_response.
 #
-#saml2_config:
-#  sp_config:
-#    # point this to the IdP's metadata. You can use either a local file or
-#    # (preferably) a URL.
-#    metadata:
-#      #local: ["saml2/idp.xml"]
-#      remote:
-#        - url: https://our_idp/metadata.xml
-#
-#    # By default, the user has to go to our login page first. If you'd like to
-#    # allow IdP-initiated login, set 'allow_unsolicited: True' in a
-#    # 'service.sp' section:
-#    #
-#    #service:
-#    #  sp:
-#    #    allow_unsolicited: True
-#
-#    # The examples below are just used to generate our metadata xml, and you
-#    # may well not need it, depending on your setup. Alternatively you
-#    # may need a whole lot more detail - see the pysaml2 docs!
-#
-#    description: ["My awesome SP", "en"]
-#    name: ["Test SP", "en"]
-#
-#    organization:
-#      name: Example com
-#      display_name:
-#        - ["Example co", "en"]
-#      url: "http://example.com"
-#
-#    contact_person:
-#      - given_name: Bob
-#        sur_name: "the Sysadmin"
-#        email_address": ["admin@example.com"]
-#        contact_type": technical
-#
-#  # Instead of putting the config inline as above, you can specify a
-#  # separate pysaml2 configuration file:
-#  #
-#  config_path: "CONFDIR/sp_conf.py"
-#
-#  # the lifetime of a SAML session. This defines how long a user has to
-#  # complete the authentication process, if allow_unsolicited is unset.
-#  # The default is 5 minutes.
-#  #
-#  # saml_session_lifetime: 5m
+saml2_config:
+  # `sp_config` is the configuration for the pysaml2 Service Provider.
+  # See pysaml2 docs for format of config.
+  #
+  # Default values will be used for the 'entityid' and 'service' settings,
+  # so it is not normally necessary to specify them unless you need to
+  # override them.
+  #
+  #sp_config:
+  #  # point this to the IdP's metadata. You can use either a local file or
+  #  # (preferably) a URL.
+  #  metadata:
+  #    #local: ["saml2/idp.xml"]
+  #    remote:
+  #      - url: https://our_idp/metadata.xml
+  #
+  #    # By default, the user has to go to our login page first. If you'd like
+  #    # to allow IdP-initiated login, set 'allow_unsolicited: True' in a
+  #    # 'service.sp' section:
+  #    #
+  #    #service:
+  #    #  sp:
+  #    #    allow_unsolicited: true
+  #
+  #    # The examples below are just used to generate our metadata xml, and you
+  #    # may well not need them, depending on your setup. Alternatively you
+  #    # may need a whole lot more detail - see the pysaml2 docs!
+  #
+  #    description: ["My awesome SP", "en"]
+  #    name: ["Test SP", "en"]
+  #
+  #    organization:
+  #      name: Example com
+  #      display_name:
+  #        - ["Example co", "en"]
+  #      url: "http://example.com"
+  #
+  #    contact_person:
+  #      - given_name: Bob
+  #        sur_name: "the Sysadmin"
+  #        email_address": ["admin@example.com"]
+  #        contact_type": technical
+
+  # Instead of putting the config inline as above, you can specify a
+  # separate pysaml2 configuration file:
+  #
+  #config_path: "CONFDIR/sp_conf.py"
+
+  # the lifetime of a SAML session. This defines how long a user has to
+  # complete the authentication process, if allow_unsolicited is unset.
+  # The default is 5 minutes.
+  #
+  #saml_session_lifetime: 5m
 
 
 
diff --git a/synapse/config/saml2_config.py b/synapse/config/saml2_config.py
index 6a8161547..c46ac087d 100644
--- a/synapse/config/saml2_config.py
+++ b/synapse/config/saml2_config.py
@@ -26,6 +26,9 @@ class SAML2Config(Config):
         if not saml2_config or not saml2_config.get("enabled", True):
             return
 
+        if not saml2_config.get("sp_config") and not saml2_config.get("config_path"):
+            return
+
         try:
             check_requirements("saml2")
         except DependencyException as e:
@@ -76,12 +79,13 @@ class SAML2Config(Config):
         return """\
         # Enable SAML2 for registration and login. Uses pysaml2.
         #
-        # `sp_config` is the configuration for the pysaml2 Service Provider.
-        # See pysaml2 docs for format of config.
+        # At least one of `sp_config` or `config_path` must be set in this section to
+        # enable SAML login.
         #
-        # Default values will be used for the 'entityid' and 'service' settings,
-        # so it is not normally necessary to specify them unless you need to
-        # override them.
+        # (You will probably also want to set the following options to `false` to
+        # disable the regular login/registration flows:
+        #   * enable_registration
+        #   * password_config.enabled
         #
         # Once SAML support is enabled, a metadata file will be exposed at
         # https://<server>:<port>/_matrix/saml2/metadata.xml, which you may be able to
@@ -89,52 +93,59 @@ class SAML2Config(Config):
         # the IdP to use an ACS location of
         # https://<server>:<port>/_matrix/saml2/authn_response.
         #
-        #saml2_config:
-        #  sp_config:
-        #    # point this to the IdP's metadata. You can use either a local file or
-        #    # (preferably) a URL.
-        #    metadata:
-        #      #local: ["saml2/idp.xml"]
-        #      remote:
-        #        - url: https://our_idp/metadata.xml
-        #
-        #    # By default, the user has to go to our login page first. If you'd like to
-        #    # allow IdP-initiated login, set 'allow_unsolicited: True' in a
-        #    # 'service.sp' section:
-        #    #
-        #    #service:
-        #    #  sp:
-        #    #    allow_unsolicited: True
-        #
-        #    # The examples below are just used to generate our metadata xml, and you
-        #    # may well not need it, depending on your setup. Alternatively you
-        #    # may need a whole lot more detail - see the pysaml2 docs!
-        #
-        #    description: ["My awesome SP", "en"]
-        #    name: ["Test SP", "en"]
-        #
-        #    organization:
-        #      name: Example com
-        #      display_name:
-        #        - ["Example co", "en"]
-        #      url: "http://example.com"
-        #
-        #    contact_person:
-        #      - given_name: Bob
-        #        sur_name: "the Sysadmin"
-        #        email_address": ["admin@example.com"]
-        #        contact_type": technical
-        #
-        #  # Instead of putting the config inline as above, you can specify a
-        #  # separate pysaml2 configuration file:
-        #  #
-        #  config_path: "%(config_dir_path)s/sp_conf.py"
-        #
-        #  # the lifetime of a SAML session. This defines how long a user has to
-        #  # complete the authentication process, if allow_unsolicited is unset.
-        #  # The default is 5 minutes.
-        #  #
-        #  # saml_session_lifetime: 5m
+        saml2_config:
+          # `sp_config` is the configuration for the pysaml2 Service Provider.
+          # See pysaml2 docs for format of config.
+          #
+          # Default values will be used for the 'entityid' and 'service' settings,
+          # so it is not normally necessary to specify them unless you need to
+          # override them.
+          #
+          #sp_config:
+          #  # point this to the IdP's metadata. You can use either a local file or
+          #  # (preferably) a URL.
+          #  metadata:
+          #    #local: ["saml2/idp.xml"]
+          #    remote:
+          #      - url: https://our_idp/metadata.xml
+          #
+          #    # By default, the user has to go to our login page first. If you'd like
+          #    # to allow IdP-initiated login, set 'allow_unsolicited: True' in a
+          #    # 'service.sp' section:
+          #    #
+          #    #service:
+          #    #  sp:
+          #    #    allow_unsolicited: true
+          #
+          #    # The examples below are just used to generate our metadata xml, and you
+          #    # may well not need them, depending on your setup. Alternatively you
+          #    # may need a whole lot more detail - see the pysaml2 docs!
+          #
+          #    description: ["My awesome SP", "en"]
+          #    name: ["Test SP", "en"]
+          #
+          #    organization:
+          #      name: Example com
+          #      display_name:
+          #        - ["Example co", "en"]
+          #      url: "http://example.com"
+          #
+          #    contact_person:
+          #      - given_name: Bob
+          #        sur_name: "the Sysadmin"
+          #        email_address": ["admin@example.com"]
+          #        contact_type": technical
+
+          # Instead of putting the config inline as above, you can specify a
+          # separate pysaml2 configuration file:
+          #
+          #config_path: "%(config_dir_path)s/sp_conf.py"
+
+          # the lifetime of a SAML session. This defines how long a user has to
+          # complete the authentication process, if allow_unsolicited is unset.
+          # The default is 5 minutes.
+          #
+          #saml_session_lifetime: 5m
         """ % {
             "config_dir_path": config_dir_path
         }

From a8ac40445c98b9e1fc2538d7d4ec49c80b0298ac Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Fri, 13 Sep 2019 15:20:49 +0100
Subject: [PATCH 046/276] Record mappings from saml users in an external table

We want to assign unique mxids to saml users based on an incrementing
suffix. For that to work, we need to record the allocated mxid in a separate
table.
---
 docs/sample_config.yaml                       |  26 +++++
 synapse/config/saml2_config.py                |  78 ++++++++++++-
 synapse/handlers/saml_handler.py              | 103 ++++++++++++++++--
 synapse/rest/client/v1/login.py               |  14 +++
 synapse/storage/registration.py               |  41 +++++++
 .../schema/delta/56/user_external_ids.sql     |  24 ++++
 6 files changed, 276 insertions(+), 10 deletions(-)
 create mode 100644 synapse/storage/schema/delta/56/user_external_ids.sql

diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml
index 8cfc5c312..9021fe2cb 100644
--- a/docs/sample_config.yaml
+++ b/docs/sample_config.yaml
@@ -1099,6 +1099,32 @@ saml2_config:
   #
   #saml_session_lifetime: 5m
 
+  # The SAML attribute (after mapping via the attribute maps) to use to derive
+  # the Matrix ID from. 'uid' by default.
+  #
+  #mxid_source_attribute: displayName
+
+  # The mapping system to use for mapping the saml attribute onto a matrix ID.
+  # Options include:
+  #  * 'hexencode' (which maps unpermitted characters to '=xx')
+  #  * 'dotreplace' (which replaces unpermitted characters with '.').
+  # The default is 'hexencode'.
+  #
+  #mxid_mapping: dotreplace
+
+  # In previous versions of synapse, the mapping from SAML attribute to MXID was
+  # always calculated dynamically rather than stored in a table. For backwards-
+  # compatibility, we will look for user_ids matching such a pattern before
+  # creating a new account.
+  #
+  # This setting controls the SAML attribute which will be used for this
+  # backwards-compatibility lookup. Typically it should be 'uid', but if the
+  # attribute maps are changed, it may be necessary to change it.
+  #
+  # The default is 'uid'.
+  #
+  #grandfathered_mxid_source_attribute: upn
+
 
 
 # Enable CAS for registration and login.
diff --git a/synapse/config/saml2_config.py b/synapse/config/saml2_config.py
index c46ac087d..a02247070 100644
--- a/synapse/config/saml2_config.py
+++ b/synapse/config/saml2_config.py
@@ -12,7 +12,13 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import re
+
 from synapse.python_dependencies import DependencyException, check_requirements
+from synapse.types import (
+    map_username_to_mxid_localpart,
+    mxid_localpart_allowed_characters,
+)
 
 from ._base import Config, ConfigError
 
@@ -36,6 +42,14 @@ class SAML2Config(Config):
 
         self.saml2_enabled = True
 
+        self.saml2_mxid_source_attribute = saml2_config.get(
+            "mxid_source_attribute", "uid"
+        )
+
+        self.saml2_grandfathered_mxid_source_attribute = saml2_config.get(
+            "grandfathered_mxid_source_attribute", "uid"
+        )
+
         import saml2.config
 
         self.saml2_sp_config = saml2.config.SPConfig()
@@ -51,6 +65,12 @@ class SAML2Config(Config):
             saml2_config.get("saml_session_lifetime", "5m")
         )
 
+        mapping = saml2_config.get("mxid_mapping", "hexencode")
+        try:
+            self.saml2_mxid_mapper = MXID_MAPPER_MAP[mapping]
+        except KeyError:
+            raise ConfigError("%s is not a known mxid_mapping" % (mapping,))
+
     def _default_saml_config_dict(self):
         import saml2
 
@@ -58,6 +78,13 @@ class SAML2Config(Config):
         if public_baseurl is None:
             raise ConfigError("saml2_config requires a public_baseurl to be set")
 
+        required_attributes = {"uid", self.saml2_mxid_source_attribute}
+
+        optional_attributes = {"displayName"}
+        if self.saml2_grandfathered_mxid_source_attribute:
+            optional_attributes.add(self.saml2_grandfathered_mxid_source_attribute)
+        optional_attributes -= required_attributes
+
         metadata_url = public_baseurl + "_matrix/saml2/metadata.xml"
         response_url = public_baseurl + "_matrix/saml2/authn_response"
         return {
@@ -69,8 +96,9 @@ class SAML2Config(Config):
                             (response_url, saml2.BINDING_HTTP_POST)
                         ]
                     },
-                    "required_attributes": ["uid"],
-                    "optional_attributes": ["mail", "surname", "givenname"],
+                    "required_attributes": list(required_attributes),
+                    "optional_attributes": list(optional_attributes),
+                    # "name_id_format": saml2.saml.NAMEID_FORMAT_PERSISTENT,
                 }
             },
         }
@@ -146,6 +174,52 @@ class SAML2Config(Config):
           # The default is 5 minutes.
           #
           #saml_session_lifetime: 5m
+
+          # The SAML attribute (after mapping via the attribute maps) to use to derive
+          # the Matrix ID from. 'uid' by default.
+          #
+          #mxid_source_attribute: displayName
+
+          # The mapping system to use for mapping the saml attribute onto a matrix ID.
+          # Options include:
+          #  * 'hexencode' (which maps unpermitted characters to '=xx')
+          #  * 'dotreplace' (which replaces unpermitted characters with '.').
+          # The default is 'hexencode'.
+          #
+          #mxid_mapping: dotreplace
+
+          # In previous versions of synapse, the mapping from SAML attribute to MXID was
+          # always calculated dynamically rather than stored in a table. For backwards-
+          # compatibility, we will look for user_ids matching such a pattern before
+          # creating a new account.
+          #
+          # This setting controls the SAML attribute which will be used for this
+          # backwards-compatibility lookup. Typically it should be 'uid', but if the
+          # attribute maps are changed, it may be necessary to change it.
+          #
+          # The default is 'uid'.
+          #
+          #grandfathered_mxid_source_attribute: upn
         """ % {
             "config_dir_path": config_dir_path
         }
+
+
+DOT_REPLACE_PATTERN = re.compile(
+    ("[^%s]" % (re.escape("".join(mxid_localpart_allowed_characters)),))
+)
+
+
+def dot_replace_for_mxid(username: str) -> str:
+    username = username.lower()
+    username = DOT_REPLACE_PATTERN.sub(".", username)
+
+    # regular mxids aren't allowed to start with an underscore either
+    username = re.sub("^_", "", username)
+    return username
+
+
+MXID_MAPPER_MAP = {
+    "hexencode": map_username_to_mxid_localpart,
+    "dotreplace": dot_replace_for_mxid,
+}
diff --git a/synapse/handlers/saml_handler.py b/synapse/handlers/saml_handler.py
index a1ce6929c..5fa8272dc 100644
--- a/synapse/handlers/saml_handler.py
+++ b/synapse/handlers/saml_handler.py
@@ -21,6 +21,8 @@ from saml2.client import Saml2Client
 from synapse.api.errors import SynapseError
 from synapse.http.servlet import parse_string
 from synapse.rest.client.v1.login import SSOAuthHandler
+from synapse.types import UserID, map_username_to_mxid_localpart
+from synapse.util.async_helpers import Linearizer
 
 logger = logging.getLogger(__name__)
 
@@ -29,12 +31,26 @@ class SamlHandler:
     def __init__(self, hs):
         self._saml_client = Saml2Client(hs.config.saml2_sp_config)
         self._sso_auth_handler = SSOAuthHandler(hs)
+        self._registration_handler = hs.get_registration_handler()
+
+        self._clock = hs.get_clock()
+        self._datastore = hs.get_datastore()
+        self._hostname = hs.hostname
+        self._saml2_session_lifetime = hs.config.saml2_session_lifetime
+        self._mxid_source_attribute = hs.config.saml2_mxid_source_attribute
+        self._grandfathered_mxid_source_attribute = (
+            hs.config.saml2_grandfathered_mxid_source_attribute
+        )
+        self._mxid_mapper = hs.config.saml2_mxid_mapper
+
+        # identifier for the external_ids table
+        self._auth_provider_id = "saml"
 
         # a map from saml session id to Saml2SessionData object
         self._outstanding_requests_dict = {}
 
-        self._clock = hs.get_clock()
-        self._saml2_session_lifetime = hs.config.saml2_session_lifetime
+        # a lock on the mappings
+        self._mapping_lock = Linearizer(name="saml_mapping", clock=self._clock)
 
     def handle_redirect_request(self, client_redirect_url):
         """Handle an incoming request to /login/sso/redirect
@@ -60,7 +76,7 @@ class SamlHandler:
         # this shouldn't happen!
         raise Exception("prepare_for_authenticate didn't return a Location header")
 
-    def handle_saml_response(self, request):
+    async def handle_saml_response(self, request):
         """Handle an incoming request to /_matrix/saml2/authn_response
 
         Args:
@@ -77,6 +93,10 @@ class SamlHandler:
         # the dict.
         self.expire_sessions()
 
+        user_id = await self._map_saml_response_to_user(resp_bytes)
+        self._sso_auth_handler.complete_sso_login(user_id, request, relay_state)
+
+    async def _map_saml_response_to_user(self, resp_bytes):
         try:
             saml2_auth = self._saml_client.parse_authn_request_response(
                 resp_bytes,
@@ -91,18 +111,85 @@ class SamlHandler:
             logger.warning("SAML2 response was not signed")
             raise SynapseError(400, "SAML2 response was not signed")
 
-        if "uid" not in saml2_auth.ava:
+        try:
+            remote_user_id = saml2_auth.ava["uid"][0]
+        except KeyError:
             logger.warning("SAML2 response lacks a 'uid' attestation")
             raise SynapseError(400, "uid not in SAML2 response")
 
+        try:
+            mxid_source = saml2_auth.ava[self._mxid_source_attribute][0]
+        except KeyError:
+            logger.warning(
+                "SAML2 response lacks a '%s' attestation", self._mxid_source_attribute
+            )
+            raise SynapseError(
+                400, "%s not in SAML2 response" % (self._mxid_source_attribute,)
+            )
+
         self._outstanding_requests_dict.pop(saml2_auth.in_response_to, None)
 
-        username = saml2_auth.ava["uid"][0]
         displayName = saml2_auth.ava.get("displayName", [None])[0]
 
-        return self._sso_auth_handler.on_successful_auth(
-            username, request, relay_state, user_display_name=displayName
-        )
+        with (await self._mapping_lock.queue(self._auth_provider_id)):
+            # first of all, check if we already have a mapping for this user
+            logger.info(
+                "Looking for existing mapping for user %s:%s",
+                self._auth_provider_id,
+                remote_user_id,
+            )
+            registered_user_id = await self._datastore.get_user_by_external_id(
+                self._auth_provider_id, remote_user_id
+            )
+            if registered_user_id is not None:
+                logger.info("Found existing mapping %s", registered_user_id)
+                return registered_user_id
+
+            # backwards-compatibility hack: see if there is an existing user with a
+            # suitable mapping from the uid
+            if (
+                self._grandfathered_mxid_source_attribute
+                and self._grandfathered_mxid_source_attribute in saml2_auth.ava
+            ):
+                attrval = saml2_auth.ava[self._grandfathered_mxid_source_attribute][0]
+                user_id = UserID(
+                    map_username_to_mxid_localpart(attrval), self._hostname
+                ).to_string()
+                logger.info(
+                    "Looking for existing account based on mapped %s %s",
+                    self._grandfathered_mxid_source_attribute,
+                    user_id,
+                )
+
+                users = await self._datastore.get_users_by_id_case_insensitive(user_id)
+                if users:
+                    registered_user_id = list(users.keys())[0]
+                    logger.info("Grandfathering mapping to %s", registered_user_id)
+                    await self._datastore.record_user_external_id(
+                        self._auth_provider_id, remote_user_id, registered_user_id
+                    )
+                    return registered_user_id
+
+            # figure out a new mxid for this user
+            base_mxid_localpart = self._mxid_mapper(mxid_source)
+
+            suffix = 0
+            while True:
+                localpart = base_mxid_localpart + (str(suffix) if suffix else "")
+                if not await self._datastore.get_users_by_id_case_insensitive(
+                    UserID(localpart, self._hostname).to_string()
+                ):
+                    break
+                suffix += 1
+            logger.info("Allocating mxid for new user with localpart %s", localpart)
+
+            registered_user_id = await self._registration_handler.register_user(
+                localpart=localpart, default_display_name=displayName
+            )
+            await self._datastore.record_user_external_id(
+                self._auth_provider_id, remote_user_id, registered_user_id
+            )
+            return registered_user_id
 
     def expire_sessions(self):
         expire_before = self._clock.time_msec() - self._saml2_session_lifetime
diff --git a/synapse/rest/client/v1/login.py b/synapse/rest/client/v1/login.py
index 5762b9fd0..eeaa72b20 100644
--- a/synapse/rest/client/v1/login.py
+++ b/synapse/rest/client/v1/login.py
@@ -29,6 +29,7 @@ from synapse.http.servlet import (
     parse_json_object_from_request,
     parse_string,
 )
+from synapse.http.site import SynapseRequest
 from synapse.rest.client.v2_alpha._base import client_patterns
 from synapse.rest.well_known import WellKnownBuilder
 from synapse.types import UserID, map_username_to_mxid_localpart
@@ -507,6 +508,19 @@ class SSOAuthHandler(object):
                 localpart=localpart, default_display_name=user_display_name
             )
 
+        self.complete_sso_login(registered_user_id, request, client_redirect_url)
+
+    def complete_sso_login(
+        self, registered_user_id: str, request: SynapseRequest, client_redirect_url: str
+    ):
+        """Having figured out a mxid for this user, complete the HTTP request
+
+        Args:
+            registered_user_id:
+            request:
+            client_redirect_url:
+        """
+
         login_token = self._macaroon_gen.generate_short_term_login_token(
             registered_user_id
         )
diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py
index 55e4e84d7..1e3c2148f 100644
--- a/synapse/storage/registration.py
+++ b/synapse/storage/registration.py
@@ -22,6 +22,7 @@ from six import iterkeys
 from six.moves import range
 
 from twisted.internet import defer
+from twisted.internet.defer import Deferred
 
 from synapse.api.constants import UserTypes
 from synapse.api.errors import Codes, StoreError, ThreepidValidationError
@@ -337,6 +338,26 @@ class RegistrationWorkerStore(SQLBaseStore):
 
         return self.runInteraction("get_users_by_id_case_insensitive", f)
 
+    async def get_user_by_external_id(
+        self, auth_provider: str, external_id: str
+    ) -> str:
+        """Look up a user by their external auth id
+
+        Args:
+            auth_provider: identifier for the remote auth provider
+            external_id: id on that system
+
+        Returns:
+            str|None: the mxid of the user, or None if they are not known
+        """
+        return await self._simple_select_one_onecol(
+            table="user_external_ids",
+            keyvalues={"auth_provider": auth_provider, "external_id": external_id},
+            retcol="user_id",
+            allow_none=True,
+            desc="get_user_by_external_id",
+        )
+
     @defer.inlineCallbacks
     def count_all_users(self):
         """Counts all users registered on the homeserver."""
@@ -848,6 +869,26 @@ class RegistrationStore(
         self._invalidate_cache_and_stream(txn, self.get_user_by_id, (user_id,))
         txn.call_after(self.is_guest.invalidate, (user_id,))
 
+    def record_user_external_id(
+        self, auth_provider: str, external_id: str, user_id: str
+    ) -> Deferred:
+        """Record a mapping from an external user id to a mxid
+
+        Args:
+            auth_provider: identifier for the remote auth provider
+            external_id: id on that system
+            user_id: complete mxid that it is mapped to
+        """
+        return self._simple_insert(
+            table="user_external_ids",
+            values={
+                "auth_provider": auth_provider,
+                "external_id": external_id,
+                "user_id": user_id,
+            },
+            desc="record_user_external_id",
+        )
+
     def user_set_password_hash(self, user_id, password_hash):
         """
         NB. This does *not* evict any cache because the one use for this
diff --git a/synapse/storage/schema/delta/56/user_external_ids.sql b/synapse/storage/schema/delta/56/user_external_ids.sql
new file mode 100644
index 000000000..91390c452
--- /dev/null
+++ b/synapse/storage/schema/delta/56/user_external_ids.sql
@@ -0,0 +1,24 @@
+/* Copyright 2019 The Matrix.org Foundation C.I.C.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * a table which records mappings from external auth providers to mxids
+ */
+CREATE TABLE IF NOT EXISTS user_external_ids (
+    auth_provider TEXT NOT NULL,
+    external_id TEXT NOT NULL,
+    user_id TEXT NOT NULL,
+    UNIQUE (auth_provider, external_id)
+);

From b9d57502da8ae4e11523a155e0fd608433e1025d Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Fri, 13 Sep 2019 16:06:03 +0100
Subject: [PATCH 047/276] changelog

---
 changelog.d/6037.feature | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/6037.feature

diff --git a/changelog.d/6037.feature b/changelog.d/6037.feature
new file mode 100644
index 000000000..95d82bd4d
--- /dev/null
+++ b/changelog.d/6037.feature
@@ -0,0 +1 @@
+Handle userid clashes when authenticating via SAML by appending an integer suffix.
\ No newline at end of file

From 850dcfd2d3a1d689042fb38c8a16b652244068c2 Mon Sep 17 00:00:00 2001
From: Amber Brown <hawkowl@atleastfornow.net>
Date: Sat, 14 Sep 2019 04:58:38 +1000
Subject: [PATCH 048/276] Fix well-known lookups with the federation
 certificate whitelist (#5997)

---
 changelog.d/5996.bugfix                       |  1 +
 synapse/config/tls.py                         |  9 ++++-
 synapse/crypto/context_factory.py             | 26 ++++++------
 .../federation/matrix_federation_agent.py     |  2 +-
 tests/config/test_tls.py                      | 40 +++++++++++++++++++
 5 files changed, 63 insertions(+), 15 deletions(-)
 create mode 100644 changelog.d/5996.bugfix

diff --git a/changelog.d/5996.bugfix b/changelog.d/5996.bugfix
new file mode 100644
index 000000000..05e31faaa
--- /dev/null
+++ b/changelog.d/5996.bugfix
@@ -0,0 +1 @@
+federation_certificate_verification_whitelist now will not cause TypeErrors to be raised (a regression in 1.3). Additionally, it now supports internationalised domain names in their non-canonical representation.
diff --git a/synapse/config/tls.py b/synapse/config/tls.py
index c0148aa95..fc47ba3e9 100644
--- a/synapse/config/tls.py
+++ b/synapse/config/tls.py
@@ -110,8 +110,15 @@ class TlsConfig(Config):
         # Support globs (*) in whitelist values
         self.federation_certificate_verification_whitelist = []
         for entry in fed_whitelist_entries:
+            try:
+                entry_regex = glob_to_regex(entry.encode("ascii").decode("ascii"))
+            except UnicodeEncodeError:
+                raise ConfigError(
+                    "IDNA domain names are not allowed in the "
+                    "federation_certificate_verification_whitelist: %s" % (entry,)
+                )
+
             # Convert globs to regex
-            entry_regex = glob_to_regex(entry)
             self.federation_certificate_verification_whitelist.append(entry_regex)
 
         # List of custom certificate authorities for federation traffic validation
diff --git a/synapse/crypto/context_factory.py b/synapse/crypto/context_factory.py
index 06e63a96b..e93f0b370 100644
--- a/synapse/crypto/context_factory.py
+++ b/synapse/crypto/context_factory.py
@@ -15,7 +15,6 @@
 
 import logging
 
-import idna
 from service_identity import VerificationError
 from service_identity.pyopenssl import verify_hostname, verify_ip_address
 from zope.interface import implementer
@@ -114,14 +113,20 @@ class ClientTLSOptionsFactory(object):
         self._no_verify_ssl_context = self._no_verify_ssl.getContext()
         self._no_verify_ssl_context.set_info_callback(self._context_info_cb)
 
-    def get_options(self, host):
+    def get_options(self, host: bytes):
+
+        # IPolicyForHTTPS.get_options takes bytes, but we want to compare
+        # against the str whitelist. The hostnames in the whitelist are already
+        # IDNA-encoded like the hosts will be here.
+        ascii_host = host.decode("ascii")
+
         # Check if certificate verification has been enabled
         should_verify = self._config.federation_verify_certificates
 
         # Check if we've disabled certificate verification for this host
         if should_verify:
             for regex in self._config.federation_certificate_verification_whitelist:
-                if regex.match(host):
+                if regex.match(ascii_host):
                     should_verify = False
                     break
 
@@ -162,7 +167,7 @@ class SSLClientConnectionCreator(object):
     Replaces twisted.internet.ssl.ClientTLSOptions
     """
 
-    def __init__(self, hostname, ctx, verify_certs):
+    def __init__(self, hostname: bytes, ctx, verify_certs: bool):
         self._ctx = ctx
         self._verifier = ConnectionVerifier(hostname, verify_certs)
 
@@ -190,21 +195,16 @@ class ConnectionVerifier(object):
 
     # This code is based on twisted.internet.ssl.ClientTLSOptions.
 
-    def __init__(self, hostname, verify_certs):
+    def __init__(self, hostname: bytes, verify_certs):
         self._verify_certs = verify_certs
 
-        if isIPAddress(hostname) or isIPv6Address(hostname):
-            self._hostnameBytes = hostname.encode("ascii")
+        _decoded = hostname.decode("ascii")
+        if isIPAddress(_decoded) or isIPv6Address(_decoded):
             self._is_ip_address = True
         else:
-            # twisted's ClientTLSOptions falls back to the stdlib impl here if
-            # idna is not installed, but points out that lacks support for
-            # IDNA2008 (http://bugs.python.org/issue17305).
-            #
-            # We can rely on having idna.
-            self._hostnameBytes = idna.encode(hostname)
             self._is_ip_address = False
 
+        self._hostnameBytes = hostname
         self._hostnameASCII = self._hostnameBytes.decode("ascii")
 
     def verify_context_info_cb(self, ssl_connection, where):
diff --git a/synapse/http/federation/matrix_federation_agent.py b/synapse/http/federation/matrix_federation_agent.py
index feae7de5b..647d26dc5 100644
--- a/synapse/http/federation/matrix_federation_agent.py
+++ b/synapse/http/federation/matrix_federation_agent.py
@@ -217,7 +217,7 @@ class MatrixHostnameEndpoint(object):
             self._tls_options = None
         else:
             self._tls_options = tls_client_options_factory.get_options(
-                self._parsed_uri.host.decode("ascii")
+                self._parsed_uri.host
             )
 
         self._srv_resolver = srv_resolver
diff --git a/tests/config/test_tls.py b/tests/config/test_tls.py
index 8e0c4b953..b02780772 100644
--- a/tests/config/test_tls.py
+++ b/tests/config/test_tls.py
@@ -16,6 +16,7 @@
 
 import os
 
+import idna
 import yaml
 
 from OpenSSL import SSL
@@ -235,3 +236,42 @@ s4niecZKPBizL6aucT59CsunNmmb5Glq8rlAcU+1ZTZZzGYqVYhF6axB9Qg=
         )
 
         self.assertTrue(conf.acme_enabled)
+
+    def test_whitelist_idna_failure(self):
+        """
+        The federation certificate whitelist will not allow non-ASCII (unencoded) IDNA domain names.
+        """
+        config = {
+            "federation_certificate_verification_whitelist": [
+                "example.com",
+                "*.ドメイン.テスト",
+            ]
+        }
+        t = TestConfig()
+        e = self.assertRaises(
+            ConfigError, t.read_config, config, config_dir_path="", data_dir_path=""
+        )
+        self.assertIn("IDNA domain names", str(e))
+
+    def test_whitelist_idna_result(self):
+        """
+        The federation certificate whitelist will match on IDNA encoded names.
+        """
+        config = {
+            "federation_certificate_verification_whitelist": [
+                "example.com",
+                "*.xn--eckwd4c7c.xn--zckzah",
+            ]
+        }
+        t = TestConfig()
+        t.read_config(config, config_dir_path="", data_dir_path="")
+
+        cf = ClientTLSOptionsFactory(t)
+
+        # Not in the whitelist
+        opts = cf.get_options(b"notexample.com")
+        self.assertTrue(opts._verifier._verify_certs)
+
+        # Caught by the wildcard
+        opts = cf.get_options(idna.encode("テスト.ドメイン.テスト"))
+        self.assertFalse(opts._verifier._verify_certs)

From 1e19ce00bff8d67168d39201cdf9424f7b2f22f6 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Tue, 17 Sep 2019 11:41:54 +0100
Subject: [PATCH 049/276] Add 'failure_ts' column to 'destinations' table
 (#6016)

Track the time that a server started failing at, for general analysis purposes.
---
 changelog.d/6016.misc                         |   1 +
 .../delta/56/destinations_failure_ts.sql      |  25 ++++
 synapse/storage/transactions.py               |  23 +++-
 synapse/util/retryutils.py                    |  16 ++-
 tests/handlers/test_typing.py                 |   7 +-
 tests/storage/test_transactions.py            |   8 +-
 tests/util/test_retryutils.py                 | 127 ++++++++++++++++++
 7 files changed, 195 insertions(+), 12 deletions(-)
 create mode 100644 changelog.d/6016.misc
 create mode 100644 synapse/storage/schema/delta/56/destinations_failure_ts.sql
 create mode 100644 tests/util/test_retryutils.py

diff --git a/changelog.d/6016.misc b/changelog.d/6016.misc
new file mode 100644
index 000000000..91cf16471
--- /dev/null
+++ b/changelog.d/6016.misc
@@ -0,0 +1 @@
+Add a 'failure_ts' column to the 'destinations' database table.
diff --git a/synapse/storage/schema/delta/56/destinations_failure_ts.sql b/synapse/storage/schema/delta/56/destinations_failure_ts.sql
new file mode 100644
index 000000000..f00889290
--- /dev/null
+++ b/synapse/storage/schema/delta/56/destinations_failure_ts.sql
@@ -0,0 +1,25 @@
+/* Copyright 2019 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Record the timestamp when a given server started failing
+ */
+ALTER TABLE destinations ADD failure_ts BIGINT;
+
+/* as a rough approximation, we assume that the server started failing at
+ * retry_interval before the last retry
+ */
+UPDATE destinations SET failure_ts = retry_last_ts - retry_interval
+    WHERE retry_last_ts > 0;
diff --git a/synapse/storage/transactions.py b/synapse/storage/transactions.py
index d81ace0ec..289c11739 100644
--- a/synapse/storage/transactions.py
+++ b/synapse/storage/transactions.py
@@ -165,7 +165,7 @@ class TransactionStore(SQLBaseStore):
             txn,
             table="destinations",
             keyvalues={"destination": destination},
-            retcols=("destination", "retry_last_ts", "retry_interval"),
+            retcols=("destination", "failure_ts", "retry_last_ts", "retry_interval"),
             allow_none=True,
         )
 
@@ -174,12 +174,15 @@ class TransactionStore(SQLBaseStore):
         else:
             return None
 
-    def set_destination_retry_timings(self, destination, retry_last_ts, retry_interval):
+    def set_destination_retry_timings(
+        self, destination, failure_ts, retry_last_ts, retry_interval
+    ):
         """Sets the current retry timings for a given destination.
         Both timings should be zero if retrying is no longer occuring.
 
         Args:
             destination (str)
+            failure_ts (int|None) - when the server started failing (ms since epoch)
             retry_last_ts (int) - time of last retry attempt in unix epoch ms
             retry_interval (int) - how long until next retry in ms
         """
@@ -189,12 +192,13 @@ class TransactionStore(SQLBaseStore):
             "set_destination_retry_timings",
             self._set_destination_retry_timings,
             destination,
+            failure_ts,
             retry_last_ts,
             retry_interval,
         )
 
     def _set_destination_retry_timings(
-        self, txn, destination, retry_last_ts, retry_interval
+        self, txn, destination, failure_ts, retry_last_ts, retry_interval
     ):
 
         if self.database_engine.can_native_upsert:
@@ -202,9 +206,12 @@ class TransactionStore(SQLBaseStore):
             # resetting it) or greater than the existing retry interval.
 
             sql = """
-                INSERT INTO destinations (destination, retry_last_ts, retry_interval)
-                    VALUES (?, ?, ?)
+                INSERT INTO destinations (
+                    destination, failure_ts, retry_last_ts, retry_interval
+                )
+                    VALUES (?, ?, ?, ?)
                 ON CONFLICT (destination) DO UPDATE SET
+                        failure_ts = EXCLUDED.failure_ts,
                         retry_last_ts = EXCLUDED.retry_last_ts,
                         retry_interval = EXCLUDED.retry_interval
                     WHERE
@@ -212,7 +219,7 @@ class TransactionStore(SQLBaseStore):
                         OR destinations.retry_interval < EXCLUDED.retry_interval
             """
 
-            txn.execute(sql, (destination, retry_last_ts, retry_interval))
+            txn.execute(sql, (destination, failure_ts, retry_last_ts, retry_interval))
 
             return
 
@@ -225,7 +232,7 @@ class TransactionStore(SQLBaseStore):
             txn,
             table="destinations",
             keyvalues={"destination": destination},
-            retcols=("retry_last_ts", "retry_interval"),
+            retcols=("failure_ts", "retry_last_ts", "retry_interval"),
             allow_none=True,
         )
 
@@ -235,6 +242,7 @@ class TransactionStore(SQLBaseStore):
                 table="destinations",
                 values={
                     "destination": destination,
+                    "failure_ts": failure_ts,
                     "retry_last_ts": retry_last_ts,
                     "retry_interval": retry_interval,
                 },
@@ -245,6 +253,7 @@ class TransactionStore(SQLBaseStore):
                 "destinations",
                 keyvalues={"destination": destination},
                 updatevalues={
+                    "failure_ts": failure_ts,
                     "retry_last_ts": retry_last_ts,
                     "retry_interval": retry_interval,
                 },
diff --git a/synapse/util/retryutils.py b/synapse/util/retryutils.py
index b740913b5..a5f2fbef5 100644
--- a/synapse/util/retryutils.py
+++ b/synapse/util/retryutils.py
@@ -80,11 +80,13 @@ def get_retry_limiter(destination, clock, store, ignore_backoff=False, **kwargs)
             # We aren't ready to retry that destination.
             raise
     """
+    failure_ts = None
     retry_last_ts, retry_interval = (0, 0)
 
     retry_timings = yield store.get_destination_retry_timings(destination)
 
     if retry_timings:
+        failure_ts = retry_timings["failure_ts"]
         retry_last_ts, retry_interval = (
             retry_timings["retry_last_ts"],
             retry_timings["retry_interval"],
@@ -108,6 +110,7 @@ def get_retry_limiter(destination, clock, store, ignore_backoff=False, **kwargs)
         destination,
         clock,
         store,
+        failure_ts,
         retry_interval,
         backoff_on_failure=backoff_on_failure,
         **kwargs
@@ -120,6 +123,7 @@ class RetryDestinationLimiter(object):
         destination,
         clock,
         store,
+        failure_ts,
         retry_interval,
         backoff_on_404=False,
         backoff_on_failure=True,
@@ -133,6 +137,8 @@ class RetryDestinationLimiter(object):
             destination (str)
             clock (Clock)
             store (DataStore)
+            failure_ts (int|None): when this destination started failing (in ms since
+                the epoch), or None if the last request was successful
             retry_interval (int): The next retry interval taken from the
                 database in milliseconds, or zero if the last request was
                 successful.
@@ -145,6 +151,7 @@ class RetryDestinationLimiter(object):
         self.store = store
         self.destination = destination
 
+        self.failure_ts = failure_ts
         self.retry_interval = retry_interval
         self.backoff_on_404 = backoff_on_404
         self.backoff_on_failure = backoff_on_failure
@@ -186,6 +193,7 @@ class RetryDestinationLimiter(object):
             logger.debug(
                 "Connection to %s was successful; clearing backoff", self.destination
             )
+            self.failure_ts = None
             retry_last_ts = 0
             self.retry_interval = 0
         elif not self.backoff_on_failure:
@@ -211,11 +219,17 @@ class RetryDestinationLimiter(object):
             )
             retry_last_ts = int(self.clock.time_msec())
 
+            if self.failure_ts is None:
+                self.failure_ts = retry_last_ts
+
         @defer.inlineCallbacks
         def store_retry_timings():
             try:
                 yield self.store.set_destination_retry_timings(
-                    self.destination, retry_last_ts, self.retry_interval
+                    self.destination,
+                    self.failure_ts,
+                    retry_last_ts,
+                    self.retry_interval,
                 )
             except Exception:
                 logger.exception("Failed to store destination_retry_timings")
diff --git a/tests/handlers/test_typing.py b/tests/handlers/test_typing.py
index 5d5e324df..1f2ef5d01 100644
--- a/tests/handlers/test_typing.py
+++ b/tests/handlers/test_typing.py
@@ -99,7 +99,12 @@ class TypingNotificationsTestCase(unittest.HomeserverTestCase):
         self.event_source = hs.get_event_sources().sources["typing"]
 
         self.datastore = hs.get_datastore()
-        retry_timings_res = {"destination": "", "retry_last_ts": 0, "retry_interval": 0}
+        retry_timings_res = {
+            "destination": "",
+            "retry_last_ts": 0,
+            "retry_interval": 0,
+            "failure_ts": None,
+        }
         self.datastore.get_destination_retry_timings.return_value = defer.succeed(
             retry_timings_res
         )
diff --git a/tests/storage/test_transactions.py b/tests/storage/test_transactions.py
index 14169afa9..a771d5af2 100644
--- a/tests/storage/test_transactions.py
+++ b/tests/storage/test_transactions.py
@@ -29,17 +29,19 @@ class TransactionStoreTestCase(HomeserverTestCase):
         r = self.get_success(d)
         self.assertIsNone(r)
 
-        d = self.store.set_destination_retry_timings("example.com", 50, 100)
+        d = self.store.set_destination_retry_timings("example.com", 1000, 50, 100)
         self.get_success(d)
 
         d = self.store.get_destination_retry_timings("example.com")
         r = self.get_success(d)
 
-        self.assert_dict({"retry_last_ts": 50, "retry_interval": 100}, r)
+        self.assert_dict(
+            {"retry_last_ts": 50, "retry_interval": 100, "failure_ts": 1000}, r
+        )
 
     def test_initial_set_transactions(self):
         """Tests that we can successfully set the destination retries (there
         was a bug around invalidating the cache that broke this)
         """
-        d = self.store.set_destination_retry_timings("example.com", 50, 100)
+        d = self.store.set_destination_retry_timings("example.com", 1000, 50, 100)
         self.get_success(d)
diff --git a/tests/util/test_retryutils.py b/tests/util/test_retryutils.py
new file mode 100644
index 000000000..9e348694a
--- /dev/null
+++ b/tests/util/test_retryutils.py
@@ -0,0 +1,127 @@
+# -*- coding: utf-8 -*-
+# Copyright 2019 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from synapse.util.retryutils import (
+    MIN_RETRY_INTERVAL,
+    RETRY_MULTIPLIER,
+    NotRetryingDestination,
+    get_retry_limiter,
+)
+
+from tests.unittest import HomeserverTestCase
+
+
+class RetryLimiterTestCase(HomeserverTestCase):
+    def test_new_destination(self):
+        """A happy-path case with a new destination and a successful operation"""
+        store = self.hs.get_datastore()
+        d = get_retry_limiter("test_dest", self.clock, store)
+        self.pump()
+        limiter = self.successResultOf(d)
+
+        # advance the clock a bit before making the request
+        self.pump(1)
+
+        with limiter:
+            pass
+
+        d = store.get_destination_retry_timings("test_dest")
+        self.pump()
+        new_timings = self.successResultOf(d)
+        self.assertIsNone(new_timings)
+
+    def test_limiter(self):
+        """General test case which walks through the process of a failing request"""
+        store = self.hs.get_datastore()
+
+        d = get_retry_limiter("test_dest", self.clock, store)
+        self.pump()
+        limiter = self.successResultOf(d)
+
+        self.pump(1)
+        try:
+            with limiter:
+                self.pump(1)
+                failure_ts = self.clock.time_msec()
+                raise AssertionError("argh")
+        except AssertionError:
+            pass
+
+        # wait for the update to land
+        self.pump()
+
+        d = store.get_destination_retry_timings("test_dest")
+        self.pump()
+        new_timings = self.successResultOf(d)
+        self.assertEqual(new_timings["failure_ts"], failure_ts)
+        self.assertEqual(new_timings["retry_last_ts"], failure_ts)
+        self.assertEqual(new_timings["retry_interval"], MIN_RETRY_INTERVAL)
+
+        # now if we try again we should get a failure
+        d = get_retry_limiter("test_dest", self.clock, store)
+        self.pump()
+        self.failureResultOf(d, NotRetryingDestination)
+
+        #
+        # advance the clock and try again
+        #
+
+        self.pump(MIN_RETRY_INTERVAL)
+        d = get_retry_limiter("test_dest", self.clock, store)
+        self.pump()
+        limiter = self.successResultOf(d)
+
+        self.pump(1)
+        try:
+            with limiter:
+                self.pump(1)
+                retry_ts = self.clock.time_msec()
+                raise AssertionError("argh")
+        except AssertionError:
+            pass
+
+        # wait for the update to land
+        self.pump()
+
+        d = store.get_destination_retry_timings("test_dest")
+        self.pump()
+        new_timings = self.successResultOf(d)
+        self.assertEqual(new_timings["failure_ts"], failure_ts)
+        self.assertEqual(new_timings["retry_last_ts"], retry_ts)
+        self.assertGreaterEqual(
+            new_timings["retry_interval"], MIN_RETRY_INTERVAL * RETRY_MULTIPLIER * 0.5
+        )
+        self.assertLessEqual(
+            new_timings["retry_interval"], MIN_RETRY_INTERVAL * RETRY_MULTIPLIER * 2.0
+        )
+
+        #
+        # one more go, with success
+        #
+        self.pump(MIN_RETRY_INTERVAL * RETRY_MULTIPLIER * 2.0)
+        d = get_retry_limiter("test_dest", self.clock, store)
+        self.pump()
+        limiter = self.successResultOf(d)
+
+        self.pump(1)
+        with limiter:
+            self.pump(1)
+
+        # wait for the update to land
+        self.pump()
+
+        d = store.get_destination_retry_timings("test_dest")
+        self.pump()
+        new_timings = self.successResultOf(d)
+        self.assertIsNone(new_timings)

From 70c52821ce9e755e4a5c3081510fb1260f609ee3 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Tue, 17 Sep 2019 12:41:23 +0100
Subject: [PATCH 050/276] Fix race condition in room stats. (#6029)

Broke in #5971

Basically the bug is that if get_current_state_deltas returns no new updates and we then take the max pos, it's possible that we miss an update that happens in between the two calls (e.g. get_current_state_deltas looks up to stream pos 5, then an event persists and so getting the max stream pos returns 6, meaning that next time we only check for things with a stream pos bigger than 6).
---
 changelog.d/6029.bugfix   |  1 +
 synapse/handlers/stats.py | 14 ++++++++++----
 2 files changed, 11 insertions(+), 4 deletions(-)
 create mode 100644 changelog.d/6029.bugfix

diff --git a/changelog.d/6029.bugfix b/changelog.d/6029.bugfix
new file mode 100644
index 000000000..9ea095103
--- /dev/null
+++ b/changelog.d/6029.bugfix
@@ -0,0 +1 @@
+Fix room and user stats tracking.
diff --git a/synapse/handlers/stats.py b/synapse/handlers/stats.py
index 3c265f371..cbac7c347 100644
--- a/synapse/handlers/stats.py
+++ b/synapse/handlers/stats.py
@@ -84,6 +84,13 @@ class StatsHandler(StateDeltasHandler):
         # Loop round handling deltas until we're up to date
 
         while True:
+            # Be sure to read the max stream_ordering *before* checking if there are any outstanding
+            # deltas, since there is otherwise a chance that we could miss updates which arrive
+            # after we check the deltas.
+            room_max_stream_ordering = yield self.store.get_room_max_stream_ordering()
+            if self.pos == room_max_stream_ordering:
+                break
+
             deltas = yield self.store.get_current_state_deltas(self.pos)
 
             if deltas:
@@ -94,7 +101,7 @@ class StatsHandler(StateDeltasHandler):
             else:
                 room_deltas = {}
                 user_deltas = {}
-                max_pos = yield self.store.get_room_max_stream_ordering()
+                max_pos = room_max_stream_ordering
 
             # Then count deltas for total_events and total_event_bytes.
             room_count, user_count = yield self.store.get_changes_room_total_events_and_bytes(
@@ -117,10 +124,9 @@ class StatsHandler(StateDeltasHandler):
                 stream_id=max_pos,
             )
 
-            event_processing_positions.labels("stats").set(max_pos)
+            logger.debug("Handled room stats to %s -> %s", self.pos, max_pos)
 
-            if self.pos == max_pos:
-                break
+            event_processing_positions.labels("stats").set(max_pos)
 
             self.pos = max_pos
 

From 379d2a8c3918557bacdadea6b508bddd1ce20eaf Mon Sep 17 00:00:00 2001
From: dstipp <dstipp@users.noreply.github.com>
Date: Tue, 17 Sep 2019 07:55:29 -0400
Subject: [PATCH 051/276] (#5849) Convert rst to markdown (#6040)

Converting some of the rst documentation to markdown.  Attempted to
preserve whitespace and line breaks to minimize cosmetic change.
---
 CONTRIBUTING.rst                             |   2 +-
 INSTALL.md                                   |   4 +-
 README.rst                                   |   6 +-
 UPGRADE.rst                                  |   2 +-
 changelog.d/5849.doc                         |   1 +
 docs/{CAPTCHA_SETUP.rst => CAPTCHA_SETUP.md} |  29 +-
 docs/MSC1711_certificates_FAQ.md             |   4 +-
 docs/README.md                               |   7 +
 docs/README.rst                              |   6 -
 docs/ancient_architecture_notes.md           |  81 +++
 docs/ancient_architecture_notes.rst          |  59 ---
 docs/application_services.md                 |  31 ++
 docs/application_services.rst                |  35 --
 docs/architecture.md                         |  65 +++
 docs/architecture.rst                        |  68 ---
 docs/code_style.md                           | 169 +++++++
 docs/code_style.rst                          | 180 -------
 docs/federate.md                             |   4 +-
 docs/log_contexts.md                         | 494 ++++++++++++++++++
 docs/log_contexts.rst                        | 498 -------------------
 docs/media_repository.md                     |  30 ++
 docs/media_repository.rst                    |  27 -
 docs/metrics-howto.md                        | 217 ++++++++
 docs/metrics-howto.rst                       | 285 -----------
 docs/opentracing.md                          |  93 ++++
 docs/opentracing.rst                         | 123 -----
 docs/password_auth_providers.md              | 116 +++++
 docs/password_auth_providers.rst             | 113 -----
 docs/postgres.md                             | 164 ++++++
 docs/postgres.rst                            | 166 -------
 docs/replication.md                          |  37 ++
 docs/replication.rst                         |  40 --
 docs/reverse_proxy.md                        | 123 +++++
 docs/reverse_proxy.rst                       | 112 -----
 docs/sample_config.yaml                      |  12 +-
 docs/tcp_replication.md                      | 249 ++++++++++
 docs/tcp_replication.rst                     | 249 ----------
 docs/turn-howto.md                           | 123 +++++
 docs/turn-howto.rst                          | 127 -----
 docs/{workers.rst => workers.md}             | 117 ++---
 synapse/config/server.py                     |  12 +-
 41 files changed, 2088 insertions(+), 2192 deletions(-)
 create mode 100644 changelog.d/5849.doc
 rename docs/{CAPTCHA_SETUP.rst => CAPTCHA_SETUP.md} (50%)
 create mode 100644 docs/README.md
 delete mode 100644 docs/README.rst
 create mode 100644 docs/ancient_architecture_notes.md
 delete mode 100644 docs/ancient_architecture_notes.rst
 create mode 100644 docs/application_services.md
 delete mode 100644 docs/application_services.rst
 create mode 100644 docs/architecture.md
 delete mode 100644 docs/architecture.rst
 create mode 100644 docs/code_style.md
 delete mode 100644 docs/code_style.rst
 create mode 100644 docs/log_contexts.md
 delete mode 100644 docs/log_contexts.rst
 create mode 100644 docs/media_repository.md
 delete mode 100644 docs/media_repository.rst
 create mode 100644 docs/metrics-howto.md
 delete mode 100644 docs/metrics-howto.rst
 create mode 100644 docs/opentracing.md
 delete mode 100644 docs/opentracing.rst
 create mode 100644 docs/password_auth_providers.md
 delete mode 100644 docs/password_auth_providers.rst
 create mode 100644 docs/postgres.md
 delete mode 100644 docs/postgres.rst
 create mode 100644 docs/replication.md
 delete mode 100644 docs/replication.rst
 create mode 100644 docs/reverse_proxy.md
 delete mode 100644 docs/reverse_proxy.rst
 create mode 100644 docs/tcp_replication.md
 delete mode 100644 docs/tcp_replication.rst
 create mode 100644 docs/turn-howto.md
 delete mode 100644 docs/turn-howto.rst
 rename docs/{workers.rst => workers.md} (78%)

diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst
index 94dc65048..620dc88ce 100644
--- a/CONTRIBUTING.rst
+++ b/CONTRIBUTING.rst
@@ -56,7 +56,7 @@ Code style
 
 All Matrix projects have a well-defined code-style - and sometimes we've even
 got as far as documenting it... For instance, synapse's code style doc lives
-at https://github.com/matrix-org/synapse/tree/master/docs/code_style.rst.
+at https://github.com/matrix-org/synapse/tree/master/docs/code_style.md.
 
 Please ensure your changes match the cosmetic style of the existing project,
 and **never** mix cosmetic and functional changes in the same commit, as it
diff --git a/INSTALL.md b/INSTALL.md
index 6bce370ea..3eb979c36 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -373,7 +373,7 @@ is suitable for local testing, but for any practical use, you will either need
 to enable a reverse proxy, or configure Synapse to expose an HTTPS port.
 
 For information on using a reverse proxy, see
-[docs/reverse_proxy.rst](docs/reverse_proxy.rst).
+[docs/reverse_proxy.md](docs/reverse_proxy.md).
 
 To configure Synapse to expose an HTTPS port, you will need to edit
 `homeserver.yaml`, as follows:
@@ -446,7 +446,7 @@ on your server even if `enable_registration` is `false`.
 ## Setting up a TURN server
 
 For reliable VoIP calls to be routed via this homeserver, you MUST configure
-a TURN server.  See [docs/turn-howto.rst](docs/turn-howto.rst) for details.
+a TURN server.  See [docs/turn-howto.md](docs/turn-howto.md) for details.
 
 ## URL previews
 
diff --git a/README.rst b/README.rst
index bbff8de5a..2948fd076 100644
--- a/README.rst
+++ b/README.rst
@@ -115,7 +115,7 @@ Registering a new user from a client
 
 By default, registration of new users via Matrix clients is disabled. To enable
 it, specify ``enable_registration: true`` in ``homeserver.yaml``. (It is then
-recommended to also set up CAPTCHA - see `<docs/CAPTCHA_SETUP.rst>`_.)
+recommended to also set up CAPTCHA - see `<docs/CAPTCHA_SETUP.md>`_.)
 
 Once ``enable_registration`` is set to ``true``, it is possible to register a
 user via `riot.im <https://riot.im/app/#/register>`_ or other Matrix clients.
@@ -186,7 +186,7 @@ Almost all installations should opt to use PostreSQL. Advantages include:
   synapse itself.
 
 For information on how to install and use PostgreSQL, please see
-`docs/postgres.rst <docs/postgres.rst>`_.
+`docs/postgres.md <docs/postgres.md>`_.
 
 .. _reverse-proxy:
 
@@ -201,7 +201,7 @@ It is recommended to put a reverse proxy such as
 doing so is that it means that you can expose the default https port (443) to
 Matrix clients without needing to run Synapse with root privileges.
 
-For information on configuring one, see `<docs/reverse_proxy.rst>`_.
+For information on configuring one, see `<docs/reverse_proxy.md>`_.
 
 Identity Servers
 ================
diff --git a/UPGRADE.rst b/UPGRADE.rst
index dddcd75fd..5aaf80490 100644
--- a/UPGRADE.rst
+++ b/UPGRADE.rst
@@ -103,7 +103,7 @@ Upgrading to v1.2.0
 ===================
 
 Some counter metrics have been renamed, with the old names deprecated. See
-`the metrics documentation <docs/metrics-howto.rst#renaming-of-metrics--deprecation-of-old-names-in-12>`_
+`the metrics documentation <docs/metrics-howto.md#renaming-of-metrics--deprecation-of-old-names-in-12>`_
 for details.
 
 Upgrading to v1.1.0
diff --git a/changelog.d/5849.doc b/changelog.d/5849.doc
new file mode 100644
index 000000000..fbe62e863
--- /dev/null
+++ b/changelog.d/5849.doc
@@ -0,0 +1 @@
+Convert documentation to markdown (from rst)
diff --git a/docs/CAPTCHA_SETUP.rst b/docs/CAPTCHA_SETUP.md
similarity index 50%
rename from docs/CAPTCHA_SETUP.rst
rename to docs/CAPTCHA_SETUP.md
index 0c22ee4ff..5f9057530 100644
--- a/docs/CAPTCHA_SETUP.rst
+++ b/docs/CAPTCHA_SETUP.md
@@ -1,30 +1,31 @@
+# Overview
 Captcha can be enabled for this home server. This file explains how to do that.
 The captcha mechanism used is Google's ReCaptcha. This requires API keys from Google.
 
-Getting keys
-------------
+## Getting keys
+
 Requires a public/private key pair from:
 
-https://developers.google.com/recaptcha/
+<https://developers.google.com/recaptcha/>
 
 Must be a reCAPTCHA v2 key using the "I'm not a robot" Checkbox option
 
-Setting ReCaptcha Keys
-----------------------
+## Setting ReCaptcha Keys
+
 The keys are a config option on the home server config. If they are not
-visible, you can generate them via --generate-config. Set the following value::
+visible, you can generate them via `--generate-config`. Set the following value:
 
-  recaptcha_public_key: YOUR_PUBLIC_KEY
-  recaptcha_private_key: YOUR_PRIVATE_KEY
+    recaptcha_public_key: YOUR_PUBLIC_KEY
+    recaptcha_private_key: YOUR_PRIVATE_KEY
 
-In addition, you MUST enable captchas via::
+In addition, you MUST enable captchas via:
 
-  enable_registration_captcha: true
+    enable_registration_captcha: true
+
+## Configuring IP used for auth
 
-Configuring IP used for auth
-----------------------------
 The ReCaptcha API requires that the IP address of the user who solved the
 captcha is sent. If the client is connecting through a proxy or load balancer,
-it may be required to use the X-Forwarded-For (XFF) header instead of the origin
-IP address. This can be configured using the x_forwarded directive in the
+it may be required to use the `X-Forwarded-For` (XFF) header instead of the origin
+IP address. This can be configured using the `x_forwarded` directive in the
 listeners section of the homeserver.yaml configuration file.
diff --git a/docs/MSC1711_certificates_FAQ.md b/docs/MSC1711_certificates_FAQ.md
index 83497380d..80bd1294c 100644
--- a/docs/MSC1711_certificates_FAQ.md
+++ b/docs/MSC1711_certificates_FAQ.md
@@ -147,7 +147,7 @@ your domain, you can simply route all traffic through the reverse proxy by
 updating the SRV record appropriately (or removing it, if the proxy listens on
 8448).
 
-See [reverse_proxy.rst](reverse_proxy.rst) for information on setting up a
+See [reverse_proxy.md](reverse_proxy.md) for information on setting up a
 reverse proxy.
 
 #### Option 3: add a .well-known file to delegate your matrix traffic
@@ -319,7 +319,7 @@ We no longer actively recommend against using a reverse proxy. Many admins will
 find it easier to direct federation traffic to a reverse proxy and manage their
 own TLS certificates, and this is a supported configuration.
 
-See [reverse_proxy.rst](reverse_proxy.rst) for information on setting up a
+See [reverse_proxy.md](reverse_proxy.md) for information on setting up a
 reverse proxy.
 
 ### Do I still need to give my TLS certificates to Synapse if I am using a reverse proxy?
diff --git a/docs/README.md b/docs/README.md
new file mode 100644
index 000000000..3c6ea48c6
--- /dev/null
+++ b/docs/README.md
@@ -0,0 +1,7 @@
+# Synapse Documentation
+
+This directory contains documentation specific to the `synapse` homeserver.
+
+All matrix-generic documentation now lives in its own project, located at [matrix-org/matrix-doc](https://github.com/matrix-org/matrix-doc)
+
+(Note:  some items here may be moved to [matrix-org/matrix-doc](https://github.com/matrix-org/matrix-doc) at some point in the future.)
diff --git a/docs/README.rst b/docs/README.rst
deleted file mode 100644
index 3012da8b1..000000000
--- a/docs/README.rst
+++ /dev/null
@@ -1,6 +0,0 @@
-All matrix-generic documentation now lives in its own project at
-
-github.com/matrix-org/matrix-doc.git
-
-Only Synapse implementation-specific documentation lives here now
-(together with some older stuff will be shortly migrated over to matrix-doc)
diff --git a/docs/ancient_architecture_notes.md b/docs/ancient_architecture_notes.md
new file mode 100644
index 000000000..3ea8976cc
--- /dev/null
+++ b/docs/ancient_architecture_notes.md
@@ -0,0 +1,81 @@
+> **Warning**
+>  These architecture notes are spectacularly old, and date back
+> to when Synapse was just federation code in isolation. This should be
+> merged into the main spec.
+
+# Server to Server
+
+## Server to Server Stack
+
+To use the server to server stack, home servers should only need to
+interact with the Messaging layer.
+
+The server to server side of things is designed into 4 distinct layers:
+
+1.  Messaging Layer
+2.  Pdu Layer
+3.  Transaction Layer
+4.  Transport Layer
+
+Where the bottom (the transport layer) is what talks to the internet via
+HTTP, and the top (the messaging layer) talks to the rest of the Home
+Server with a domain specific API.
+
+1. **Messaging Layer**
+
+    This is what the rest of the Home Server hits to send messages, join rooms,
+    etc. It also allows you to register callbacks for when it gets notified by
+    lower levels that e.g. a new message has been received.
+
+    It is responsible for serializing requests to send to the data
+    layer, and to parse requests received from the data layer.
+
+2. **PDU Layer**
+
+    This layer handles:
+
+    - duplicate `pdu_id`'s - i.e., it makes sure we ignore them.
+    - responding to requests for a given `pdu_id`
+    - responding to requests for all metadata for a given context (i.e. room)
+    - handling incoming backfill requests
+
+    So it has to parse incoming messages to discover which are metadata and
+    which aren't, and has to correctly clobber existing metadata where
+    appropriate.
+
+    For incoming PDUs, it has to check the PDUs it references to see
+    if we have missed any. If we have, go and ask someone (another
+    home server) for it.
+
+3. **Transaction Layer**
+
+    This layer makes incoming requests idempotent. i.e., it stores
+    which transaction id's we have seen and what our responses were.
+    If we have already seen a message with the given transaction id,
+    we do not notify higher levels but simply respond with the
+    previous response.
+
+    `transaction_id` is from "`GET /send/<tx_id>/`"
+
+    It's also responsible for batching PDUs into a single transaction for
+    sending to remote destinations, so that we only ever have one
+    transaction in flight to a given destination at any one time.
+
+    This is also responsible for answering requests for things after a
+    given set of transactions, i.e., ask for everything after 'ver' X.
+
+4. **Transport Layer**
+
+    This is responsible for starting a HTTP server and hitting the
+    correct callbacks on the Transaction layer, as well as sending
+    both data and requests for data.
+
+## Persistence
+
+We persist things in a single sqlite3 database. All database queries get
+run on a separate, dedicated thread. This means that we only ever have one
+query running at a time, making it a lot easier to do things in a safe
+manner.
+
+The queries are located in the `synapse.persistence.transactions` module,
+and the table information in the `synapse.persistence.tables` module.
diff --git a/docs/ancient_architecture_notes.rst b/docs/ancient_architecture_notes.rst
deleted file mode 100644
index 2a5a2613c..000000000
--- a/docs/ancient_architecture_notes.rst
+++ /dev/null
@@ -1,59 +0,0 @@
-.. WARNING::
-  These architecture notes are spectacularly old, and date back to when Synapse 
-  was just federation code in isolation.  This should be merged into the main
-  spec.
-  
-
-= Server to Server =
-
-== Server to Server Stack ==
-
-To use the server to server stack, home servers should only need to interact with the Messaging layer.
-
-The server to server side of things is designed into 4 distinct layers:
-
-    1. Messaging Layer
-    2. Pdu Layer
-    3. Transaction Layer
-    4. Transport Layer
-
-Where the bottom (the transport layer) is what talks to the internet via HTTP, and the top (the messaging layer) talks to the rest of the Home Server with a domain specific API.
-
-1. Messaging Layer
-    This is what the rest of the Home Server hits to send messages, join rooms, etc. It also allows you to register callbacks for when it get's notified by lower levels that e.g. a new message has been received.
-
-    It is responsible for serializing requests to send to the data layer, and to parse requests received from the data layer.
-
-
-2. PDU Layer
-    This layer handles: 
-        * duplicate pdu_id's - i.e., it makes sure we ignore them. 
-        * responding to requests for a given pdu_id
-        * responding to requests for all metadata for a given context (i.e. room)
-        * handling incoming backfill requests
-
-    So it has to parse incoming messages to discover which are metadata and which aren't, and has to correctly clobber existing metadata where appropriate.
-
-    For incoming PDUs, it has to check the PDUs it references to see if we have missed any. If we have go and ask someone (another home server) for it.    
-
-
-3. Transaction Layer
-    This layer makes incoming requests idempotent. I.e., it stores which transaction id's we have seen and what our response were. If we have already seen a message with the given transaction id, we do not notify higher levels but simply respond with the previous response.
-
-transaction_id is from "GET /send/<tx_id>/"
-
-    It's also responsible for batching PDUs into single transaction for sending to remote destinations, so that we only ever have one transaction in flight to a given destination at any one time.
-
-    This is also responsible for answering requests for things after a given set of transactions, i.e., ask for everything after 'ver' X.
-
-
-4. Transport Layer
-    This is responsible for starting a HTTP server and hitting the correct callbacks on the Transaction layer, as well as sending both data and requests for data.
-
-
-== Persistence ==
-
-We persist things in a single sqlite3 database. All database queries get run on a separate, dedicated thread. This that we only ever have one query running at a time, making it a lot easier to do things in a safe manner.
-
-The queries are located in the synapse.persistence.transactions module, and the table information in the synapse.persistence.tables module.
-
diff --git a/docs/application_services.md b/docs/application_services.md
new file mode 100644
index 000000000..06cb79f1f
--- /dev/null
+++ b/docs/application_services.md
@@ -0,0 +1,31 @@
+# Registering an Application Service
+
+The registration of new application services depends on the homeserver used. 
+In synapse, you need to create a new configuration file for your AS and add it
+to the list specified under the `app_service_config_files` config
+option in your synapse config.
+
+For example:
+
+```yaml
+app_service_config_files:
+- /home/matrix/.synapse/<your-AS>.yaml
+```
+
+The format of the AS configuration file is as follows:
+
+```yaml
+url: <base url of AS>
+as_token: <token AS will add to requests to HS>
+hs_token: <token HS will add to requests to AS>
+sender_localpart: <localpart of AS user>
+namespaces:
+  users:  # List of users we're interested in
+    - exclusive: <bool>
+      regex: <regex>
+    - ...
+  aliases: []  # List of aliases we're interested in
+  rooms: [] # List of room ids we're interested in
+```
+
+See the [spec](https://matrix.org/docs/spec/application_service/unstable.html) for further details on how application services work.
diff --git a/docs/application_services.rst b/docs/application_services.rst
deleted file mode 100644
index fbc0c7e96..000000000
--- a/docs/application_services.rst
+++ /dev/null
@@ -1,35 +0,0 @@
-Registering an Application Service
-==================================
-
-The registration of new application services depends on the homeserver used. 
-In synapse, you need to create a new configuration file for your AS and add it
-to the list specified under the ``app_service_config_files`` config
-option in your synapse config.
-
-For example:
-
-.. code-block:: yaml
-
-  app_service_config_files:
-  - /home/matrix/.synapse/<your-AS>.yaml
-
-
-The format of the AS configuration file is as follows:
-
-..  code-block:: yaml
-
-    url: <base url of AS>
-    as_token: <token AS will add to requests to HS>
-    hs_token: <token HS will add to requests to AS>
-    sender_localpart: <localpart of AS user>
-    namespaces:
-      users:  # List of users we're interested in
-        - exclusive: <bool>
-          regex: <regex>
-        - ...
-      aliases: []  # List of aliases we're interested in
-      rooms: [] # List of room ids we're interested in
-
-See the spec_ for further details on how application services work.
-
-.. _spec: https://matrix.org/docs/spec/application_service/unstable.html
diff --git a/docs/architecture.md b/docs/architecture.md
new file mode 100644
index 000000000..0c7f315f3
--- /dev/null
+++ b/docs/architecture.md
@@ -0,0 +1,65 @@
+# Synapse Architecture
+
+As of the end of Oct 2014, Synapse's overall architecture looks like:
+
+        synapse
+        .-----------------------------------------------------.
+        |                          Notifier                   |
+        |                            ^  |                     |
+        |                            |  |                     |
+        |                  .------------|------.              |
+        |                  | handlers/  |      |              |
+        |                  |            v      |              |
+        |                  | Event*Handler <--------> rest/* <=> Client
+        |                  | Rooms*Handler     |              |
+    HS <=> federation/* <==> FederationHandler |              |
+        |      |           | PresenceHandler   |              |
+        |      |           | TypingHandler     |              |
+        |      |           '-------------------'              |
+        |      |                 |     |                      |
+        |      |              state/*  |                      |
+        |      |                 |     |                      |
+        |      |                 v     v                      |
+        |      `--------------> storage/*                     |
+        |                          |                          |
+        '--------------------------|--------------------------'
+                                   v
+                                .----.
+                                | DB |
+                                '----'
+
+-   Handlers: business logic of synapse itself. Follows a set contract of BaseHandler:
+    -   BaseHandler gives us onNewRoomEvent which: (TODO: flesh this out and make it less cryptic):
+        -   handle_state(event)
+        -   auth(event)
+        -   persist_event(event)
+        -   notify notifier or federation(event)
+    -   PresenceHandler: use distributor to get EDUs out of Federation.
+        Very lightweight logic built on the distributor
+    -   TypingHandler: use distributor to get EDUs out of Federation.
+        Very lightweight logic built on the distributor
+    -   EventsHandler: handles the events stream...
+    -   FederationHandler: - gets PDU from Federation Layer; turns into
+        an event; follows basehandler functionality.
+    -   RoomsHandler: does all the room logic, including members - lots
+        of classes in RoomsHandler.
+    -   ProfileHandler: talks to the storage to store/retrieve profile
+        info.
+-   EventFactory: generates events of particular event types.
+-   Notifier: Backs the events handler
+-   REST: Interfaces handlers and events to the outside world via
+    HTTP/JSON. Converts events back and forth from JSON.
+-   Federation: holds the HTTP client & server to talk to other servers.
+    Does replication to make sure there's nothing missing in the graph.
+    Handles reliability. Handles txns.
+-   Distributor: generic event bus. used for presence & typing only
+    currently. Notifier could be implemented using Distributor - so far
+    we are only using for things which actually /require/ dynamic
+    pluggability however as it can obfuscate the actual flow of control.
+-   Auth: helper singleton to say whether a given event is allowed to do
+    a given thing (TODO: put this on the diagram)
+-   State: helper singleton: does state conflict resolution. You give it
+    an event and it tells you if it actually updates the state or not,
+    and annotates the event up properly and handles merge conflict
+    resolution.
+-   Storage: abstracts the storage engine.
diff --git a/docs/architecture.rst b/docs/architecture.rst
deleted file mode 100644
index 98050428b..000000000
--- a/docs/architecture.rst
+++ /dev/null
@@ -1,68 +0,0 @@
-Synapse Architecture
-====================
-
-As of the end of Oct 2014, Synapse's overall architecture looks like::
-
-        synapse
-        .-----------------------------------------------------.
-        |                          Notifier                   |
-        |                            ^  |                     |
-        |                            |  |                     |
-        |                  .------------|------.              |
-        |                  | handlers/  |      |              |
-        |                  |            v      |              |
-        |                  | Event*Handler <--------> rest/* <=> Client
-        |                  | Rooms*Handler     |              |
-  HSes <=> federation/* <==> FederationHandler |              |
-        |      |           | PresenceHandler   |              |
-        |      |           | TypingHandler     |              |
-        |      |           '-------------------'              |
-        |      |                 |     |                      |
-        |      |              state/*  |                      |
-        |      |                 |     |                      |
-        |      |                 v     v                      |
-        |      `--------------> storage/*                     |
-        |                          |                          |
-        '--------------------------|--------------------------'
-                                   v
-                                .----.
-                                | DB |
-                                '----'
-
-* Handlers: business logic of synapse itself.  Follows a set contract of BaseHandler:
-
-  - BaseHandler gives us onNewRoomEvent which: (TODO: flesh this out and make it less cryptic):
- 
-    + handle_state(event)
-    + auth(event)
-    + persist_event(event)
-    + notify notifier or federation(event)
-   
-  - PresenceHandler: use distributor to get EDUs out of Federation.  Very
-    lightweight logic built on the distributor
-  - TypingHandler: use distributor to get EDUs out of Federation.  Very
-    lightweight logic built on the distributor
-  - EventsHandler: handles the events stream...
-  - FederationHandler: - gets PDU from Federation Layer; turns into an event;
-    follows basehandler functionality.
-  - RoomsHandler: does all the room logic, including members - lots of classes in
-    RoomsHandler.
-  - ProfileHandler: talks to the storage to store/retrieve profile info.
-
-* EventFactory: generates events of particular event types.
-* Notifier: Backs the events handler
-* REST: Interfaces handlers and events to the outside world via HTTP/JSON.
-  Converts events back and forth from JSON.
-* Federation: holds the HTTP client & server to talk to other servers.  Does
-  replication to make sure there's nothing missing in the graph.  Handles
-  reliability.  Handles txns.
-* Distributor: generic event bus. used for presence & typing only currently. 
-  Notifier could be implemented using Distributor - so far we are only using for
-  things which actually /require/ dynamic pluggability however as it can
-  obfuscate the actual flow of control.
-* Auth: helper singleton to say whether a given event is allowed to do a given
-  thing  (TODO: put this on the diagram)
-* State: helper singleton: does state conflict resolution. You give it an event
-  and it tells you if it actually updates the state or not, and annotates the
-  event up properly and handles merge conflict resolution.
-* Storage: abstracts the storage engine.
diff --git a/docs/code_style.md b/docs/code_style.md
new file mode 100644
index 000000000..f983f72d6
--- /dev/null
+++ b/docs/code_style.md
@@ -0,0 +1,169 @@
+# Code Style
+
+## Formatting tools
+
+The Synapse codebase uses a number of code formatting tools in order to
+quickly and automatically check for formatting (and sometimes logical)
+errors in code.
+
+The necessary tools are detailed below.
+
+-   **black**
+
+    The Synapse codebase uses [black](https://pypi.org/project/black/)
+    as an opinionated code formatter, ensuring all committed code is
+    properly formatted.
+
+    First install `black` with:
+
+        pip install --upgrade black
+
+    Have `black` auto-format your code (it shouldn't change any
+    functionality) with:
+
+        black . --exclude="\.tox|build|env"
+
+-   **flake8**
+
+    `flake8` is a code checking tool. We require code to pass `flake8`
+    before being merged into the codebase.
+
+    Install `flake8` with:
+
+        pip install --upgrade flake8
+
+    Check all application and test code with:
+
+        flake8 synapse tests
+
+-   **isort**
+
+    `isort` ensures imports are nicely formatted, and can suggest and
+    auto-fix issues such as double-importing.
+
+    Install `isort` with:
+
+        pip install --upgrade isort
+
+    Auto-fix imports with:
+
+        isort -rc synapse tests
+
+    `-rc` means to recursively search the given directories.
+
+It's worth noting that modern IDEs and text editors can run these tools
+automatically on save. It may be worth looking into whether this
+functionality is supported in your editor for a more convenient
+development workflow. It is not, however, recommended to run `flake8` on
+save as it takes a while and is very resource intensive.
+
+## General rules
+
+-   **Naming**:
+    -   Use camel case for class and type names
+    -   Use underscores for functions and variables.
+-   **Docstrings**: should follow the [google code
+    style](https://google.github.io/styleguide/pyguide.html#38-comments-and-docstrings).
+    This is so that we can generate documentation with
+    [sphinx](http://sphinxcontrib-napoleon.readthedocs.org/en/latest/).
+    See the
+    [examples](http://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html)
+    in the sphinx documentation.
+-   **Imports**:
+    -   Imports should be sorted by `isort` as described above.
+    -   Prefer to import classes and functions rather than packages or
+        modules.
+
+        Example:
+
+            from synapse.types import UserID
+            ...
+            user_id = UserID(local, server)
+
+        is preferred over:
+
+            from synapse import types
+            ...
+            user_id = types.UserID(local, server)
+
+        (or any other variant).
+
+        This goes against the advice in the Google style guide, but it
+        means that errors in the name are caught early (at import time).
+
+    -   Avoid wildcard imports (`from synapse.types import *`) and
+        relative imports (`from .types import UserID`).
+
+## Configuration file format
+
+The [sample configuration file](./sample_config.yaml) acts as a
+reference to Synapse's configuration options for server administrators.
+Remember that many readers will be unfamiliar with YAML and server
+administration in general, so that it is important that the file be as
+easy to understand as possible, which includes following a consistent
+format.
+
+Some guidelines follow:
+
+-   Sections should be separated with a heading consisting of a single
+    line prefixed and suffixed with `##`. There should be **two** blank
+    lines before the section header, and **one** after.
+-   Each option should be listed in the file with the following format:
+    -   A comment describing the setting. Each line of this comment
+        should be prefixed with a hash (`#`) and a space.
+
+        The comment should describe the default behaviour (ie, what
+        happens if the setting is omitted), as well as what the effect
+        will be if the setting is changed.
+
+        Often, the comment ends with something like "uncomment the
+        following to <do action>".
+
+    -   A line consisting of only `#`.
+    -   A commented-out example setting, prefixed with only `#`.
+
+        For boolean (on/off) options, convention is that this example
+        should be the *opposite* to the default (so the comment will end
+        with "Uncomment the following to enable [or disable]
+        <feature>.") For other options, the example should give some
+        non-default value which is likely to be useful to the reader.
+
+-   There should be a blank line between each option.
+-   Where several settings are grouped into a single dict, *avoid* the
+    convention where the whole block is commented out, resulting in
+    comment lines starting `# #`, as this is hard to read and confusing
+    to edit. Instead, leave the top-level config option uncommented, and
+    follow the conventions above for sub-options. Ensure that your code
+    correctly handles the top-level option being set to `None` (as it
+    will be if no sub-options are enabled).
+-   Lines should be wrapped at 80 characters.
+
+Example:
+
+    ## Frobnication ##
+
+    # The frobnicator will ensure that all requests are fully frobnicated.
+    # To enable it, uncomment the following.
+    #
+    #frobnicator_enabled: true
+
+    # By default, the frobnicator will frobnicate with the default frobber.
+    # The following will make it use an alternative frobber.
+    #
+    #frobnicator_frobber: special_frobber
+
+    # Settings for the frobber
+    #
+    frobber:
+       # frobbing speed. Defaults to 1.
+       #
+       #speed: 10
+
+       # frobbing distance. Defaults to 1000.
+       #
+       #distance: 100
+
+Note that the sample configuration is generated from the synapse code
+and is maintained by a script, `scripts-dev/generate_sample_config`.
+Making sure that the output from this script matches the desired format
+is left as an exercise for the reader!
diff --git a/docs/code_style.rst b/docs/code_style.rst
deleted file mode 100644
index 39ac4ebed..000000000
--- a/docs/code_style.rst
+++ /dev/null
@@ -1,180 +0,0 @@
-Code Style
-==========
-
-Formatting tools
-----------------
-
-The Synapse codebase uses a number of code formatting tools in order to
-quickly and automatically check for formatting (and sometimes logical) errors
-in code.
-
-The necessary tools are detailed below.
-
-- **black**
-
-  The Synapse codebase uses `black <https://pypi.org/project/black/>`_ as an
-  opinionated code formatter, ensuring all comitted code is properly
-  formatted.
-
-  First install ``black`` with::
-
-    pip install --upgrade black
-
-  Have ``black`` auto-format your code (it shouldn't change any functionality)
-  with::
-
-    black . --exclude="\.tox|build|env"
-
-- **flake8**
-
-  ``flake8`` is a code checking tool. We require code to pass ``flake8`` before being merged into the codebase.
-
-  Install ``flake8`` with::
-
-    pip install --upgrade flake8
-
-  Check all application and test code with::
-
-    flake8 synapse tests
-
-- **isort**
-
-  ``isort`` ensures imports are nicely formatted, and can suggest and
-  auto-fix issues such as double-importing.
-
-  Install ``isort`` with::
-
-    pip install --upgrade isort
-
-  Auto-fix imports with::
-
-    isort -rc synapse tests
-
-  ``-rc`` means to recursively search the given directories.
-
-It's worth noting that modern IDEs and text editors can run these tools
-automatically on save. It may be worth looking into whether this
-functionality is supported in your editor for a more convenient development
-workflow. It is not, however, recommended to run ``flake8`` on save as it
-takes a while and is very resource intensive.
-
-General rules
--------------
-
-- **Naming**:
-
-  - Use camel case for class and type names
-  - Use underscores for functions and variables.
-
-- **Docstrings**: should follow the `google code style
-  <https://google.github.io/styleguide/pyguide.html#38-comments-and-docstrings>`_.
-  This is so that we can generate documentation with `sphinx
-  <http://sphinxcontrib-napoleon.readthedocs.org/en/latest/>`_. See the
-  `examples
-  <http://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html>`_
-  in the sphinx documentation.
-
-- **Imports**:
-
-  - Imports should be sorted by ``isort`` as described above.
-
-  - Prefer to import classes and functions rather than packages or modules.
-
-    Example::
-
-      from synapse.types import UserID
-      ...
-      user_id = UserID(local, server)
-
-    is preferred over::
-
-      from synapse import types
-      ...
-      user_id = types.UserID(local, server)
-
-    (or any other variant).
-
-    This goes against the advice in the Google style guide, but it means that
-    errors in the name are caught early (at import time).
-
-  - Avoid wildcard imports (``from synapse.types import *``) and relative
-    imports (``from .types import UserID``).
-
-Configuration file format
--------------------------
-
-The `sample configuration file <./sample_config.yaml>`_ acts as a reference to
-Synapse's configuration options for server administrators. Remember that many
-readers will be unfamiliar with YAML and server administration in general, so
-that it is important that the file be as easy to understand as possible, which
-includes following a consistent format.
-
-Some guidelines follow:
-
-* Sections should be separated with a heading consisting of a single line
-  prefixed and suffixed with ``##``. There should be **two** blank lines
-  before the section header, and **one** after.
-
-* Each option should be listed in the file with the following format:
-
-  * A comment describing the setting. Each line of this comment should be
-    prefixed with a hash (``#``) and a space.
-
-    The comment should describe the default behaviour (ie, what happens if
-    the setting is omitted), as well as what the effect will be if the
-    setting is changed.
-
-    Often, the comment end with something like "uncomment the
-    following to \<do action>".
-
-  * A line consisting of only ``#``.
-
-  * A commented-out example setting, prefixed with only ``#``.
-
-    For boolean (on/off) options, convention is that this example should be
-    the *opposite* to the default (so the comment will end with "Uncomment
-    the following to enable [or disable] \<feature\>." For other options,
-    the example should give some non-default value which is likely to be
-    useful to the reader.
-
-* There should be a blank line between each option.
-
-* Where several settings are grouped into a single dict, *avoid* the
-  convention where the whole block is commented out, resulting in comment
-  lines starting ``# #``, as this is hard to read and confusing to
-  edit. Instead, leave the top-level config option uncommented, and follow
-  the conventions above for sub-options. Ensure that your code correctly
-  handles the top-level option being set to ``None`` (as it will be if no
-  sub-options are enabled).
-
-* Lines should be wrapped at 80 characters.
-
-Example::
-
-    ## Frobnication ##
-
-    # The frobnicator will ensure that all requests are fully frobnicated.
-    # To enable it, uncomment the following.
-    #
-    #frobnicator_enabled: true
-
-    # By default, the frobnicator will frobnicate with the default frobber.
-    # The following will make it use an alternative frobber.
-    #
-    #frobincator_frobber: special_frobber
-
-    # Settings for the frobber
-    #
-    frobber:
-       # frobbing speed. Defaults to 1.
-       #
-       #speed: 10
-
-       # frobbing distance. Defaults to 1000.
-       #
-       #distance: 100
-
-Note that the sample configuration is generated from the synapse code and is
-maintained by a script, ``scripts-dev/generate_sample_config``. Making sure
-that the output from this script matches the desired format is left as an
-exercise for the reader!
diff --git a/docs/federate.md b/docs/federate.md
index 6d6bb85e1..193e2d2df 100644
--- a/docs/federate.md
+++ b/docs/federate.md
@@ -148,7 +148,7 @@ We no longer actively recommend against using a reverse proxy. Many admins will
 find it easier to direct federation traffic to a reverse proxy and manage their
 own TLS certificates, and this is a supported configuration.
 
-See [reverse_proxy.rst](reverse_proxy.rst) for information on setting up a
+See [reverse_proxy.md](reverse_proxy.md) for information on setting up a
 reverse proxy.
 
 #### Do I still need to give my TLS certificates to Synapse if I am using a reverse proxy?
@@ -184,7 +184,7 @@ a complicated dance which requires connections in both directions).
 
 Another common problem is that people on other servers can't join rooms that
 you invite them to. This can be caused by an incorrectly-configured reverse
-proxy: see [reverse_proxy.rst](<reverse_proxy.rst>) for instructions on how to correctly
+proxy: see [reverse_proxy.md](<reverse_proxy.md>) for instructions on how to correctly
 configure a reverse proxy.
 
 ## Running a Demo Federation of Synapses
diff --git a/docs/log_contexts.md b/docs/log_contexts.md
new file mode 100644
index 000000000..5331e8c88
--- /dev/null
+++ b/docs/log_contexts.md
@@ -0,0 +1,494 @@
+# Log Contexts
+
+To help track the processing of individual requests, synapse uses a
+'`log context`' to track which request it is handling at any given
+moment. This is done via a thread-local variable; a `logging.Filter` is
+then used to fish the information back out of the thread-local variable
+and add it to each log record.
+
+Logcontexts are also used for CPU and database accounting, so that we
+can track which requests were responsible for high CPU use or database
+activity.
+
+The `synapse.logging.context` module provides facilities for managing
+the current log context (as well as providing the `LoggingContextFilter`
+class).
+
+Deferreds make the whole thing complicated, so this document describes
+how it all works, and how to write code which follows the rules.
+
+## Logcontexts without Deferreds
+
+In the absence of any Deferred voodoo, things are simple enough. As with
+any code of this nature, the rule is that our function should leave
+things as it found them:
+
+```python
+from synapse.logging import context         # omitted from future snippets
+
+def handle_request(request_id):
+    request_context = context.LoggingContext()
+
+    calling_context = context.LoggingContext.current_context()
+    context.LoggingContext.set_current_context(request_context)
+    try:
+        request_context.request = request_id
+        do_request_handling()
+        logger.debug("finished")
+    finally:
+        context.LoggingContext.set_current_context(calling_context)
+
+def do_request_handling():
+    logger.debug("phew")  # this will be logged against request_id
+```
+
+LoggingContext implements the context management methods, so the above
+can be written much more succinctly as:
+
+```python
+def handle_request(request_id):
+    with context.LoggingContext() as request_context:
+        request_context.request = request_id
+        do_request_handling()
+        logger.debug("finished")
+
+def do_request_handling():
+    logger.debug("phew")
+```
+
+## Using logcontexts with Deferreds
+
+Deferreds --- and in particular, `defer.inlineCallbacks` --- break the
+linear flow of code so that there is no longer a single entry point
+where we should set the logcontext and a single exit point where we
+should remove it.
+
+Consider the example above, where `do_request_handling` needs to do some
+blocking operation, and returns a deferred:
+
+```python
+@defer.inlineCallbacks
+def handle_request(request_id):
+    with context.LoggingContext() as request_context:
+        request_context.request = request_id
+        yield do_request_handling()
+        logger.debug("finished")
+```
+
+In the above flow:
+
+-   The logcontext is set
+-   `do_request_handling` is called, and returns a deferred
+-   `handle_request` yields the deferred
+-   The `inlineCallbacks` wrapper of `handle_request` returns a deferred
+
+So we have stopped processing the request (and will probably go on to
+start processing the next), without clearing the logcontext.
+
+To circumvent this problem, synapse code assumes that, wherever you have
+a deferred, you will want to yield on it. To that end, wherever
+functions return a deferred, we adopt the following conventions:
+
+**Rules for functions returning deferreds:**
+
+> -   If the deferred is already complete, the function returns with the
+>     same logcontext it started with.
+> -   If the deferred is incomplete, the function clears the logcontext
+>     before returning; when the deferred completes, it restores the
+>     logcontext before running any callbacks.
+
+That sounds complicated, but actually it means a lot of code (including
+the example above) "just works". There are two cases:
+
+-   If `do_request_handling` returns a completed deferred, then the
+    logcontext will still be in place. In this case, execution will
+    continue immediately after the `yield`; the "finished" line will
+    be logged against the right context, and the `with` block restores
+    the original context before we return to the caller.
+-   If the returned deferred is incomplete, `do_request_handling` clears
+    the logcontext before returning. The logcontext is therefore clear
+    when `handle_request` yields the deferred. At that point, the
+    `inlineCallbacks` wrapper adds a callback to the deferred, and
+    returns another (incomplete) deferred to the caller, and it is safe
+    to begin processing the next request.
+
+    Once `do_request_handling`'s deferred completes, it will reinstate
+    the logcontext, before running the callback added by the
+    `inlineCallbacks` wrapper. That callback runs the second half of
+    `handle_request`, so again the "finished" line will be logged
+    against the right context, and the `with` block restores the
+    original context.
+
+As an aside, it's worth noting that `handle_request` follows our
+rules --- though that only matters if the caller has its own logcontext
+which it cares about.
+
+The following sections describe pitfalls and helpful patterns when
+implementing these rules.
+
+## Always yield your deferreds
+
+
+Whenever you get a deferred back from a function, you should `yield` on
+it as soon as possible. (Returning it directly to your caller is ok too,
+if you're not doing `inlineCallbacks`.) Do not pass go; do not do any
+logging; do not call any other functions.
+
+```python
+@defer.inlineCallbacks
+def fun():
+    logger.debug("starting")
+    yield do_some_stuff()       # just like this
+
+    d = more_stuff()
+    result = yield d            # also fine, of course
+
+    return result
+
+def nonInlineCallbacksFun():
+    logger.debug("just a wrapper really")
+    return do_some_stuff()      # this is ok too - the caller will yield on
+                                # it anyway.
+```
+
+Provided this pattern is followed all the way back up the callchain
+to where the logcontext was set, this will make things work out ok:
+provided `do_some_stuff` and `more_stuff` follow the rules above, then
+so will `fun` (as wrapped by `inlineCallbacks`) and
+`nonInlineCallbacksFun`.
+
+It's all too easy to forget to `yield`: for instance if we forgot that
+`do_some_stuff` returned a deferred, we might plough on regardless. This
+leads to a mess; it will probably work itself out eventually, but not
+before a load of stuff has been logged against the wrong context.
+(Normally, other things will break, more obviously, if you forget to
+`yield`, so this tends not to be a major problem in practice.)
+
+Of course sometimes you need to do something a bit fancier with your
+Deferreds - not all code follows the linear A-then-B-then-C pattern.
+Notes on implementing more complex patterns are in later sections.
+
+## Where you create a new Deferred, make it follow the rules
+
+Most of the time, a Deferred comes from another synapse function.
+Sometimes, though, we need to make up a new Deferred, or we get a
+Deferred back from external code. We need to make it follow our rules.
+
+The easy way to do it is with a combination of `defer.inlineCallbacks`,
+and `context.PreserveLoggingContext`. Suppose we want to implement
+`sleep`, which returns a deferred which will run its callbacks after a
+given number of seconds. That might look like:
+
+```python
+# not a logcontext-rules-compliant function
+def get_sleep_deferred(seconds):
+    d = defer.Deferred()
+    reactor.callLater(seconds, d.callback, None)
+    return d
+```
+
+That doesn't follow the rules, but we can fix it by wrapping it with
+`PreserveLoggingContext` and `yield` ing on it:
+
+```python
+@defer.inlineCallbacks
+def sleep(seconds):
+    with PreserveLoggingContext():
+        yield get_sleep_deferred(seconds)
+```
+
+This technique works equally for external functions which return
+deferreds, or deferreds we have made ourselves.
+
+You can also use `context.make_deferred_yieldable`, which just does the
+boilerplate for you, so the above could be written:
+
+```python
+def sleep(seconds):
+    return context.make_deferred_yieldable(get_sleep_deferred(seconds))
+```
+
+## Fire-and-forget
+
+Sometimes you want to fire off a chain of execution, but not wait for
+its result. That might look a bit like this:
+
+```python
+@defer.inlineCallbacks
+def do_request_handling():
+    yield foreground_operation()
+
+    # *don't* do this
+    background_operation()
+
+    logger.debug("Request handling complete")
+
+@defer.inlineCallbacks
+def background_operation():
+    yield first_background_step()
+    logger.debug("Completed first step")
+    yield second_background_step()
+    logger.debug("Completed second step")
+```
+
+The above code does a couple of steps in the background after
+`do_request_handling` has finished. The log lines are still logged
+against the `request_context` logcontext, which may or may not be
+desirable. There are two big problems with the above, however. The first
+problem is that, if `background_operation` returns an incomplete
+Deferred, it will expect its caller to `yield` immediately, so will have
+cleared the logcontext. In this example, that means that 'Request
+handling complete' will be logged without any context.
+
+The second problem, which is potentially even worse, is that when the
+Deferred returned by `background_operation` completes, it will restore
+the original logcontext. There is nothing waiting on that Deferred, so
+the logcontext will leak into the reactor and possibly get attached to
+some arbitrary future operation.
+
+There are two potential solutions to this.
+
+One option is to surround the call to `background_operation` with a
+`PreserveLoggingContext` call. That will reset the logcontext before
+starting `background_operation` (so the context restored when the
+deferred completes will be the empty logcontext), and will restore the
+current logcontext before continuing the foreground process:
+
+```python
+@defer.inlineCallbacks
+def do_request_handling():
+    yield foreground_operation()
+
+    # start background_operation off in the empty logcontext, to
+    # avoid leaking the current context into the reactor.
+    with PreserveLoggingContext():
+        background_operation()
+
+    # this will now be logged against the request context
+    logger.debug("Request handling complete")
+```
+
+Obviously that option means that the operations done in
+`background_operation` would not be logged against a logcontext
+(though that might be fixed by setting a different logcontext via a
+`with LoggingContext(...)` in `background_operation`).
+
+The second option is to use `context.run_in_background`, which wraps a
+function so that it doesn't reset the logcontext even when it returns
+an incomplete deferred, and adds a callback to the returned deferred to
+reset the logcontext. In other words, it turns a function that follows
+the Synapse rules about logcontexts and Deferreds into one which behaves
+more like an external function --- the opposite operation to that
+described in the previous section. It can be used like this:
+
+```python
+@defer.inlineCallbacks
+def do_request_handling():
+    yield foreground_operation()
+
+    context.run_in_background(background_operation)
+
+    # this will now be logged against the request context
+    logger.debug("Request handling complete")
+```
+
+## Passing synapse deferreds into third-party functions
+
+A typical example of this is where we want to collect together two or
+more deferreds via `defer.gatherResults`:
+
+```python
+d1 = operation1()
+d2 = operation2()
+d3 = defer.gatherResults([d1, d2])
+```
+
+This is really a variation of the fire-and-forget problem above, in that
+we are firing off `d1` and `d2` without yielding on them. The difference
+is that we now have third-party code attached to their callbacks. Anyway,
+either technique given in the [Fire-and-forget](#fire-and-forget)
+section will work.
+
+Of course, the new Deferred returned by `gatherResults` needs to be
+wrapped in order to make it follow the logcontext rules before we can
+yield it, as described in [Where you create a new Deferred, make it
+follow the
+rules](#where-you-create-a-new-deferred-make-it-follow-the-rules).
+
+So, option one: reset the logcontext before starting the operations to
+be gathered:
+
+```python
+@defer.inlineCallbacks
+def do_request_handling():
+    with PreserveLoggingContext():
+        d1 = operation1()
+        d2 = operation2()
+        result = yield defer.gatherResults([d1, d2])
+```
+
+In this case particularly, though, option two, of using
+`context.preserve_fn` almost certainly makes more sense, so that
+`operation1` and `operation2` are both logged against the original
+logcontext. This looks like:
+
+```python
+@defer.inlineCallbacks
+def do_request_handling():
+    d1 = context.preserve_fn(operation1)()
+    d2 = context.preserve_fn(operation2)()
+
+    with PreserveLoggingContext():
+        result = yield defer.gatherResults([d1, d2])
+```
+
+## Was all this really necessary?
+
+The conventions used work fine for a linear flow where everything
+happens in series via `defer.inlineCallbacks` and `yield`, but are
+certainly tricky to follow for any more exotic flows. It's hard not to
+wonder if we could have done something else.
+
+We're not going to rewrite Synapse now, so the following is entirely of
+academic interest, but I'd like to record some thoughts on an
+alternative approach.
+
+I briefly prototyped some code following an alternative set of rules. I
+think it would work, but I certainly didn't get as far as thinking how
+it would interact with concepts as complicated as the cache descriptors.
+
+My alternative rules were:
+
+-   functions always preserve the logcontext of their caller, whether or
+    not they are returning a Deferred.
+-   Deferreds returned by synapse functions run their callbacks in the
+    same context as the function was originally called in.
+
+The main point of this scheme is that everywhere that sets the
+logcontext is responsible for clearing it before returning control to
+the reactor.
+
+So, for example, if you were the function which started a
+`with LoggingContext` block, you wouldn't `yield` within it --- instead
+you'd start off the background process, and then leave the `with` block
+to wait for it:
+
+```python
+def handle_request(request_id):
+    with context.LoggingContext() as request_context:
+        request_context.request = request_id
+        d = do_request_handling()
+
+    def cb(r):
+        logger.debug("finished")
+
+    d.addCallback(cb)
+    return d
+```
+
+(in general, mixing `with LoggingContext` blocks and
+`defer.inlineCallbacks` in the same function leads to slightly
+counter-intuitive code, under this scheme).
+
+Because we leave the original `with` block as soon as the Deferred is
+returned (as opposed to waiting for it to be resolved, as we do today),
+the logcontext is cleared before control passes back to the reactor; so
+if there is some code within `do_request_handling` which needs to wait
+for a Deferred to complete, there is no need for it to worry about
+clearing the logcontext before doing so:
+
+```python
+def handle_request():
+    r = do_some_stuff()
+    r.addCallback(do_some_more_stuff)
+    return r
+```
+
+--- and provided `do_some_stuff` follows the rules of returning a
+Deferred which runs its callbacks in the original logcontext, all is
+happy.
+
+The business of a Deferred which runs its callbacks in the original
+logcontext isn't hard to achieve --- we have it today, in the shape of
+`context._PreservingContextDeferred`:
+
+```python
+def do_some_stuff():
+    deferred = do_some_io()
+    pcd = _PreservingContextDeferred(LoggingContext.current_context())
+    deferred.chainDeferred(pcd)
+    return pcd
+```
+
+It turns out that, thanks to the way that Deferreds chain together, we
+automatically get the property of a context-preserving deferred with
+`defer.inlineCallbacks`, provided the final Deferred the function
+`yields` on has that property. So we can just write:
+
+```python
+@defer.inlineCallbacks
+def handle_request():
+    yield do_some_stuff()
+    yield do_some_more_stuff()
+```
+
+To conclude: I think this scheme would have worked equally well, with
+less danger of messing it up, and probably made some more esoteric code
+easier to write. But again --- changing the conventions of the entire
+Synapse codebase is not a sensible option for the marginal improvement
+offered.
+
+## A note on garbage-collection of Deferred chains
+
+It turns out that our logcontext rules do not play nicely with Deferred
+chains which get orphaned and garbage-collected.
+
+Imagine we have some code that looks like this:
+
+```python
+listener_queue = []
+
+def on_something_interesting():
+    for d in listener_queue:
+        d.callback("foo")
+
+@defer.inlineCallbacks
+def await_something_interesting():
+    new_deferred = defer.Deferred()
+    listener_queue.append(new_deferred)
+
+    with PreserveLoggingContext():
+        yield new_deferred
+```
+
+Obviously, the idea here is that we have a bunch of things which are
+waiting for an event. (It's just an example of the problem here, but a
+relatively common one.)
+
+Now let's imagine two further things happen. First of all, whatever was
+waiting for the interesting thing goes away. (Perhaps the request times
+out, or something *even more* interesting happens.)
+
+Secondly, let's suppose that we decide that the interesting thing is
+never going to happen, and we reset the listener queue:
+
+```python
+def reset_listener_queue():
+    listener_queue.clear()
+```
+
+So, both ends of the deferred chain have now dropped their references,
+and the deferred chain is now orphaned, and will be garbage-collected at
+some point. Note that `await_something_interesting` is a generator
+function, and when Python garbage-collects generator functions, it gives
+them a chance to clean up by making the `yield` raise a `GeneratorExit`
+exception. In our case, that means that the `__exit__` handler of
+`PreserveLoggingContext` will carefully restore the request context, but
+there is now nothing waiting for its return, so the request context is
+never cleared.
+
+To reiterate, this problem only arises when *both* ends of a deferred
+chain are dropped. Dropping the reference to a deferred you're
+supposed to be calling is probably bad practice, so this doesn't
+actually happen too much. Unfortunately, when it does happen, it will
+lead to leaked logcontexts which are incredibly hard to track down.
diff --git a/docs/log_contexts.rst b/docs/log_contexts.rst
deleted file mode 100644
index 4502cd945..000000000
--- a/docs/log_contexts.rst
+++ /dev/null
@@ -1,498 +0,0 @@
-Log Contexts
-============
-
-.. contents::
-
-To help track the processing of individual requests, synapse uses a
-'log context' to track which request it is handling at any given moment. This
-is done via a thread-local variable; a ``logging.Filter`` is then used to fish
-the information back out of the thread-local variable and add it to each log
-record.
-
-Logcontexts are also used for CPU and database accounting, so that we can track
-which requests were responsible for high CPU use or database activity.
-
-The ``synapse.logging.context`` module provides a facilities for managing the
-current log context (as well as providing the ``LoggingContextFilter`` class).
-
-Deferreds make the whole thing complicated, so this document describes how it
-all works, and how to write code which follows the rules.
-
-Logcontexts without Deferreds
------------------------------
-
-In the absence of any Deferred voodoo, things are simple enough. As with any
-code of this nature, the rule is that our function should leave things as it
-found them:
-
-.. code:: python
-
-    from synapse.logging import context         # omitted from future snippets
-
-    def handle_request(request_id):
-        request_context = context.LoggingContext()
-
-        calling_context = context.LoggingContext.current_context()
-        context.LoggingContext.set_current_context(request_context)
-        try:
-            request_context.request = request_id
-            do_request_handling()
-            logger.debug("finished")
-        finally:
-            context.LoggingContext.set_current_context(calling_context)
-
-    def do_request_handling():
-        logger.debug("phew")  # this will be logged against request_id
-
-
-LoggingContext implements the context management methods, so the above can be
-written much more succinctly as:
-
-.. code:: python
-
-    def handle_request(request_id):
-        with context.LoggingContext() as request_context:
-            request_context.request = request_id
-            do_request_handling()
-            logger.debug("finished")
-
-    def do_request_handling():
-        logger.debug("phew")
-
-
-Using logcontexts with Deferreds
---------------------------------
-
-Deferreds — and in particular, ``defer.inlineCallbacks`` — break
-the linear flow of code so that there is no longer a single entry point where
-we should set the logcontext and a single exit point where we should remove it.
-
-Consider the example above, where ``do_request_handling`` needs to do some
-blocking operation, and returns a deferred:
-
-.. code:: python
-
-    @defer.inlineCallbacks
-    def handle_request(request_id):
-        with context.LoggingContext() as request_context:
-            request_context.request = request_id
-            yield do_request_handling()
-            logger.debug("finished")
-
-
-In the above flow:
-
-* The logcontext is set
-* ``do_request_handling`` is called, and returns a deferred
-* ``handle_request`` yields the deferred
-* The ``inlineCallbacks`` wrapper of ``handle_request`` returns a deferred
-
-So we have stopped processing the request (and will probably go on to start
-processing the next), without clearing the logcontext.
-
-To circumvent this problem, synapse code assumes that, wherever you have a
-deferred, you will want to yield on it. To that end, whereever functions return
-a deferred, we adopt the following conventions:
-
-**Rules for functions returning deferreds:**
-
-  * If the deferred is already complete, the function returns with the same
-    logcontext it started with.
-  * If the deferred is incomplete, the function clears the logcontext before
-    returning; when the deferred completes, it restores the logcontext before
-    running any callbacks.
-
-That sounds complicated, but actually it means a lot of code (including the
-example above) "just works". There are two cases:
-
-* If ``do_request_handling`` returns a completed deferred, then the logcontext
-  will still be in place. In this case, execution will continue immediately
-  after the ``yield``; the "finished" line will be logged against the right
-  context, and the ``with`` block restores the original context before we
-  return to the caller.
-
-* If the returned deferred is incomplete, ``do_request_handling`` clears the
-  logcontext before returning. The logcontext is therefore clear when
-  ``handle_request`` yields the deferred. At that point, the ``inlineCallbacks``
-  wrapper adds a callback to the deferred, and returns another (incomplete)
-  deferred to the caller, and it is safe to begin processing the next request.
-
-  Once ``do_request_handling``'s deferred completes, it will reinstate the
-  logcontext, before running the callback added by the ``inlineCallbacks``
-  wrapper. That callback runs the second half of ``handle_request``, so again
-  the "finished" line will be logged against the right
-  context, and the ``with`` block restores the original context.
-
-As an aside, it's worth noting that ``handle_request`` follows our rules -
-though that only matters if the caller has its own logcontext which it cares
-about.
-
-The following sections describe pitfalls and helpful patterns when implementing
-these rules.
-
-Always yield your deferreds
----------------------------
-
-Whenever you get a deferred back from a function, you should ``yield`` on it
-as soon as possible. (Returning it directly to your caller is ok too, if you're
-not doing ``inlineCallbacks``.) Do not pass go; do not do any logging; do not
-call any other functions.
-
-.. code:: python
-
-    @defer.inlineCallbacks
-    def fun():
-        logger.debug("starting")
-        yield do_some_stuff()       # just like this
-
-        d = more_stuff()
-        result = yield d            # also fine, of course
-
-        return result
-
-    def nonInlineCallbacksFun():
-        logger.debug("just a wrapper really")
-        return do_some_stuff()      # this is ok too - the caller will yield on
-                                    # it anyway.
-
-Provided this pattern is followed all the way back up to the callchain to where
-the logcontext was set, this will make things work out ok: provided
-``do_some_stuff`` and ``more_stuff`` follow the rules above, then so will
-``fun`` (as wrapped by ``inlineCallbacks``) and ``nonInlineCallbacksFun``.
-
-It's all too easy to forget to ``yield``: for instance if we forgot that
-``do_some_stuff`` returned a deferred, we might plough on regardless. This
-leads to a mess; it will probably work itself out eventually, but not before
-a load of stuff has been logged against the wrong context. (Normally, other
-things will break, more obviously, if you forget to ``yield``, so this tends
-not to be a major problem in practice.)
-
-Of course sometimes you need to do something a bit fancier with your Deferreds
-- not all code follows the linear A-then-B-then-C pattern. Notes on
-implementing more complex patterns are in later sections.
-
-Where you create a new Deferred, make it follow the rules
----------------------------------------------------------
-
-Most of the time, a Deferred comes from another synapse function. Sometimes,
-though, we need to make up a new Deferred, or we get a Deferred back from
-external code. We need to make it follow our rules.
-
-The easy way to do it is with a combination of ``defer.inlineCallbacks``, and
-``context.PreserveLoggingContext``. Suppose we want to implement ``sleep``,
-which returns a deferred which will run its callbacks after a given number of
-seconds. That might look like:
-
-.. code:: python
-
-    # not a logcontext-rules-compliant function
-    def get_sleep_deferred(seconds):
-        d = defer.Deferred()
-        reactor.callLater(seconds, d.callback, None)
-        return d
-
-That doesn't follow the rules, but we can fix it by wrapping it with
-``PreserveLoggingContext`` and ``yield`` ing on it:
-
-.. code:: python
-
-    @defer.inlineCallbacks
-    def sleep(seconds):
-        with PreserveLoggingContext():
-            yield get_sleep_deferred(seconds)
-
-This technique works equally for external functions which return deferreds,
-or deferreds we have made ourselves.
-
-You can also use ``context.make_deferred_yieldable``, which just does the
-boilerplate for you, so the above could be written:
-
-.. code:: python
-
-    def sleep(seconds):
-        return context.make_deferred_yieldable(get_sleep_deferred(seconds))
-
-
-Fire-and-forget
----------------
-
-Sometimes you want to fire off a chain of execution, but not wait for its
-result. That might look a bit like this:
-
-.. code:: python
-
-    @defer.inlineCallbacks
-    def do_request_handling():
-        yield foreground_operation()
-
-        # *don't* do this
-        background_operation()
-
-        logger.debug("Request handling complete")
-
-    @defer.inlineCallbacks
-    def background_operation():
-        yield first_background_step()
-        logger.debug("Completed first step")
-        yield second_background_step()
-        logger.debug("Completed second step")
-
-The above code does a couple of steps in the background after
-``do_request_handling`` has finished. The log lines are still logged against
-the ``request_context`` logcontext, which may or may not be desirable. There
-are two big problems with the above, however. The first problem is that, if
-``background_operation`` returns an incomplete Deferred, it will expect its
-caller to ``yield`` immediately, so will have cleared the logcontext. In this
-example, that means that 'Request handling complete' will be logged without any
-context.
-
-The second problem, which is potentially even worse, is that when the Deferred
-returned by ``background_operation`` completes, it will restore the original
-logcontext. There is nothing waiting on that Deferred, so the logcontext will
-leak into the reactor and possibly get attached to some arbitrary future
-operation.
-
-There are two potential solutions to this.
-
-One option is to surround the call to ``background_operation`` with a
-``PreserveLoggingContext`` call. That will reset the logcontext before
-starting ``background_operation`` (so the context restored when the deferred
-completes will be the empty logcontext), and will restore the current
-logcontext before continuing the foreground process:
-
-.. code:: python
-
-    @defer.inlineCallbacks
-    def do_request_handling():
-        yield foreground_operation()
-
-        # start background_operation off in the empty logcontext, to
-        # avoid leaking the current context into the reactor.
-        with PreserveLoggingContext():
-            background_operation()
-
-        # this will now be logged against the request context
-        logger.debug("Request handling complete")
-
-Obviously that option means that the operations done in
-``background_operation`` would be not be logged against a logcontext (though
-that might be fixed by setting a different logcontext via a ``with
-LoggingContext(...)`` in ``background_operation``).
-
-The second option is to use ``context.run_in_background``, which wraps a
-function so that it doesn't reset the logcontext even when it returns an
-incomplete deferred, and adds a callback to the returned deferred to reset the
-logcontext. In other words, it turns a function that follows the Synapse rules
-about logcontexts and Deferreds into one which behaves more like an external
-function — the opposite operation to that described in the previous section.
-It can be used like this:
-
-.. code:: python
-
-    @defer.inlineCallbacks
-    def do_request_handling():
-        yield foreground_operation()
-
-        context.run_in_background(background_operation)
-
-        # this will now be logged against the request context
-        logger.debug("Request handling complete")
-
-Passing synapse deferreds into third-party functions
-----------------------------------------------------
-
-A typical example of this is where we want to collect together two or more
-deferred via ``defer.gatherResults``:
-
-.. code:: python
-
-    d1 = operation1()
-    d2 = operation2()
-    d3 = defer.gatherResults([d1, d2])
-
-This is really a variation of the fire-and-forget problem above, in that we are
-firing off ``d1`` and ``d2`` without yielding on them. The difference
-is that we now have third-party code attached to their callbacks. Anyway either
-technique given in the `Fire-and-forget`_ section will work.
-
-Of course, the new Deferred returned by ``gatherResults`` needs to be wrapped
-in order to make it follow the logcontext rules before we can yield it, as
-described in `Where you create a new Deferred, make it follow the rules`_.
-
-So, option one: reset the logcontext before starting the operations to be
-gathered:
-
-.. code:: python
-
-    @defer.inlineCallbacks
-    def do_request_handling():
-        with PreserveLoggingContext():
-            d1 = operation1()
-            d2 = operation2()
-            result = yield defer.gatherResults([d1, d2])
-
-In this case particularly, though, option two, of using
-``context.preserve_fn`` almost certainly makes more sense, so that
-``operation1`` and ``operation2`` are both logged against the original
-logcontext. This looks like:
-
-.. code:: python
-
-    @defer.inlineCallbacks
-    def do_request_handling():
-        d1 = context.preserve_fn(operation1)()
-        d2 = context.preserve_fn(operation2)()
-
-        with PreserveLoggingContext():
-            result = yield defer.gatherResults([d1, d2])
-
-
-Was all this really necessary?
-------------------------------
-
-The conventions used work fine for a linear flow where everything happens in
-series via ``defer.inlineCallbacks`` and ``yield``, but are certainly tricky to
-follow for any more exotic flows. It's hard not to wonder if we could have done
-something else.
-
-We're not going to rewrite Synapse now, so the following is entirely of
-academic interest, but I'd like to record some thoughts on an alternative
-approach.
-
-I briefly prototyped some code following an alternative set of rules. I think
-it would work, but I certainly didn't get as far as thinking how it would
-interact with concepts as complicated as the cache descriptors.
-
-My alternative rules were:
-
-* functions always preserve the logcontext of their caller, whether or not they
-  are returning a Deferred.
-
-* Deferreds returned by synapse functions run their callbacks in the same
-  context as the function was orignally called in.
-
-The main point of this scheme is that everywhere that sets the logcontext is
-responsible for clearing it before returning control to the reactor.
-
-So, for example, if you were the function which started a ``with
-LoggingContext`` block, you wouldn't ``yield`` within it — instead you'd start
-off the background process, and then leave the ``with`` block to wait for it:
-
-.. code:: python
-
-    def handle_request(request_id):
-        with context.LoggingContext() as request_context:
-            request_context.request = request_id
-            d = do_request_handling()
-
-        def cb(r):
-            logger.debug("finished")
-
-        d.addCallback(cb)
-        return d
-
-(in general, mixing ``with LoggingContext`` blocks and
-``defer.inlineCallbacks`` in the same function leads to slighly
-counter-intuitive code, under this scheme).
-
-Because we leave the original ``with`` block as soon as the Deferred is
-returned (as opposed to waiting for it to be resolved, as we do today), the
-logcontext is cleared before control passes back to the reactor; so if there is
-some code within ``do_request_handling`` which needs to wait for a Deferred to
-complete, there is no need for it to worry about clearing the logcontext before
-doing so:
-
-.. code:: python
-
-    def handle_request():
-        r = do_some_stuff()
-        r.addCallback(do_some_more_stuff)
-        return r
-
-— and provided ``do_some_stuff`` follows the rules of returning a Deferred which
-runs its callbacks in the original logcontext, all is happy.
-
-The business of a Deferred which runs its callbacks in the original logcontext
-isn't hard to achieve — we have it today, in the shape of
-``context._PreservingContextDeferred``:
-
-.. code:: python
-
-    def do_some_stuff():
-        deferred = do_some_io()
-        pcd = _PreservingContextDeferred(LoggingContext.current_context())
-        deferred.chainDeferred(pcd)
-        return pcd
-
-It turns out that, thanks to the way that Deferreds chain together, we
-automatically get the property of a context-preserving deferred with
-``defer.inlineCallbacks``, provided the final Defered the function ``yields``
-on has that property. So we can just write:
-
-.. code:: python
-
-    @defer.inlineCallbacks
-    def handle_request():
-        yield do_some_stuff()
-        yield do_some_more_stuff()
-
-To conclude: I think this scheme would have worked equally well, with less
-danger of messing it up, and probably made some more esoteric code easier to
-write. But again — changing the conventions of the entire Synapse codebase is
-not a sensible option for the marginal improvement offered.
-
-
-A note on garbage-collection of Deferred chains
------------------------------------------------
-
-It turns out that our logcontext rules do not play nicely with Deferred
-chains which get orphaned and garbage-collected.
-
-Imagine we have some code that looks like this:
-
-.. code:: python
-
-    listener_queue = []
-
-    def on_something_interesting():
-        for d in listener_queue:
-            d.callback("foo")
-
-    @defer.inlineCallbacks
-    def await_something_interesting():
-        new_deferred = defer.Deferred()
-        listener_queue.append(new_deferred)
-
-        with PreserveLoggingContext():
-            yield new_deferred
-
-Obviously, the idea here is that we have a bunch of things which are waiting
-for an event. (It's just an example of the problem here, but a relatively
-common one.)
-
-Now let's imagine two further things happen. First of all, whatever was
-waiting for the interesting thing goes away. (Perhaps the request times out,
-or something *even more* interesting happens.)
-
-Secondly, let's suppose that we decide that the interesting thing is never
-going to happen, and we reset the listener queue:
-
-.. code:: python
-
-    def reset_listener_queue():
-        listener_queue.clear()
-
-So, both ends of the deferred chain have now dropped their references, and the
-deferred chain is now orphaned, and will be garbage-collected at some point.
-Note that ``await_something_interesting`` is a generator function, and when
-Python garbage-collects generator functions, it gives them a chance to clean
-up by making the ``yield`` raise a ``GeneratorExit`` exception. In our case,
-that means that the ``__exit__`` handler of ``PreserveLoggingContext`` will
-carefully restore the request context, but there is now nothing waiting for
-its return, so the request context is never cleared.
-
-To reiterate, this problem only arises when *both* ends of a deferred chain
-are dropped. Dropping the the reference to a deferred you're supposed to be
-calling is probably bad practice, so this doesn't actually happen too much.
-Unfortunately, when it does happen, it will lead to leaked logcontexts which
-are incredibly hard to track down.
diff --git a/docs/media_repository.md b/docs/media_repository.md
new file mode 100644
index 000000000..1bf8f16f5
--- /dev/null
+++ b/docs/media_repository.md
@@ -0,0 +1,30 @@
+# Media Repository 
+
+*Synapse implementation-specific details for the media repository*
+
+The media repository is where attachments and avatar photos are stored.
+It stores attachment content and thumbnails for media uploaded by local users.
+It caches attachment content and thumbnails for media uploaded by remote users.
+
+## Storage
+
+Each item of media is assigned a `media_id` when it is uploaded.
+The `media_id` is a randomly chosen, URL safe 24 character string.
+
+Metadata such as the MIME type, upload time and length are stored in the
+sqlite3 database indexed by `media_id`.
+
+Content is stored on the filesystem under a `"local_content"` directory.
+
+Thumbnails are stored under a `"local_thumbnails"` directory.
+
+The item with `media_id` `"aabbccccccccdddddddddddd"` is stored under
+`"local_content/aa/bb/ccccccccdddddddddddd"`. Its thumbnail with width
+`128` and height `96` and type `"image/jpeg"` is stored under
+`"local_thumbnails/aa/bb/ccccccccdddddddddddd/128-96-image-jpeg"`
+
+Remote content is cached under a `"remote_content"` directory. Each item of
+remote content is assigned a local `"filesystem_id"` to ensure that the
+directory structure `"remote_content/server_name/aa/bb/ccccccccdddddddddddd"`
+is appropriate. Thumbnails for remote content are stored under
+`"remote_thumbnails/server_name/..."`
diff --git a/docs/media_repository.rst b/docs/media_repository.rst
deleted file mode 100644
index 1037b5be6..000000000
--- a/docs/media_repository.rst
+++ /dev/null
@@ -1,27 +0,0 @@
-Media Repository 
-================
-
-*Synapse implementation-specific details for the media repository*
-
-The media repository is where attachments and avatar photos are stored.
-It stores attachment content and thumbnails for media uploaded by local users.
-It caches attachment content and thumbnails for media uploaded by remote users.
-
-Storage
--------
-
-Each item of media is assigned a ``media_id`` when it is uploaded.
-The ``media_id`` is a randomly chosen, URL safe 24 character string.
-Metadata such as the MIME type, upload time and length are stored in the
-sqlite3 database indexed by ``media_id``.
-Content is stored on the filesystem under a ``"local_content"`` directory.
-Thumbnails are stored under a ``"local_thumbnails"`` directory.
-The item with ``media_id`` ``"aabbccccccccdddddddddddd"`` is stored under
-``"local_content/aa/bb/ccccccccdddddddddddd"``. Its thumbnail with width
-``128`` and height ``96`` and type ``"image/jpeg"`` is stored under
-``"local_thumbnails/aa/bb/ccccccccdddddddddddd/128-96-image-jpeg"``
-Remote content is cached under ``"remote_content"`` directory. Each item of
-remote content is assigned a local "``filesystem_id``" to ensure that the
-directory structure ``"remote_content/server_name/aa/bb/ccccccccdddddddddddd"``
-is appropriate. Thumbnails for remote content are stored under
-``"remote_thumbnails/server_name/..."``
diff --git a/docs/metrics-howto.md b/docs/metrics-howto.md
new file mode 100644
index 000000000..32abb9f44
--- /dev/null
+++ b/docs/metrics-howto.md
@@ -0,0 +1,217 @@
+# How to monitor Synapse metrics using Prometheus
+
+1.  Install Prometheus:
+
+    Follow instructions at
+    <http://prometheus.io/docs/introduction/install/>
+
+1.  Enable Synapse metrics:
+
+    There are two methods of enabling metrics in Synapse.
+
+    The first serves the metrics as a part of the usual web server and
+    can be enabled by adding the "metrics" resource to the existing
+    listener as such:
+
+        resources:
+          - names:
+            - client
+            - metrics
+
+    This provides a simple way of adding metrics to your Synapse
+    installation, and serves under `/_synapse/metrics`. If you do not
+    wish your metrics to be publicly exposed, you will need to either
+    filter it out at your load balancer, or use the second method.
+
+    The second method runs the metrics server on a different port, in a
+    different thread to Synapse. This makes it less likely that heavy
+    load will prevent metrics from being retrieved, and makes it easier
+    to expose the metrics to internal networks only. The served metrics
+    are available over HTTP only, and will be available at `/`.
+
+    Add a new listener to homeserver.yaml:
+
+        listeners:
+          - type: metrics
+            port: 9000
+            bind_addresses:
+              - '0.0.0.0'
+
+    For both options, you will need to ensure that `enable_metrics` is
+    set to `True`.
+
+1.  Restart Synapse.
+
+1.  Add a Prometheus target for Synapse.
+
+    It needs to set the `metrics_path` to a non-default value (under
+    `scrape_configs`):
+
+        - job_name: "synapse"
+          metrics_path: "/_synapse/metrics"
+          static_configs:
+            - targets: ["my.server.here:port"]
+
+    where `my.server.here` is the IP address of Synapse, and `port` is
+    the listener port configured with the `metrics` resource.
+
+    If your Prometheus is older than 1.5.2, you will need to replace
+    `static_configs` in the above with `target_groups`.
+
+1.  Restart Prometheus.
+
+## Renaming of metrics & deprecation of old names in 1.2
+
+Synapse 1.2 updates the Prometheus metrics to match the naming
+convention of the upstream `prometheus_client`. The old names are
+considered deprecated and will be removed in a future version of
+Synapse.
+
+| New Name                                                                     | Old Name                                                               |
+| ---------------------------------------------------------------------------- | ---------------------------------------------------------------------- |
+| python_gc_objects_collected_total                                            | python_gc_objects_collected                                            |
+| python_gc_objects_uncollectable_total                                        | python_gc_objects_uncollectable                                        |
+| python_gc_collections_total                                                  | python_gc_collections                                                  |
+| process_cpu_seconds_total                                                    | process_cpu_seconds                                                    |
+| synapse_federation_client_sent_transactions_total                            | synapse_federation_client_sent_transactions                            |
+| synapse_federation_client_events_processed_total                             | synapse_federation_client_events_processed                             |
+| synapse_event_processing_loop_count_total                                    | synapse_event_processing_loop_count                                    |
+| synapse_event_processing_loop_room_count_total                               | synapse_event_processing_loop_room_count                               |
+| synapse_util_metrics_block_count_total                                       | synapse_util_metrics_block_count                                       |
+| synapse_util_metrics_block_time_seconds_total                                | synapse_util_metrics_block_time_seconds                                |
+| synapse_util_metrics_block_ru_utime_seconds_total                            | synapse_util_metrics_block_ru_utime_seconds                            |
+| synapse_util_metrics_block_ru_stime_seconds_total                            | synapse_util_metrics_block_ru_stime_seconds                            |
+| synapse_util_metrics_block_db_txn_count_total                                | synapse_util_metrics_block_db_txn_count                                |
+| synapse_util_metrics_block_db_txn_duration_seconds_total                     | synapse_util_metrics_block_db_txn_duration_seconds                     |
+| synapse_util_metrics_block_db_sched_duration_seconds_total                   | synapse_util_metrics_block_db_sched_duration_seconds                   |
+| synapse_background_process_start_count_total                                 | synapse_background_process_start_count                                 |
+| synapse_background_process_ru_utime_seconds_total                            | synapse_background_process_ru_utime_seconds                            |
+| synapse_background_process_ru_stime_seconds_total                            | synapse_background_process_ru_stime_seconds                            |
+| synapse_background_process_db_txn_count_total                                | synapse_background_process_db_txn_count                                |
+| synapse_background_process_db_txn_duration_seconds_total                     | synapse_background_process_db_txn_duration_seconds                     |
+| synapse_background_process_db_sched_duration_seconds_total                   | synapse_background_process_db_sched_duration_seconds                   |
+| synapse_storage_events_persisted_events_total                                | synapse_storage_events_persisted_events                                |
+| synapse_storage_events_persisted_events_sep_total                            | synapse_storage_events_persisted_events_sep                            |
+| synapse_storage_events_state_delta_total                                     | synapse_storage_events_state_delta                                     |
+| synapse_storage_events_state_delta_single_event_total                        | synapse_storage_events_state_delta_single_event                        |
+| synapse_storage_events_state_delta_reuse_delta_total                         | synapse_storage_events_state_delta_reuse_delta                         |
+| synapse_federation_server_received_pdus_total                                | synapse_federation_server_received_pdus                                |
+| synapse_federation_server_received_edus_total                                | synapse_federation_server_received_edus                                |
+| synapse_handler_presence_notified_presence_total                             | synapse_handler_presence_notified_presence                             |
+| synapse_handler_presence_federation_presence_out_total                       | synapse_handler_presence_federation_presence_out                       |
+| synapse_handler_presence_presence_updates_total                              | synapse_handler_presence_presence_updates                              |
+| synapse_handler_presence_timers_fired_total                                  | synapse_handler_presence_timers_fired                                  |
+| synapse_handler_presence_federation_presence_total                           | synapse_handler_presence_federation_presence                           |
+| synapse_handler_presence_bump_active_time_total                              | synapse_handler_presence_bump_active_time                              |
+| synapse_federation_client_sent_edus_total                                    | synapse_federation_client_sent_edus                                    |
+| synapse_federation_client_sent_pdu_destinations_count_total                  | synapse_federation_client_sent_pdu_destinations:count                  |
+| synapse_federation_client_sent_pdu_destinations_total                        | synapse_federation_client_sent_pdu_destinations:total                  |
+| synapse_handlers_appservice_events_processed_total                           | synapse_handlers_appservice_events_processed                           |
+| synapse_notifier_notified_events_total                                       | synapse_notifier_notified_events                                       |
+| synapse_push_bulk_push_rule_evaluator_push_rules_invalidation_counter_total  | synapse_push_bulk_push_rule_evaluator_push_rules_invalidation_counter  |
+| synapse_push_bulk_push_rule_evaluator_push_rules_state_size_counter_total    | synapse_push_bulk_push_rule_evaluator_push_rules_state_size_counter    |
+| synapse_http_httppusher_http_pushes_processed_total                          | synapse_http_httppusher_http_pushes_processed                          |
+| synapse_http_httppusher_http_pushes_failed_total                             | synapse_http_httppusher_http_pushes_failed                             |
+| synapse_http_httppusher_badge_updates_processed_total                        | synapse_http_httppusher_badge_updates_processed                        |
+| synapse_http_httppusher_badge_updates_failed_total                           | synapse_http_httppusher_badge_updates_failed                           |
+
+Removal of deprecated metrics & time based counters becoming histograms in 0.31.0
+---------------------------------------------------------------------------------
+
+The duplicated metrics deprecated in Synapse 0.27.0 have been removed.
+
+All time duration-based metrics have been changed to be seconds. This
+affects:
+
+| msec -> sec metrics                    |
+| -------------------------------------- |
+| python_gc_time                         |
+| python_twisted_reactor_tick_time       |
+| synapse_storage_query_time             |
+| synapse_storage_schedule_time          |
+| synapse_storage_transaction_time       |
+
+Several metrics have been changed to be histograms, which sort entries
+into buckets and allow better analysis. The following metrics are now
+histograms:
+
+| Altered metrics                                  |
+| ------------------------------------------------ |
+| python_gc_time                                   |
+| python_twisted_reactor_pending_calls             |
+| python_twisted_reactor_tick_time                 |
+| synapse_http_server_response_time_seconds        |
+| synapse_storage_query_time                       |
+| synapse_storage_schedule_time                    |
+| synapse_storage_transaction_time                 |
+
+Block and response metrics renamed for 0.27.0
+---------------------------------------------
+
+Synapse 0.27.0 begins the process of rationalising the duplicate
+`*:count` metrics reported for the resource tracking for code blocks and
+HTTP requests.
+
+At the same time, the corresponding `*:total` metrics are being renamed,
+as the `:total` suffix no longer makes sense in the absence of a
+corresponding `:count` metric.
+
+To enable a graceful migration path, this release just adds new names
+for the metrics being renamed. A future release will remove the old
+ones.
+
+The following table shows the new metrics, and the old metrics which
+they are replacing.
+
+| New name                                                      | Old name                                                   |
+| ------------------------------------------------------------- | ---------------------------------------------------------- |
+| synapse_util_metrics_block_count                              | synapse_util_metrics_block_timer:count                     |
+| synapse_util_metrics_block_count                              | synapse_util_metrics_block_ru_utime:count                  |
+| synapse_util_metrics_block_count                              | synapse_util_metrics_block_ru_stime:count                  |
+| synapse_util_metrics_block_count                              | synapse_util_metrics_block_db_txn_count:count              |
+| synapse_util_metrics_block_count                              | synapse_util_metrics_block_db_txn_duration:count           |
+| synapse_util_metrics_block_time_seconds                       | synapse_util_metrics_block_timer:total                     |
+| synapse_util_metrics_block_ru_utime_seconds                   | synapse_util_metrics_block_ru_utime:total                  |
+| synapse_util_metrics_block_ru_stime_seconds                   | synapse_util_metrics_block_ru_stime:total                  |
+| synapse_util_metrics_block_db_txn_count                       | synapse_util_metrics_block_db_txn_count:total              |
+| synapse_util_metrics_block_db_txn_duration_seconds            | synapse_util_metrics_block_db_txn_duration:total           |
+| synapse_http_server_response_count                            | synapse_http_server_requests                               |
+| synapse_http_server_response_count                            | synapse_http_server_response_time:count                    |
+| synapse_http_server_response_count                            | synapse_http_server_response_ru_utime:count                |
+| synapse_http_server_response_count                            | synapse_http_server_response_ru_stime:count                |
+| synapse_http_server_response_count                            | synapse_http_server_response_db_txn_count:count            |
+| synapse_http_server_response_count                            | synapse_http_server_response_db_txn_duration:count         |
+| synapse_http_server_response_time_seconds                     | synapse_http_server_response_time:total                    |
+| synapse_http_server_response_ru_utime_seconds                 | synapse_http_server_response_ru_utime:total                |
+| synapse_http_server_response_ru_stime_seconds                 | synapse_http_server_response_ru_stime:total                |
+| synapse_http_server_response_db_txn_count                     | synapse_http_server_response_db_txn_count:total            |
+| synapse_http_server_response_db_txn_duration_seconds          | synapse_http_server_response_db_txn_duration:total         |
+
+Standard Metric Names
+---------------------
+
+As of synapse version 0.18.2, the format of the process-wide metrics has
+been changed to fit prometheus standard naming conventions. Additionally
+the units have been changed to seconds, from milliseconds.
+
+| New name                                 | Old name                          |
+| ---------------------------------------- | --------------------------------- |
+| process_cpu_user_seconds_total           | process_resource_utime / 1000     |
+| process_cpu_system_seconds_total         | process_resource_stime / 1000     |
+| process_open_fds (no \'type\' label)     | process_fds                       |
+
+The python-specific counts of garbage collector performance have been
+renamed.
+
+| New name                         | Old name                   |
+| -------------------------------- | -------------------------- |
+| python_gc_time                   | reactor_gc_time            |
+| python_gc_unreachable_total      | reactor_gc_unreachable     |
+| python_gc_counts                 | reactor_gc_counts          |
+
+The twisted-specific reactor metrics have been renamed.
+
+| New name                               | Old name                |
+| -------------------------------------- | ----------------------- |
+| python_twisted_reactor_pending_calls   | reactor_pending_calls   |
+| python_twisted_reactor_tick_time       | reactor_tick_time       |
diff --git a/docs/metrics-howto.rst b/docs/metrics-howto.rst
deleted file mode 100644
index 973641f3d..000000000
--- a/docs/metrics-howto.rst
+++ /dev/null
@@ -1,285 +0,0 @@
-How to monitor Synapse metrics using Prometheus
-===============================================
-
-1. Install Prometheus:
-
-   Follow instructions at http://prometheus.io/docs/introduction/install/
-
-2. Enable Synapse metrics:
-
-   There are two methods of enabling metrics in Synapse.
-
-   The first serves the metrics as a part of the usual web server and can be
-   enabled by adding the "metrics" resource to the existing listener as such::
-
-     resources:
-       - names:
-         - client
-         - metrics
-
-   This provides a simple way of adding metrics to your Synapse installation,
-   and serves under ``/_synapse/metrics``. If you do not wish your metrics be
-   publicly exposed, you will need to either filter it out at your load
-   balancer, or use the second method.
-
-   The second method runs the metrics server on a different port, in a
-   different thread to Synapse. This can make it more resilient to heavy load
-   meaning metrics cannot be retrieved, and can be exposed to just internal
-   networks easier. The served metrics are available over HTTP only, and will
-   be available at ``/``.
-
-   Add a new listener to homeserver.yaml::
-
-     listeners:
-       - type: metrics
-         port: 9000
-         bind_addresses:
-           - '0.0.0.0'
-
-   For both options, you will need to ensure that ``enable_metrics`` is set to
-   ``True``.
-
-   Restart Synapse.
-
-3. Add a Prometheus target for Synapse.
-
-   It needs to set the ``metrics_path`` to a non-default value (under ``scrape_configs``)::
-
-    - job_name: "synapse"
-      metrics_path: "/_synapse/metrics"
-      static_configs:
-        - targets: ["my.server.here:port"]
-
-   where ``my.server.here`` is the IP address of Synapse, and ``port`` is the listener port
-   configured with the ``metrics`` resource.
-
-   If your prometheus is older than 1.5.2, you will need to replace
-   ``static_configs`` in the above with ``target_groups``.
-
-   Restart Prometheus.
-
-
-Renaming of metrics & deprecation of old names in 1.2
------------------------------------------------------
-
-Synapse 1.2 updates the Prometheus metrics to match the naming convention of the
-upstream ``prometheus_client``. The old names are considered deprecated and will
-be removed in a future version of Synapse.
-
-+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
-|                                  New Name                                   |                               Old Name                                |
-+=============================================================================+=======================================================================+
-| python_gc_objects_collected_total                                           | python_gc_objects_collected                                           |
-+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
-| python_gc_objects_uncollectable_total                                       | python_gc_objects_uncollectable                                       |
-+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
-| python_gc_collections_total                                                 | python_gc_collections                                                 |
-+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
-| process_cpu_seconds_total                                                   | process_cpu_seconds                                                   |
-+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
-| synapse_federation_client_sent_transactions_total                           | synapse_federation_client_sent_transactions                           |
-+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
-| synapse_federation_client_events_processed_total                            | synapse_federation_client_events_processed                            |
-+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
-| synapse_event_processing_loop_count_total                                   | synapse_event_processing_loop_count                                   |
-+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
-| synapse_event_processing_loop_room_count_total                              | synapse_event_processing_loop_room_count                              |
-+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
-| synapse_util_metrics_block_count_total                                      | synapse_util_metrics_block_count                                      |
-+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
-| synapse_util_metrics_block_time_seconds_total                               | synapse_util_metrics_block_time_seconds                               |
-+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
-| synapse_util_metrics_block_ru_utime_seconds_total                           | synapse_util_metrics_block_ru_utime_seconds                           |
-+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
-| synapse_util_metrics_block_ru_stime_seconds_total                           | synapse_util_metrics_block_ru_stime_seconds                           |
-+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
-| synapse_util_metrics_block_db_txn_count_total                               | synapse_util_metrics_block_db_txn_count                               |
-+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
-| synapse_util_metrics_block_db_txn_duration_seconds_total                    | synapse_util_metrics_block_db_txn_duration_seconds                    |
-+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
-| synapse_util_metrics_block_db_sched_duration_seconds_total                  | synapse_util_metrics_block_db_sched_duration_seconds                  |
-+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
-| synapse_background_process_start_count_total                                | synapse_background_process_start_count                                |
-+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
-| synapse_background_process_ru_utime_seconds_total                           | synapse_background_process_ru_utime_seconds                           |
-+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
-| synapse_background_process_ru_stime_seconds_total                           | synapse_background_process_ru_stime_seconds                           |
-+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
-| synapse_background_process_db_txn_count_total                               | synapse_background_process_db_txn_count                               |
-+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
-| synapse_background_process_db_txn_duration_seconds_total                    | synapse_background_process_db_txn_duration_seconds                    |
-+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
-| synapse_background_process_db_sched_duration_seconds_total                  | synapse_background_process_db_sched_duration_seconds                  |
-+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
-| synapse_storage_events_persisted_events_total                               | synapse_storage_events_persisted_events                               |
-+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
-| synapse_storage_events_persisted_events_sep_total                           | synapse_storage_events_persisted_events_sep                           |
-+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
-| synapse_storage_events_state_delta_total                                    | synapse_storage_events_state_delta                                    |
-+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
-| synapse_storage_events_state_delta_single_event_total                       | synapse_storage_events_state_delta_single_event                       |
-+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
-| synapse_storage_events_state_delta_reuse_delta_total                        | synapse_storage_events_state_delta_reuse_delta                        |
-+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
-| synapse_federation_server_received_pdus_total                               | synapse_federation_server_received_pdus                               |
-+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
-| synapse_federation_server_received_edus_total                               | synapse_federation_server_received_edus                               |
-+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
-| synapse_handler_presence_notified_presence_total                            | synapse_handler_presence_notified_presence                            |
-+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
-| synapse_handler_presence_federation_presence_out_total                      | synapse_handler_presence_federation_presence_out                      |
-+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
-| synapse_handler_presence_presence_updates_total                             | synapse_handler_presence_presence_updates                             |
-+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
-| synapse_handler_presence_timers_fired_total                                 | synapse_handler_presence_timers_fired                                 |
-+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
-| synapse_handler_presence_federation_presence_total                          | synapse_handler_presence_federation_presence                          |
-+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
-| synapse_handler_presence_bump_active_time_total                             | synapse_handler_presence_bump_active_time                             |
-+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
-| synapse_federation_client_sent_edus_total                                   | synapse_federation_client_sent_edus                                   |
-+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
-| synapse_federation_client_sent_pdu_destinations_count_total                 | synapse_federation_client_sent_pdu_destinations:count                 |
-+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
-| synapse_federation_client_sent_pdu_destinations_total                       | synapse_federation_client_sent_pdu_destinations:total                 |
-+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
-| synapse_handlers_appservice_events_processed_total                          | synapse_handlers_appservice_events_processed                          |
-+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
-| synapse_notifier_notified_events_total                                      | synapse_notifier_notified_events                                      |
-+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
-| synapse_push_bulk_push_rule_evaluator_push_rules_invalidation_counter_total | synapse_push_bulk_push_rule_evaluator_push_rules_invalidation_counter |
-+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
-| synapse_push_bulk_push_rule_evaluator_push_rules_state_size_counter_total   | synapse_push_bulk_push_rule_evaluator_push_rules_state_size_counter   |
-+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
-| synapse_http_httppusher_http_pushes_processed_total                         | synapse_http_httppusher_http_pushes_processed                         |
-+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
-| synapse_http_httppusher_http_pushes_failed_total                            | synapse_http_httppusher_http_pushes_failed                            |
-+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
-| synapse_http_httppusher_badge_updates_processed_total                       | synapse_http_httppusher_badge_updates_processed                       |
-+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
-| synapse_http_httppusher_badge_updates_failed_total                          | synapse_http_httppusher_badge_updates_failed                          |
-+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
-
-
-Removal of deprecated metrics & time based counters becoming histograms in 0.31.0
----------------------------------------------------------------------------------
-
-The duplicated metrics deprecated in Synapse 0.27.0 have been removed.
-
-All time duration-based metrics have been changed to be seconds. This affects:
-
-+----------------------------------+
-| msec -> sec metrics              |
-+==================================+
-| python_gc_time                   |
-+----------------------------------+
-| python_twisted_reactor_tick_time |
-+----------------------------------+
-| synapse_storage_query_time       |
-+----------------------------------+
-| synapse_storage_schedule_time    |
-+----------------------------------+
-| synapse_storage_transaction_time |
-+----------------------------------+
-
-Several metrics have been changed to be histograms, which sort entries into
-buckets and allow better analysis. The following metrics are now histograms:
-
-+-------------------------------------------+
-| Altered metrics                           |
-+===========================================+
-| python_gc_time                            |
-+-------------------------------------------+
-| python_twisted_reactor_pending_calls      |
-+-------------------------------------------+
-| python_twisted_reactor_tick_time          |
-+-------------------------------------------+
-| synapse_http_server_response_time_seconds |
-+-------------------------------------------+
-| synapse_storage_query_time                |
-+-------------------------------------------+
-| synapse_storage_schedule_time             |
-+-------------------------------------------+
-| synapse_storage_transaction_time          |
-+-------------------------------------------+
-
-
-Block and response metrics renamed for 0.27.0
----------------------------------------------
-
-Synapse 0.27.0 begins the process of rationalising the duplicate ``*:count``
-metrics reported for the resource tracking for code blocks and HTTP requests.
-
-At the same time, the corresponding ``*:total`` metrics are being renamed, as
-the ``:total`` suffix no longer makes sense in the absence of a corresponding
-``:count`` metric.
-
-To enable a graceful migration path, this release just adds new names for the
-metrics being renamed. A future release will remove the old ones.
-
-The following table shows the new metrics, and the old metrics which they are
-replacing.
-
-==================================================== ===================================================
-New name                                             Old name
-==================================================== ===================================================
-synapse_util_metrics_block_count                     synapse_util_metrics_block_timer:count
-synapse_util_metrics_block_count                     synapse_util_metrics_block_ru_utime:count
-synapse_util_metrics_block_count                     synapse_util_metrics_block_ru_stime:count
-synapse_util_metrics_block_count                     synapse_util_metrics_block_db_txn_count:count
-synapse_util_metrics_block_count                     synapse_util_metrics_block_db_txn_duration:count
-
-synapse_util_metrics_block_time_seconds              synapse_util_metrics_block_timer:total
-synapse_util_metrics_block_ru_utime_seconds          synapse_util_metrics_block_ru_utime:total
-synapse_util_metrics_block_ru_stime_seconds          synapse_util_metrics_block_ru_stime:total
-synapse_util_metrics_block_db_txn_count              synapse_util_metrics_block_db_txn_count:total
-synapse_util_metrics_block_db_txn_duration_seconds   synapse_util_metrics_block_db_txn_duration:total
-
-synapse_http_server_response_count                   synapse_http_server_requests
-synapse_http_server_response_count                   synapse_http_server_response_time:count
-synapse_http_server_response_count                   synapse_http_server_response_ru_utime:count
-synapse_http_server_response_count                   synapse_http_server_response_ru_stime:count
-synapse_http_server_response_count                   synapse_http_server_response_db_txn_count:count
-synapse_http_server_response_count                   synapse_http_server_response_db_txn_duration:count
-
-synapse_http_server_response_time_seconds            synapse_http_server_response_time:total
-synapse_http_server_response_ru_utime_seconds        synapse_http_server_response_ru_utime:total
-synapse_http_server_response_ru_stime_seconds        synapse_http_server_response_ru_stime:total
-synapse_http_server_response_db_txn_count            synapse_http_server_response_db_txn_count:total
-synapse_http_server_response_db_txn_duration_seconds synapse_http_server_response_db_txn_duration:total
-==================================================== ===================================================
-
-
-Standard Metric Names
----------------------
-
-As of synapse version 0.18.2, the format of the process-wide metrics has been
-changed to fit prometheus standard naming conventions. Additionally the units
-have been changed to seconds, from miliseconds.
-
-================================== =============================
-New name                           Old name
-================================== =============================
-process_cpu_user_seconds_total     process_resource_utime / 1000
-process_cpu_system_seconds_total   process_resource_stime / 1000
-process_open_fds (no 'type' label) process_fds
-================================== =============================
-
-The python-specific counts of garbage collector performance have been renamed.
-
-=========================== ======================
-New name                    Old name
-=========================== ======================
-python_gc_time              reactor_gc_time
-python_gc_unreachable_total reactor_gc_unreachable
-python_gc_counts            reactor_gc_counts
-=========================== ======================
-
-The twisted-specific reactor metrics have been renamed.
-
-==================================== =====================
-New name                             Old name
-==================================== =====================
-python_twisted_reactor_pending_calls reactor_pending_calls
-python_twisted_reactor_tick_time     reactor_tick_time
-==================================== =====================
diff --git a/docs/opentracing.md b/docs/opentracing.md
new file mode 100644
index 000000000..4c7a56a5d
--- /dev/null
+++ b/docs/opentracing.md
@@ -0,0 +1,93 @@
+# OpenTracing
+
+## Background
+
+OpenTracing is a semi-standard being adopted by a number of distributed
+tracing platforms. It is a common api for facilitating vendor-agnostic
+tracing instrumentation. That is, we can use the OpenTracing api and
+select one of a number of tracer implementations to do the heavy lifting
+in the background. Our current selected implementation is Jaeger.
+
+OpenTracing is a tool which gives an insight into the causal
+relationship of work done in and between servers. The servers each track
+events and report them to a centralised server - in Synapse's case:
+Jaeger. The basic unit used to represent events is the span. The span
+roughly represents a single piece of work that was done and the time at
+which it occurred. A span can have child spans, meaning that the work of
+the child had to be completed for the parent span to complete, or it can
+have follow-on spans which represent work that is undertaken as a result
+of the parent but is not depended on by the parent in order to
+finish.
+
+Since this is undertaken in a distributed environment a request to
+another server, such as an RPC or a simple GET, can be considered a span
+(a unit of work) for the local server. This causal link is what
+OpenTracing aims to capture and visualise. In order to do this, metadata
+about the local server's span, i.e. the 'span context', needs to be
+included with the request to the remote.
+
+It is up to the remote server to decide what it does with the spans it
+creates. This is called the sampling policy and it can be configured
+through Jaeger's settings.
+
+For OpenTracing concepts see
+<https://opentracing.io/docs/overview/what-is-tracing/>.
+
+For more information about Jaeger's implementation see
+<https://www.jaegertracing.io/docs/>
+
+## Setting up OpenTracing
+
+To receive OpenTracing spans, start up a Jaeger server. This can be done
+using docker like so:
+
+```sh
+docker run -d --name jaeger \
+  -p 6831:6831/udp \
+  -p 6832:6832/udp \
+  -p 5778:5778 \
+  -p 16686:16686 \
+  -p 14268:14268 \
+  jaegertracing/all-in-one:1.13
+```
+
+Latest documentation is probably at
+<https://www.jaegertracing.io/docs/1.13/getting-started/>
+
+## Enable OpenTracing in Synapse
+
+OpenTracing is not enabled by default. It must be enabled in the
+homeserver config by uncommenting the config options under `opentracing`
+as shown in the [sample config](./sample_config.yaml). For example:
+
+```yaml
+opentracing:
+  tracer_enabled: true
+  homeserver_whitelist:
+    - "mytrustedhomeserver.org"
+    - "*.myotherhomeservers.com"
+```
+
+## Homeserver whitelisting
+
+The homeserver whitelist is configured using regular expressions. A list
+of regular expressions can be given and their union will be compared
+when propagating any span contexts to another homeserver.
+
+Though it's mostly safe to send and receive span contexts to and from
+untrusted users, since span contexts are usually opaque ids, it can lead
+to two problems, namely:
+
+-   If the span context is marked as sampled by the sending homeserver
+    the receiver will sample it. Therefore two homeservers with wildly
+    different sampling policies could incur higher sampling counts than
+    intended.
+-   Sending servers can attach arbitrary data to spans, known as
+    'baggage'. For safety this has been disabled in Synapse but that
+    doesn't prevent another server sending you baggage which will be
+    logged to OpenTracing's logs.
+
+## Configuring Jaeger
+
+Sampling strategies can be set as in this document:
+<https://www.jaegertracing.io/docs/1.13/sampling/>
diff --git a/docs/opentracing.rst b/docs/opentracing.rst
deleted file mode 100644
index 6e98ab56b..000000000
--- a/docs/opentracing.rst
+++ /dev/null
@@ -1,123 +0,0 @@
-===========
-OpenTracing
-===========
-
-Background
-----------
-
-OpenTracing is a semi-standard being adopted by a number of distributed tracing
-platforms. It is a common api for facilitating vendor-agnostic tracing
-instrumentation. That is, we can use the OpenTracing api and select one of a
-number of tracer implementations to do the heavy lifting in the background.
-Our current selected implementation is Jaeger.
-
-OpenTracing is a tool which gives an insight into the causal relationship of
-work done in and between servers. The servers each track events and report them
-to a centralised server - in Synapse's case: Jaeger. The basic unit used to
-represent events is the span. The span roughly represents a single piece of work
-that was done and the time at which it occurred. A span can have child spans,
-meaning that the work of the child had to be completed for the parent span to
-complete, or it can have follow-on spans which represent work that is undertaken
-as a result of the parent but is not depended on by the parent to in order to
-finish.
-
-Since this is undertaken in a distributed environment a request to another
-server, such as an RPC or a simple GET, can be considered a span (a unit or
-work) for the local server. This causal link is what OpenTracing aims to
-capture and visualise. In order to do this metadata about the local server's
-span, i.e the 'span context', needs to be included with the request to the
-remote.
-
-It is up to the remote server to decide what it does with the spans
-it creates. This is called the sampling policy and it can be configured
-through Jaeger's settings.
-
-For OpenTracing concepts see
-https://opentracing.io/docs/overview/what-is-tracing/.
-
-For more information about Jaeger's implementation see
-https://www.jaegertracing.io/docs/
-
-=====================
-Seting up OpenTracing
-=====================
-
-To receive OpenTracing spans, start up a Jaeger server. This can be done
-using docker like so:
-
-.. code-block:: bash
-
-   docker run -d --name jaeger
-     -p 6831:6831/udp \
-     -p 6832:6832/udp \
-     -p 5778:5778 \
-     -p 16686:16686 \
-     -p 14268:14268 \
-     jaegertracing/all-in-one:1.13
-
-Latest documentation is probably at
-https://www.jaegertracing.io/docs/1.13/getting-started/
-
-
-Enable OpenTracing in Synapse
------------------------------
-
-OpenTracing is not enabled by default. It must be enabled in the homeserver
-config by uncommenting the config options under ``opentracing`` as shown in
-the `sample config <./sample_config.yaml>`_. For example:
-
-.. code-block:: yaml
-
-  opentracing:
-    tracer_enabled: true
-    homeserver_whitelist:
-      - "mytrustedhomeserver.org"
-      - "*.myotherhomeservers.com"
-
-Homeserver whitelisting
------------------------
-
-The homeserver whitelist is configured using regular expressions. A list of regular
-expressions can be given and their union will be compared when propagating any
-spans contexts to another homeserver.
-
-Though it's mostly safe to send and receive span contexts to and from
-untrusted users since span contexts are usually opaque ids it can lead to
-two problems, namely:
-
-- If the span context is marked as sampled by the sending homeserver the receiver will
-  sample it. Therefore two homeservers with wildly different sampling policies
-  could incur higher sampling counts than intended.
-- Sending servers can attach arbitrary data to spans, known as 'baggage'. For safety this has been disabled in Synapse
-  but that doesn't prevent another server sending you baggage which will be logged
-  to OpenTracing's logs.
-
-==========
-EDU FORMAT
-==========
-
-EDUs can contain tracing data in their content. This is not specced but
-it could be of interest for other homeservers.
-
-EDU format (if you're using jaeger):
-
-.. code-block:: json
-
-   {
-     "edu_type": "type",
-     "content": {
-       "org.matrix.opentracing_context": {
-         "uber-trace-id": "fe57cf3e65083289"
-       }
-     }
-   }
-
-Though you don't have to use jaeger you must inject the span context into
-`org.matrix.opentracing_context` using the opentracing `Format.TEXT_MAP` inject method.
-
-==================
-Configuring Jaeger
-==================
-
-Sampling strategies can be set as in this document:
-https://www.jaegertracing.io/docs/1.13/sampling/
diff --git a/docs/password_auth_providers.md b/docs/password_auth_providers.md
new file mode 100644
index 000000000..0db1a3804
--- /dev/null
+++ b/docs/password_auth_providers.md
@@ -0,0 +1,116 @@
+# Password auth provider modules
+
+Password auth providers offer a way for server administrators to
+integrate their Synapse installation with an existing authentication
+system.
+
+A password auth provider is a Python class which is dynamically loaded
+into Synapse, and provides a number of methods by which it can integrate
+with the authentication system.
+
+This document serves as a reference for those looking to implement their
+own password auth providers.
+
+## Required methods
+
+Password auth provider classes must provide the following methods:
+
+*class* `SomeProvider.parse_config`(*config*)
+
+> This method is passed the `config` object for this module from the
+> homeserver configuration file.
+>
+> It should perform any appropriate sanity checks on the provided
+> configuration, and return an object which is then passed into
+> `__init__`.
+
+*class* `SomeProvider`(*config*, *account_handler*)
+
+> The constructor is passed the config object returned by
+> `parse_config`, and a `synapse.module_api.ModuleApi` object which
+> allows the password provider to check if accounts exist and/or create
+> new ones.
+
+## Optional methods
+
+Password auth provider classes may optionally provide the following
+methods.
+
+*class* `SomeProvider.get_db_schema_files`()
+
+> This method, if implemented, should return an Iterable of
+> `(name, stream)` pairs of database schema files. Each file is applied
+> in turn at initialisation, and a record is then made in the database
+> so that it is not re-applied on the next start.
+
+`someprovider.get_supported_login_types`()
+
+> This method, if implemented, should return a `dict` mapping from a
+> login type identifier (such as `m.login.password`) to an iterable
+> giving the fields which must be provided by the user in the submission
+> to the `/login` api. These fields are passed in the `login_dict`
+> dictionary to `check_auth`.
+>
+> For example, if a password auth provider wants to implement a custom
+> login type of `com.example.custom_login`, where the client is expected
+> to pass the fields `secret1` and `secret2`, the provider should
+> implement this method and return the following dict:
+>
+>     {"com.example.custom_login": ("secret1", "secret2")}
+
+`someprovider.check_auth`(*username*, *login_type*, *login_dict*)
+
+> This method is the one that does the real work. If implemented, it
+> will be called for each login attempt where the login type matches one
+> of the keys returned by `get_supported_login_types`.
+>
+> It is passed the (possibly UNqualified) `username` provided by the client,
+> the login type, and a dictionary of login secrets passed by the
+> client.
+>
+> The method should return a Twisted `Deferred` object, which resolves
+> to the canonical `@localpart:domain` user id if authentication is
+> successful, and `None` if not.
+>
+> Alternatively, the `Deferred` can resolve to a `(str, func)` tuple, in
+> which case the second field is a callback which will be called with
+> the result from the `/login` call (including `access_token`,
+> `device_id`, etc.)
+
+`someprovider.check_3pid_auth`(*medium*, *address*, *password*)
+
+> This method, if implemented, is called when a user attempts to
+> register or log in with a third party identifier, such as email. It is
+> passed the medium (ex. "email"), an address (ex.
+> "<jdoe@example.com>") and the user's password.
+>
+> The method should return a Twisted `Deferred` object, which resolves
+> to a `str` containing the user's (canonical) User ID if
+> authentication was successful, and `None` if not.
+>
+> As with `check_auth`, the `Deferred` may alternatively resolve to a
+> `(user_id, callback)` tuple.
+
+`someprovider.check_password`(*user_id*, *password*)
+
+> This method provides a simpler interface than
+> `get_supported_login_types` and `check_auth` for password auth
+> providers that just want to provide a mechanism for validating
+> `m.login.password` logins.
+>
+> If implemented, it will be called to check logins with an
+> `m.login.password` login type. It is passed a qualified
+> `@localpart:domain` user id, and the password provided by the user.
+>
+> The method should return a Twisted `Deferred` object, which resolves
+> to `True` if authentication is successful, and `False` if not.
+
+`someprovider.on_logged_out`(*user_id*, *device_id*, *access_token*)
+
+> This method, if implemented, is called when a user logs out. It is
+> passed the qualified user ID, the ID of the deactivated device (if
+> any: access tokens are occasionally created without an associated
+> device ID), and the (now deactivated) access token.
+>
+> It may return a Twisted `Deferred` object; the logout request will
+> wait for the deferred to complete but the result is ignored.
diff --git a/docs/password_auth_providers.rst b/docs/password_auth_providers.rst
deleted file mode 100644
index 6149ba745..000000000
--- a/docs/password_auth_providers.rst
+++ /dev/null
@@ -1,113 +0,0 @@
-Password auth provider modules
-==============================
-
-Password auth providers offer a way for server administrators to integrate
-their Synapse installation with an existing authentication system.
-
-A password auth provider is a Python class which is dynamically loaded into
-Synapse, and provides a number of methods by which it can integrate with the
-authentication system.
-
-This document serves as a reference for those looking to implement their own
-password auth providers.
-
-Required methods
-----------------
-
-Password auth provider classes must provide the following methods:
-
-*class* ``SomeProvider.parse_config``\(*config*)
-
-    This method is passed the ``config`` object for this module from the
-    homeserver configuration file.
-
-    It should perform any appropriate sanity checks on the provided
-    configuration, and return an object which is then passed into ``__init__``.
-
-*class* ``SomeProvider``\(*config*, *account_handler*)
-
-    The constructor is passed the config object returned by ``parse_config``,
-    and a ``synapse.module_api.ModuleApi`` object which allows the
-    password provider to check if accounts exist and/or create new ones.
-
-Optional methods
-----------------
-
-Password auth provider classes may optionally provide the following methods.
-
-*class* ``SomeProvider.get_db_schema_files``\()
-
-    This method, if implemented, should return an Iterable of ``(name,
-    stream)`` pairs of database schema files. Each file is applied in turn at
-    initialisation, and a record is then made in the database so that it is
-    not re-applied on the next start.
-
-``someprovider.get_supported_login_types``\()
-
-    This method, if implemented, should return a ``dict`` mapping from a login
-    type identifier (such as ``m.login.password``) to an iterable giving the
-    fields which must be provided by the user in the submission to the
-    ``/login`` api. These fields are passed in the ``login_dict`` dictionary
-    to ``check_auth``.
-
-    For example, if a password auth provider wants to implement a custom login
-    type of ``com.example.custom_login``, where the client is expected to pass
-    the fields ``secret1`` and ``secret2``, the provider should implement this
-    method and return the following dict::
-
-      {"com.example.custom_login": ("secret1", "secret2")}
-
-``someprovider.check_auth``\(*username*, *login_type*, *login_dict*)
-
-    This method is the one that does the real work. If implemented, it will be
-    called for each login attempt where the login type matches one of the keys
-    returned by ``get_supported_login_types``.
-
-    It is passed the (possibly UNqualified) ``user`` provided by the client,
-    the login type, and a dictionary of login secrets passed by the client.
-
-    The method should return a Twisted ``Deferred`` object, which resolves to
-    the canonical ``@localpart:domain`` user id if authentication is successful,
-    and ``None`` if not.
-
-    Alternatively, the ``Deferred`` can resolve to a ``(str, func)`` tuple, in
-    which case the second field is a callback which will be called with the
-    result from the ``/login`` call (including ``access_token``, ``device_id``,
-    etc.)
-
-``someprovider.check_3pid_auth``\(*medium*, *address*, *password*)
-
-    This method, if implemented, is called when a user attempts to register or
-    log in with a third party identifier, such as email. It is passed the
-    medium (ex. "email"), an address (ex. "jdoe@example.com") and the user's
-    password.
-
-    The method should return a Twisted ``Deferred`` object, which resolves to
-    a ``str`` containing the user's (canonical) User ID if authentication was
-    successful, and ``None`` if not.
-
-    As with ``check_auth``, the ``Deferred`` may alternatively resolve to a
-    ``(user_id, callback)`` tuple.
-
-``someprovider.check_password``\(*user_id*, *password*)
-
-    This method provides a simpler interface than ``get_supported_login_types``
-    and ``check_auth`` for password auth providers that just want to provide a
-    mechanism for validating ``m.login.password`` logins.
-
-    Iif implemented, it will be called to check logins with an
-    ``m.login.password`` login type. It is passed a qualified
-    ``@localpart:domain`` user id, and the password provided by the user.
-
-    The method should return a Twisted ``Deferred`` object, which resolves to
-    ``True`` if authentication is successful, and ``False`` if not.
-
-``someprovider.on_logged_out``\(*user_id*, *device_id*, *access_token*)
-
-    This method, if implemented, is called when a user logs out. It is passed
-    the qualified user ID, the ID of the deactivated device (if any: access
-    tokens are occasionally created without an associated device ID), and the
-    (now deactivated) access token.
-
-    It may return a Twisted ``Deferred`` object; the logout request will wait
-    for the deferred to complete but the result is ignored.
diff --git a/docs/postgres.md b/docs/postgres.md
new file mode 100644
index 000000000..29cf76285
--- /dev/null
+++ b/docs/postgres.md
@@ -0,0 +1,164 @@
+# Using Postgres
+
+Postgres version 9.5 or later is known to work.
+
+## Install postgres client libraries
+
+Synapse will require the python postgres client library in order to
+connect to a postgres database.
+
+-   If you are using the [matrix.org debian/ubuntu
+    packages](../INSTALL.md#matrixorg-packages), the necessary python
+    library will already be installed, but you will need to ensure the
+    low-level postgres library is installed, which you can do with
+    `apt install libpq5`.
+-   For other pre-built packages, please consult the documentation from
+    the relevant package.
+-   If you installed synapse [in a
+    virtualenv](../INSTALL.md#installing-from-source), you can install
+    the library with:
+
+        ~/synapse/env/bin/pip install matrix-synapse[postgres]
+
+    (substituting the path to your virtualenv for `~/synapse/env`, if
+    you used a different path). You will require the postgres
+    development files. These are in the `libpq-dev` package on
+    Debian-derived distributions.
+
+## Set up database
+
+Assuming your PostgreSQL database user is called `postgres`, create a
+user `synapse_user` with:
+
+    su - postgres
+    createuser --pwprompt synapse_user
+
+Before you can authenticate with the `synapse_user`, you must create a
+database that it can access. To create a database, first connect to the
+database with your database user:
+
+    su - postgres
+    psql
+
+and then run:
+
+    CREATE DATABASE synapse
+     ENCODING 'UTF8'
+     LC_COLLATE='C'
+     LC_CTYPE='C'
+     template=template0
+     OWNER synapse_user;
+
+This would create an appropriate database named `synapse` owned by the
+`synapse_user` user (which must already have been created as above).
+
+Note that the PostgreSQL database *must* have the correct encoding set
+(as shown above), otherwise it will not be able to store UTF8 strings.
+
+You may need to enable password authentication so `synapse_user` can
+connect to the database. See
+<https://www.postgresql.org/docs/11/auth-pg-hba-conf.html>.
+
+## Tuning Postgres
+
+The default settings should be fine for most deployments. For larger
+scale deployments tuning some of the settings is recommended, details of
+which can be found at
+<https://wiki.postgresql.org/wiki/Tuning_Your_PostgreSQL_Server>.
+
+In particular, we've found tuning the following values helpful for
+performance:
+
+-   `shared_buffers`
+-   `effective_cache_size`
+-   `work_mem`
+-   `maintenance_work_mem`
+-   `autovacuum_work_mem`
+
+Note that the appropriate values for those fields depend on the amount
+of free memory the database host has available.
+
+## Synapse config
+
+When you are ready to start using PostgreSQL, edit the `database`
+section in your config file to match the following lines:
+
+    database:
+        name: psycopg2
+        args:
+            user: <user>
+            password: <pass>
+            database: <db>
+            host: <host>
+            cp_min: 5
+            cp_max: 10
+
+All key/value pairs in `args` are passed to the `psycopg2.connect(..)`
+function, except keys beginning with `cp_`, which are consumed by the
+twisted adbapi connection pool.
+
+## Porting from SQLite
+
+### Overview
+
+The script `synapse_port_db` allows porting an existing synapse server
+backed by SQLite to using PostgreSQL. This is done as a two-phase
+process:
+
+1.  Copy the existing SQLite database to a separate location (while the
+    server is down) and run the port script against that offline
+    database.
+2.  Shut down the server. Rerun the port script to port any data that
+    has come in since taking the first snapshot. Restart server against
+    the PostgreSQL database.
+
+The port script is designed to be run repeatedly against newer snapshots
+of the SQLite database file. This makes it safe to repeat step 1 if
+there was a delay between taking the previous snapshot and being ready
+to do step 2.
+
+It is safe to kill the port script and restart it at any time.
+
+### Using the port script
+
+Firstly, shut down the currently running synapse server and copy its
+database file (typically `homeserver.db`) to another location. Once the
+copy is complete, restart synapse. For instance:
+
+    ./synctl stop
+    cp homeserver.db homeserver.db.snapshot
+    ./synctl start
+
+Copy the old config file into a new config file:
+
+    cp homeserver.yaml homeserver-postgres.yaml
+
+Edit the database section as described in the section *Synapse config*
+above and with the SQLite snapshot located at `homeserver.db.snapshot`
+simply run:
+
+    synapse_port_db --sqlite-database homeserver.db.snapshot \
+        --postgres-config homeserver-postgres.yaml
+
+The flag `--curses` displays a coloured curses progress UI.
+
+If the script took a long time to complete, or time has otherwise passed
+since the original snapshot was taken, repeat the previous steps with a
+newer snapshot.
+
+To complete the conversion shut down the synapse server and run the port
+script one last time, e.g. if the SQLite database is at `homeserver.db`
+run:
+
+    synapse_port_db --sqlite-database homeserver.db \
+        --postgres-config homeserver-postgres.yaml
+
+Once that has completed, change the synapse config to point at the
+PostgreSQL database configuration file `homeserver-postgres.yaml`:
+
+    ./synctl stop
+    mv homeserver.yaml homeserver-old-sqlite.yaml
+    mv homeserver-postgres.yaml homeserver.yaml
+    ./synctl start
+
+Synapse should now be running against PostgreSQL.
diff --git a/docs/postgres.rst b/docs/postgres.rst
deleted file mode 100644
index e08a5116b..000000000
--- a/docs/postgres.rst
+++ /dev/null
@@ -1,166 +0,0 @@
-Using Postgres
---------------
-
-Postgres version 9.5 or later is known to work.
-
-Install postgres client libraries
-=================================
-
-Synapse will require the python postgres client library in order to connect to
-a postgres database.
-
-* If you are using the `matrix.org debian/ubuntu
-  packages <../INSTALL.md#matrixorg-packages>`_,
-  the necessary python library will already be installed, but you will need to
-  ensure the low-level postgres library is installed, which you can do with
-  ``apt install libpq5``.
-
-* For other pre-built packages, please consult the documentation from the
-  relevant package.
-
-* If you installed synapse `in a virtualenv
-  <../INSTALL.md#installing-from-source>`_, you can install the library with::
-
-      ~/synapse/env/bin/pip install matrix-synapse[postgres]
-
-  (substituting the path to your virtualenv for ``~/synapse/env``, if you used a
-  different path). You will require the postgres development files. These are in
-  the ``libpq-dev`` package on Debian-derived distributions.
-
-Set up database
-===============
-
-Assuming your PostgreSQL database user is called ``postgres``, create a user
-``synapse_user`` with::
-
-   su - postgres
-   createuser --pwprompt synapse_user
-
-Before you can authenticate with the ``synapse_user``, you must create a
-database that it can access. To create a database, first connect to the database
-with your database user::
-
-   su - postgres
-   psql
-
-and then run::
-
-   CREATE DATABASE synapse
-    ENCODING 'UTF8'
-    LC_COLLATE='C'
-    LC_CTYPE='C'
-    template=template0
-    OWNER synapse_user;
-
-This would create an appropriate database named ``synapse`` owned by the
-``synapse_user`` user (which must already have been created as above).
-
-Note that the PostgreSQL database *must* have the correct encoding set (as
-shown above), otherwise it will not be able to store UTF8 strings.
-
-You may need to enable password authentication so ``synapse_user`` can connect
-to the database. See https://www.postgresql.org/docs/11/auth-pg-hba-conf.html.
-
-Tuning Postgres
-===============
-
-The default settings should be fine for most deployments. For larger scale
-deployments tuning some of the settings is recommended, details of which can be
-found at https://wiki.postgresql.org/wiki/Tuning_Your_PostgreSQL_Server.
-
-In particular, we've found tuning the following values helpful for performance:
-
-- ``shared_buffers``
-- ``effective_cache_size``
-- ``work_mem``
-- ``maintenance_work_mem``
-- ``autovacuum_work_mem``
-
-Note that the appropriate values for those fields depend on the amount of free
-memory the database host has available.
-
-Synapse config
-==============
-
-When you are ready to start using PostgreSQL, edit the ``database`` section in
-your config file to match the following lines::
-
-    database:
-        name: psycopg2
-        args:
-            user: <user>
-            password: <pass>
-            database: <db>
-            host: <host>
-            cp_min: 5
-            cp_max: 10
-
-All key, values in ``args`` are passed to the ``psycopg2.connect(..)``
-function, except keys beginning with ``cp_``, which are consumed by the twisted
-adbapi connection pool.
-
-
-Porting from SQLite
-===================
-
-Overview
-~~~~~~~~
-
-The script ``synapse_port_db`` allows porting an existing synapse server
-backed by SQLite to using PostgreSQL. This is done in as a two phase process:
-
-1. Copy the existing SQLite database to a separate location (while the server
-   is down) and running the port script against that offline database.
-2. Shut down the server. Rerun the port script to port any data that has come
-   in since taking the first snapshot. Restart server against the PostgreSQL
-   database.
-
-The port script is designed to be run repeatedly against newer snapshots of the
-SQLite database file. This makes it safe to repeat step 1 if there was a delay
-between taking the previous snapshot and being ready to do step 2.
-
-It is safe to at any time kill the port script and restart it.
-
-Using the port script
-~~~~~~~~~~~~~~~~~~~~~
-
-Firstly, shut down the currently running synapse server and copy its database
-file (typically ``homeserver.db``) to another location. Once the copy is
-complete, restart synapse.  For instance::
-
-    ./synctl stop
-    cp homeserver.db homeserver.db.snapshot
-    ./synctl start
-
-Copy the old config file into a new config file::
-
-    cp homeserver.yaml homeserver-postgres.yaml
-
-Edit the database section as described in the section *Synapse config* above
-and with the SQLite snapshot located at ``homeserver.db.snapshot`` simply run::
-
-    synapse_port_db --sqlite-database homeserver.db.snapshot \
-        --postgres-config homeserver-postgres.yaml
-
-The flag ``--curses`` displays a coloured curses progress UI.
-
-If the script took a long time to complete, or time has otherwise passed since
-the original snapshot was taken, repeat the previous steps with a newer
-snapshot.
-
-To complete the conversion shut down the synapse server and run the port
-script one last time, e.g. if the SQLite database is at  ``homeserver.db``
-run::
-
-    synapse_port_db --sqlite-database homeserver.db \
-        --postgres-config homeserver-postgres.yaml
-
-Once that has completed, change the synapse config to point at the PostgreSQL
-database configuration file ``homeserver-postgres.yaml``::
-
-    ./synctl stop
-    mv homeserver.yaml homeserver-old-sqlite.yaml
-    mv homeserver-postgres.yaml homeserver.yaml
-    ./synctl start
-
-Synapse should now be running against PostgreSQL.
diff --git a/docs/replication.md b/docs/replication.md
new file mode 100644
index 000000000..ed8823315
--- /dev/null
+++ b/docs/replication.md
@@ -0,0 +1,37 @@
+# Replication Architecture
+
+## Motivation
+
+We'd like to be able to split some of the work that synapse does into
+multiple python processes. In theory multiple synapse processes could
+share a single postgresql database and we'd scale up by running more
+synapse processes. However much of synapse assumes that only one process
+is interacting with the database, both for assigning unique identifiers
+when inserting into tables, notifying components about new updates, and
+for invalidating its caches.
+
+So running multiple copies of the current code isn't an option. One way
+to run multiple processes would be to have a single writer process and
+multiple reader processes connected to the same database. In order to do
+this we'd need a way for the reader process to invalidate its in-memory
+caches when an update happens on the writer. One way to do this is for
+the writer to present an append-only log of updates which the readers
+can consume to invalidate their caches and to push updates to listening
+clients or pushers.
+
+Synapse already stores much of its data as an append-only log so that it
+can correctly respond to `/sync` requests so the amount of code changes
+needed to expose the append-only log to the readers should be fairly
+minimal.
+
+## Architecture
+
+### The Replication Protocol
+
+See [tcp_replication.md](tcp_replication.md)
+
+### The Slaved DataStore
+
+There are read-only versions of the synapse storage layer in
+`synapse/replication/slave/storage` that use the response of the
+replication API to invalidate their caches.
diff --git a/docs/replication.rst b/docs/replication.rst
deleted file mode 100644
index 310abb348..000000000
--- a/docs/replication.rst
+++ /dev/null
@@ -1,40 +0,0 @@
-Replication Architecture
-========================
-
-Motivation
-----------
-
-We'd like to be able to split some of the work that synapse does into multiple
-python processes. In theory multiple synapse processes could share a single
-postgresql database and we'd scale up by running more synapse processes.
-However much of synapse assumes that only one process is interacting with the
-database, both for assigning unique identifiers when inserting into tables,
-notifying components about new updates, and for invalidating its caches.
-
-So running multiple copies of the current code isn't an option. One way to
-run multiple processes would be to have a single writer process and multiple
-reader processes connected to the same database. In order to do this we'd need
-a way for the reader process to invalidate its in-memory caches when an update
-happens on the writer. One way to do this is for the writer to present an
-append-only log of updates which the readers can consume to invalidate their
-caches and to push updates to listening clients or pushers.
-
-Synapse already stores much of its data as an append-only log so that it can
-correctly respond to /sync requests so the amount of code changes needed to
-expose the append-only log to the readers should be fairly minimal.
-
-Architecture
-------------
-
-The Replication Protocol
-~~~~~~~~~~~~~~~~~~~~~~~~
-
-See ``tcp_replication.rst``
-
-
-The Slaved DataStore
-~~~~~~~~~~~~~~~~~~~~
-
-There are read-only version of the synapse storage layer in
-``synapse/replication/slave/storage`` that use the response of the replication
-API to invalidate their caches.
diff --git a/docs/reverse_proxy.md b/docs/reverse_proxy.md
new file mode 100644
index 000000000..dcfc5c64a
--- /dev/null
+++ b/docs/reverse_proxy.md
@@ -0,0 +1,123 @@
+# Using a reverse proxy with Synapse
+
+It is recommended to put a reverse proxy such as
+[nginx](https://nginx.org/en/docs/http/ngx_http_proxy_module.html),
+[Apache](https://httpd.apache.org/docs/current/mod/mod_proxy_http.html),
+[Caddy](https://caddyserver.com/docs/proxy) or
+[HAProxy](https://www.haproxy.org/) in front of Synapse. One advantage
+of doing so is that it means that you can expose the default https port
+(443) to Matrix clients without needing to run Synapse with root
+privileges.
+
+> **NOTE**: Your reverse proxy must not `canonicalise` or `normalise`
+the requested URI in any way (for example, by decoding `%xx` escapes).
+Beware that Apache *will* canonicalise URIs unless you specify
+`nocanon`.
+
+When setting up a reverse proxy, remember that Matrix clients and other
+Matrix servers do not necessarily need to connect to your server via the
+same server name or port. Indeed, clients will use port 443 by default,
+whereas servers default to port 8448. Where these are different, we
+refer to the 'client port' and the 'federation port'. See [Setting
+up federation](federate.md) for more details of the algorithm used for
+federation connections.
+
+Let's assume that we expect clients to connect to our server at
+`https://matrix.example.com`, and other servers to connect at
+`https://example.com:8448`.  The following sections detail the configuration of
+the reverse proxy and the homeserver.
+
+## Webserver configuration examples
+
+> **NOTE**: You only need one of these.
+
+### nginx
+
+        server {
+            listen 443 ssl;
+            listen [::]:443 ssl;
+            server_name matrix.example.com;
+
+            location /_matrix {
+                proxy_pass http://localhost:8008;
+                proxy_set_header X-Forwarded-For $remote_addr;
+            }
+        }
+
+        server {
+            listen 8448 ssl default_server;
+            listen [::]:8448 ssl default_server;
+            server_name example.com;
+
+            location / {
+                proxy_pass http://localhost:8008;
+                proxy_set_header X-Forwarded-For $remote_addr;
+            }
+        }
+
+> **NOTE**: Do not add a `/` after the port in `proxy_pass`, otherwise nginx will
+canonicalise/normalise the URI.
+
+### Caddy
+
+        matrix.example.com {
+          proxy /_matrix http://localhost:8008 {
+            transparent
+          }
+        }
+
+        example.com:8448 {
+          proxy / http://localhost:8008 {
+            transparent
+          }
+        }
+
+### Apache
+
+        <VirtualHost *:443>
+            SSLEngine on
+            ServerName matrix.example.com
+
+            AllowEncodedSlashes NoDecode
+            ProxyPass /_matrix http://127.0.0.1:8008/_matrix nocanon
+            ProxyPassReverse /_matrix http://127.0.0.1:8008/_matrix
+        </VirtualHost>
+
+        <VirtualHost *:8448>
+            SSLEngine on
+            ServerName example.com
+
+            AllowEncodedSlashes NoDecode
+            ProxyPass /_matrix http://127.0.0.1:8008/_matrix nocanon
+            ProxyPassReverse /_matrix http://127.0.0.1:8008/_matrix
+        </VirtualHost>
+
+> **NOTE**: ensure the  `nocanon` options are included.
+
+### HAProxy
+
+        frontend https
+          bind :::443 v4v6 ssl crt /etc/ssl/haproxy/ strict-sni alpn h2,http/1.1
+
+          # Matrix client traffic
+          acl matrix-host hdr(host) -i matrix.example.com
+          acl matrix-path path_beg /_matrix
+
+          use_backend matrix if matrix-host matrix-path
+
+        frontend matrix-federation
+          bind :::8448 v4v6 ssl crt /etc/ssl/haproxy/synapse.pem alpn h2,http/1.1
+          default_backend matrix
+
+        backend matrix
+          server matrix 127.0.0.1:8008
+
+## Homeserver Configuration
+
+You will also want to set `bind_addresses: ['127.0.0.1']` and
+`x_forwarded: true` for port 8008 in `homeserver.yaml` to ensure that
+client IP addresses are recorded correctly.
+
+Having done so, you can then use `https://matrix.example.com` (instead
+of `https://matrix.example.com:8448`) as the "Custom server" when
+connecting to Synapse from a client.
diff --git a/docs/reverse_proxy.rst b/docs/reverse_proxy.rst
deleted file mode 100644
index 4b640ffc4..000000000
--- a/docs/reverse_proxy.rst
+++ /dev/null
@@ -1,112 +0,0 @@
-Using a reverse proxy with Synapse
-==================================
-
-It is recommended to put a reverse proxy such as
-`nginx <https://nginx.org/en/docs/http/ngx_http_proxy_module.html>`_,
-`Apache <https://httpd.apache.org/docs/current/mod/mod_proxy_http.html>`_,
-`Caddy <https://caddyserver.com/docs/proxy>`_ or
-`HAProxy <https://www.haproxy.org/>`_ in front of Synapse. One advantage of
-doing so is that it means that you can expose the default https port (443) to
-Matrix clients without needing to run Synapse with root privileges.
-
-**NOTE**: Your reverse proxy must not 'canonicalise' or 'normalise' the
-requested URI in any way (for example, by decoding ``%xx`` escapes). Beware
-that Apache *will* canonicalise URIs unless you specifify ``nocanon``.
-
-When setting up a reverse proxy, remember that Matrix clients and other Matrix
-servers do not necessarily need to connect to your server via the same server
-name or port. Indeed, clients will use port 443 by default, whereas servers
-default to port 8448. Where these are different, we refer to the 'client port'
-and the 'federation port'. See `Setting up federation
-<federate.md>`_ for more details of the algorithm used for
-federation connections.
-
-Let's assume that we expect clients to connect to our server at
-``https://matrix.example.com``, and other servers to connect at
-``https://example.com:8448``. Here are some example configurations:
-
-* nginx::
-
-      server {
-          listen 443 ssl;
-          listen [::]:443 ssl;
-          server_name matrix.example.com;
-
-          location /_matrix {
-              proxy_pass http://localhost:8008;
-              proxy_set_header X-Forwarded-For $remote_addr;
-          }
-      }
-
-      server {
-          listen 8448 ssl default_server;
-          listen [::]:8448 ssl default_server;
-          server_name example.com;
-
-          location / {
-              proxy_pass http://localhost:8008;
-              proxy_set_header X-Forwarded-For $remote_addr;
-          }
-      }
-      
-  Do not add a `/` after the port in `proxy_pass`, otherwise nginx will canonicalise/normalise the URI.
-
-* Caddy::
-
-      matrix.example.com {
-        proxy /_matrix http://localhost:8008 {
-          transparent
-        }
-      }
-
-      example.com:8448 {
-        proxy / http://localhost:8008 {
-          transparent
-        }
-      }
-
-* Apache (note the ``nocanon`` options here!)::
-
-      <VirtualHost *:443>
-          SSLEngine on
-          ServerName matrix.example.com;
-
-          AllowEncodedSlashes NoDecode
-          ProxyPass /_matrix http://127.0.0.1:8008/_matrix nocanon
-          ProxyPassReverse /_matrix http://127.0.0.1:8008/_matrix
-      </VirtualHost>
-
-      <VirtualHost *:8448>
-          SSLEngine on
-          ServerName example.com;
-          
-          AllowEncodedSlashes NoDecode
-          ProxyPass /_matrix http://127.0.0.1:8008/_matrix nocanon
-          ProxyPassReverse /_matrix http://127.0.0.1:8008/_matrix
-      </VirtualHost>
-
-* HAProxy::
-
-      frontend https
-        bind :::443 v4v6 ssl crt /etc/ssl/haproxy/ strict-sni alpn h2,http/1.1
-
-        # Matrix client traffic
-        acl matrix-host hdr(host) -i matrix.example.com
-        acl matrix-path path_beg /_matrix
-
-        use_backend matrix if matrix-host matrix-path
-
-      frontend matrix-federation
-        bind :::8448 v4v6 ssl crt /etc/ssl/haproxy/synapse.pem alpn h2,http/1.1
-        default_backend matrix
-
-      backend matrix
-        server matrix 127.0.0.1:8008
-
-You will also want to set ``bind_addresses: ['127.0.0.1']`` and ``x_forwarded: true``
-for port 8008 in ``homeserver.yaml`` to ensure that client IP addresses are
-recorded correctly.
-
-Having done so, you can then use ``https://matrix.example.com`` (instead of
-``https://matrix.example.com:8448``) as the "Custom server" when connecting to
-Synapse from a client.
diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml
index dd4e2d5eb..d5a8d24c2 100644
--- a/docs/sample_config.yaml
+++ b/docs/sample_config.yaml
@@ -136,8 +136,8 @@ federation_ip_range_blacklist:
 #
 #   type: the type of listener. Normally 'http', but other valid options are:
 #       'manhole' (see docs/manhole.md),
-#       'metrics' (see docs/metrics-howto.rst),
-#       'replication' (see docs/workers.rst).
+#       'metrics' (see docs/metrics-howto.md),
+#       'replication' (see docs/workers.md).
 #
 #   tls: set to true to enable TLS for this listener. Will use the TLS
 #       key/cert specified in tls_private_key_path / tls_certificate_path.
@@ -172,12 +172,12 @@ federation_ip_range_blacklist:
 #
 #   media: the media API (/_matrix/media).
 #
-#   metrics: the metrics interface. See docs/metrics-howto.rst.
+#   metrics: the metrics interface. See docs/metrics-howto.md.
 #
 #   openid: OpenID authentication.
 #
 #   replication: the HTTP replication API (/_synapse/replication). See
-#       docs/workers.rst.
+#       docs/workers.md.
 #
 #   static: static resources under synapse/static (/_matrix/static). (Mostly
 #       useful for 'fallback authentication'.)
@@ -201,7 +201,7 @@ listeners:
   # that unwraps TLS.
   #
   # If you plan to use a reverse proxy, please see
-  # https://github.com/matrix-org/synapse/blob/master/docs/reverse_proxy.rst.
+  # https://github.com/matrix-org/synapse/blob/master/docs/reverse_proxy.md.
   #
   - port: 8008
     tls: false
@@ -1520,7 +1520,7 @@ opentracing:
     #enabled: true
 
     # The list of homeservers we wish to send and receive span contexts and span baggage.
-    # See docs/opentracing.rst
+    # See docs/opentracing.md
     # This is a list of regexes which are matched against the server_name of the
     # homeserver.
     #
diff --git a/docs/tcp_replication.md b/docs/tcp_replication.md
new file mode 100644
index 000000000..e099d8a87
--- /dev/null
+++ b/docs/tcp_replication.md
@@ -0,0 +1,249 @@
+# TCP Replication
+
+## Motivation
+
+Previously the workers used an HTTP long poll mechanism to get updates
+from the master, which had the problem of causing a lot of duplicate
+work on the server. This TCP protocol replaces those APIs with the aim
+of increased efficiency.
+
+## Overview
+
+The protocol is based on fire and forget, line based commands. An
+example flow would be (where '>' indicates master to worker and
+'<' worker to master flows):
+
+    > SERVER example.com
+    < REPLICATE events 53
+    > RDATA events 54 ["$foo1:bar.com", ...]
+    > RDATA events 55 ["$foo4:bar.com", ...]
+
+The example shows the server accepting a new connection and sending its
+identity with the `SERVER` command, followed by the client asking to
+subscribe to the `events` stream from the token `53`. The server then
+periodically sends `RDATA` commands which have the format
+`RDATA <stream_name> <token> <row>`, where the format of `<row>` is
+defined by the individual streams.
+
+Error reporting happens by either the client or server sending an ERROR
+command, and usually the connection will be closed.
+
+Since the protocol is simple and line based, it's possible to manually
+connect to the server using a tool like netcat. A few things should be
+noted when manually using the protocol:
+
+-   When subscribing to a stream using `REPLICATE`, the special token
+    `NOW` can be used to get all future updates. The special stream name
+    `ALL` can be used with `NOW` to subscribe to all available streams.
+-   The federation stream is only available if federation sending has
+    been disabled on the main process.
+-   The server will only time connections out that have sent a `PING`
+    command. If a ping is sent then the connection will be closed if no
+    further commands are received within 15s. Both the client and
+    server protocol implementations will send an initial PING on
+    connection and ensure at least one command every 5s is sent (not
+    necessarily `PING`).
+-   `RDATA` commands *usually* include a numeric token, however if the
+    stream has multiple rows to replicate per token the server will send
+    multiple `RDATA` commands, with all but the last having a token of
+    `batch`. See the documentation on `commands.RdataCommand` for
+    further details.
+
+## Architecture
+
+The basic structure of the protocol is line based, where the initial
+word of each line specifies the command. The rest of the line is parsed
+based on the command. For example, the RDATA command is defined as:
+
+    RDATA <stream_name> <token> <row_json>
+
+(Note that `<row_json>` may contain spaces, but cannot contain
+newlines.)
+
+Blank lines are ignored.
+
+### Keep alives
+
+Both sides are expected to send at least one command every 5s or so, and
+should send a `PING` command if necessary. If either side does not receive
+a command within e.g. 15s then the connection should be closed.
+
+Because the server may be connected to manually using e.g. netcat, the
+timeouts aren't enabled until an initial `PING` command is seen. Both
+the client and server implementations below send a `PING` command
+immediately on connection to ensure the timeouts are enabled.
+
+This ensures that both sides can quickly realize if the tcp connection
+has gone and handle the situation appropriately.
+
+### Start up
+
+When a new connection is made, the server:
+
+-   Sends a `SERVER` command, which includes the identity of the server,
+    allowing the client to detect if it's connected to the expected
+    server
+-   Sends a `PING` command as above, to enable the client to time out
+    connections promptly.
+
+The client:
+
+-   Sends a `NAME` command, allowing the server to associate a human
+    friendly name with the connection. This is optional.
+-   Sends a `PING` as above
+-   For each stream the client wishes to subscribe to it sends a
+    `REPLICATE` with the `stream_name` and token it wants to subscribe
+    from.
+-   On receipt of a `SERVER` command, checks that the server name
+    matches the expected server name.
+
+### Error handling
+
+If either side detects an error it can send an `ERROR` command and close
+the connection.
+
+If the client side loses the connection to the server it should
+reconnect, following the steps above.
+
+### Congestion
+
+If the server sends messages faster than the client can consume them the
+server will first buffer a (fairly large) number of commands and then
+disconnect the client. This ensures that we don't queue up an unbounded
+number of commands in memory and gives us a potential opportunity to
+squawk loudly. When/if the client recovers it can reconnect to the
+server and ask for missed messages.
+
+### Reliability
+
+In general the replication stream should be considered an unreliable
+transport since e.g. commands are not resent if the connection
+disappears.
+
+The exception to that are the replication streams, i.e. RDATA commands,
+since these include tokens which can be used to restart the stream on
+connection errors.
+
+The client should keep track of the token in the last RDATA command
+received for each stream so that on reconnection it can start streaming
+from the correct place. Note: not all RDATA have valid tokens due to
+batching. See `RdataCommand` for more details.
+
+### Example
+
+An example interaction is shown below. Each line is prefixed with '>'
+or '<' to indicate which side is sending, these are *not* included on
+the wire:
+
+    * connection established *
+    > SERVER localhost:8823
+    > PING 1490197665618
+    < NAME synapse.app.appservice
+    < PING 1490197665618
+    < REPLICATE events 1
+    < REPLICATE backfill 1
+    < REPLICATE caches 1
+    > POSITION events 1
+    > POSITION backfill 1
+    > POSITION caches 1
+    > RDATA caches 2 ["get_user_by_id",["@01register-user:localhost:8823"],1490197670513]
+    > RDATA events 14 ["$149019767112vOHxz:localhost:8823",
+        "!AFDCvgApUmpdfVjIXm:localhost:8823","m.room.guest_access","",null]
+    < PING 1490197675618
+    > ERROR server stopping
+    * connection closed by server *
+
+The `POSITION` command sent by the server is used to set the client's
+position without needing to send data with the `RDATA` command.
+
+An example of a batched set of `RDATA` is:
+
+    > RDATA caches batch ["get_user_by_id",["@test:localhost:8823"],1490197670513]
+    > RDATA caches batch ["get_user_by_id",["@test2:localhost:8823"],1490197670513]
+    > RDATA caches batch ["get_user_by_id",["@test3:localhost:8823"],1490197670513]
+    > RDATA caches 54 ["get_user_by_id",["@test4:localhost:8823"],1490197670513]
+
+In this case the client shouldn't advance their caches token until it
+sees the last `RDATA`.
+
+### List of commands
+
+The list of valid commands, with which side can send it: server (S) or
+client (C):
+
+#### SERVER (S)
+
+   Sent at the start to identify which server the client is talking to
+
+#### RDATA (S)
+
+   A single update in a stream
+
+#### POSITION (S)
+
+   The position of the stream has been updated. Sent to the client
+    after all missing updates for a stream have been sent to the client
+    and they're now up to date.
+
+#### ERROR (S, C)
+
+   There was an error
+
+#### PING (S, C)
+
+   Sent periodically to ensure the connection is still alive
+
+#### NAME (C)
+
+   Sent at the start by client to inform the server who they are
+
+#### REPLICATE (C)
+
+   Asks the server to replicate a given stream
+
+#### USER_SYNC (C)
+
+   A user has started or stopped syncing
+
+#### FEDERATION_ACK (C)
+
+   Acknowledge receipt of some federation data
+
+#### REMOVE_PUSHER (C)
+
+   Inform the server a pusher should be removed
+
+#### INVALIDATE_CACHE (C)
+
+   Inform the server a cache should be invalidated
+
+#### SYNC (S, C)
+
+   Used exclusively in tests
+
+See `synapse/replication/tcp/commands.py` for a detailed description and
+the format of each command.
+
+### Cache Invalidation Stream
+
+The cache invalidation stream is used to inform workers when they need
+to invalidate any of their caches in the data store. This is done by
+streaming all cache invalidations done on master down to the workers,
+assuming that any caches on the workers also exist on the master.
+
+Each individual cache invalidation results in a row being sent down
+replication, which includes the cache name (the name of the function)
+and the key to invalidate. For example:
+
+    > RDATA caches 550953771 ["get_user_by_id", ["@bob:example.com"], 1550574873251]
+
+However, there are times when a number of caches need to be invalidated
+at the same time with the same key. To reduce traffic we batch those
+invalidations into a single poke by defining a special cache name that
+workers understand to mean to expand to invalidate the correct caches.
+
+Currently the special cache names are declared in
+`synapse/storage/_base.py` and are:
+
+1.  `cs_cache_fake` ─ invalidates caches that depend on the current
+    state
diff --git a/docs/tcp_replication.rst b/docs/tcp_replication.rst
deleted file mode 100644
index 75e723484..000000000
--- a/docs/tcp_replication.rst
+++ /dev/null
@@ -1,249 +0,0 @@
-TCP Replication
-===============
-
-Motivation
-----------
-
-Previously the workers used an HTTP long poll mechanism to get updates from the
-master, which had the problem of causing a lot of duplicate work on the server.
-This TCP protocol replaces those APIs with the aim of increased efficiency.
-
-
-
-Overview
---------
-
-The protocol is based on fire and forget, line based commands. An example flow
-would be (where '>' indicates master to worker and '<' worker to master flows)::
-
-    > SERVER example.com
-    < REPLICATE events 53
-    > RDATA events 54 ["$foo1:bar.com", ...]
-    > RDATA events 55 ["$foo4:bar.com", ...]
-
-The example shows the server accepting a new connection and sending its identity
-with the ``SERVER`` command, followed by the client asking to subscribe to the
-``events`` stream from the token ``53``. The server then periodically sends ``RDATA``
-commands which have the format ``RDATA <stream_name> <token> <row>``, where the
-format of ``<row>`` is defined by the individual streams.
-
-Error reporting happens by either the client or server sending an `ERROR`
-command, and usually the connection will be closed.
-
-
-Since the protocol is a simple line based, its possible to manually connect to
-the server using a tool like netcat. A few things should be noted when manually
-using the protocol:
-
-* When subscribing to a stream using ``REPLICATE``, the special token ``NOW`` can
-  be used to get all future updates. The special stream name ``ALL`` can be used
-  with ``NOW`` to subscribe to all available streams.
-* The federation stream is only available if federation sending has been
-  disabled on the main process.
-* The server will only time connections out that have sent a ``PING`` command.
-  If a ping is sent then the connection will be closed if no further commands
-  are receieved within 15s. Both the client and server protocol implementations
-  will send an initial PING on connection and ensure at least one command every
-  5s is sent (not necessarily ``PING``).
-* ``RDATA`` commands *usually* include a numeric token, however if the stream
-  has multiple rows to replicate per token the server will send multiple
-  ``RDATA`` commands, with all but the last having a token of ``batch``. See
-  the documentation on ``commands.RdataCommand`` for further details.
-
-
-Architecture
-------------
-
-The basic structure of the protocol is line based, where the initial word of
-each line specifies the command. The rest of the line is parsed based on the
-command. For example, the `RDATA` command is defined as::
-
-    RDATA <stream_name> <token> <row_json>
-
-(Note that `<row_json>` may contains spaces, but cannot contain newlines.)
-
-Blank lines are ignored.
-
-
-Keep alives
-~~~~~~~~~~~
-
-Both sides are expected to send at least one command every 5s or so, and
-should send a ``PING`` command if necessary. If either side do not receive a
-command within e.g. 15s then the connection should be closed.
-
-Because the server may be connected to manually using e.g. netcat, the timeouts
-aren't enabled until an initial ``PING`` command is seen. Both the client and
-server implementations below send a ``PING`` command immediately on connection to
-ensure the timeouts are enabled.
-
-This ensures that both sides can quickly realize if the tcp connection has gone
-and handle the situation appropriately.
-
-
-Start up
-~~~~~~~~
-
-When a new connection is made, the server:
-
-* Sends a ``SERVER`` command, which includes the identity of the server, allowing
-  the client to detect if its connected to the expected server
-* Sends a ``PING`` command as above, to enable the client to time out connections
-  promptly.
-
-The client:
-
-* Sends a ``NAME`` command, allowing the server to associate a human friendly
-  name with the connection. This is optional.
-* Sends a ``PING`` as above
-* For each stream the client wishes to subscribe to it sends a ``REPLICATE``
-  with the stream_name and token it wants to subscribe from.
-* On receipt of a ``SERVER`` command, checks that the server name matches the
-  expected server name.
-
-
-Error handling
-~~~~~~~~~~~~~~
-
-If either side detects an error it can send an ``ERROR`` command and close the
-connection.
-
-If the client side loses the connection to the server it should reconnect,
-following the steps above.
-
-
-Congestion
-~~~~~~~~~~
-
-If the server sends messages faster than the client can consume them the server
-will first buffer a (fairly large) number of commands and then disconnect the
-client. This ensures that we don't queue up an unbounded number of commands in
-memory and gives us a potential oppurtunity to squawk loudly. When/if the client
-recovers it can reconnect to the server and ask for missed messages.
-
-
-Reliability
-~~~~~~~~~~~
-
-In general the replication stream should be considered an unreliable transport
-since e.g. commands are not resent if the connection disappears.
-
-The exception to that are the replication streams, i.e. RDATA commands, since
-these include tokens which can be used to restart the stream on connection
-errors.
-
-The client should keep track of the token in the last RDATA command received
-for each stream so that on reconneciton it can start streaming from the correct
-place. Note: not all RDATA have valid tokens due to batching. See
-``RdataCommand`` for more details.
-
-Example
-~~~~~~~
-
-An example iteraction is shown below. Each line is prefixed with '>' or '<' to
-indicate which side is sending, these are *not* included on the wire::
-
-    * connection established *
-    > SERVER localhost:8823
-    > PING 1490197665618
-    < NAME synapse.app.appservice
-    < PING 1490197665618
-    < REPLICATE events 1
-    < REPLICATE backfill 1
-    < REPLICATE caches 1
-    > POSITION events 1
-    > POSITION backfill 1
-    > POSITION caches 1
-    > RDATA caches 2 ["get_user_by_id",["@01register-user:localhost:8823"],1490197670513]
-    > RDATA events 14 ["$149019767112vOHxz:localhost:8823",
-        "!AFDCvgApUmpdfVjIXm:localhost:8823","m.room.guest_access","",null]
-    < PING 1490197675618
-    > ERROR server stopping
-    * connection closed by server *
-
-The ``POSITION`` command sent by the server is used to set the clients position
-without needing to send data with the ``RDATA`` command.
-
-
-An example of a batched set of ``RDATA`` is::
-
-    > RDATA caches batch ["get_user_by_id",["@test:localhost:8823"],1490197670513]
-    > RDATA caches batch ["get_user_by_id",["@test2:localhost:8823"],1490197670513]
-    > RDATA caches batch ["get_user_by_id",["@test3:localhost:8823"],1490197670513]
-    > RDATA caches 54 ["get_user_by_id",["@test4:localhost:8823"],1490197670513]
-
-In this case the client shouldn't advance their caches token until it sees the
-the last ``RDATA``.
-
-
-List of commands
-~~~~~~~~~~~~~~~~
-
-The list of valid commands, with which side can send it: server (S) or client (C):
-
-SERVER (S)
-    Sent at the start to identify which server the client is talking to
-
-RDATA (S)
-    A single update in a stream
-
-POSITION (S)
-    The position of the stream has been updated. Sent to the client after all
-    missing updates for a stream have been sent to the client and they're now
-    up to date.
-
-ERROR (S, C)
-    There was an error
-
-PING (S, C)
-    Sent periodically to ensure the connection is still alive
-
-NAME (C)
-    Sent at the start by client to inform the server who they are
-
-REPLICATE (C)
-    Asks the server to replicate a given stream
-
-USER_SYNC (C)
-    A user has started or stopped syncing
-
-FEDERATION_ACK (C)
-    Acknowledge receipt of some federation data
-
-REMOVE_PUSHER (C)
-    Inform the server a pusher should be removed
-
-INVALIDATE_CACHE (C)
-    Inform the server a cache should be invalidated
-
-SYNC (S, C)
-    Used exclusively in tests
-
-
-See ``synapse/replication/tcp/commands.py`` for a detailed description and the
-format of each command.
-
-
-Cache Invalidation Stream
-~~~~~~~~~~~~~~~~~~~~~~~~~
-
-The cache invalidation stream is used to inform workers when they need to
-invalidate any of their caches in the data store. This is done by streaming all
-cache invalidations done on master down to the workers, assuming that any caches
-on the workers also exist on the master.
-
-Each individual cache invalidation results in a row being sent down replication,
-which includes the cache name (the name of the function) and they key to
-invalidate. For example::
-
-    > RDATA caches 550953771 ["get_user_by_id", ["@bob:example.com"], 1550574873251]
-
-However, there are times when a number of caches need to be invalidated at the
-same time with the same key. To reduce traffic we batch those invalidations into
-a single poke by defining a special cache name that workers understand to mean
-to expand to invalidate the correct caches.
-
-Currently the special cache names are declared in ``synapse/storage/_base.py``
-and are:
-
-1. ``cs_cache_fake`` ─ invalidates caches that depend on the current state
diff --git a/docs/turn-howto.md b/docs/turn-howto.md
new file mode 100644
index 000000000..4a983621e
--- /dev/null
+++ b/docs/turn-howto.md
@@ -0,0 +1,123 @@
+# Overview
+
+This document explains how to enable VoIP relaying on your Home Server with
+TURN.
+
+The synapse Matrix Home Server supports integration with TURN server via the
+[TURN server REST API](<http://tools.ietf.org/html/draft-uberti-behave-turn-rest-00>). This
+allows the Home Server to generate credentials that are valid for use on the
+TURN server through the use of a secret shared between the Home Server and the
+TURN server.
+
+The following sections describe how to install [coturn](<https://github.com/coturn/coturn>) (which implements the TURN REST API) and integrate it with synapse.
+
+## `coturn` Setup
+
+### Initial installation
+
+The TURN daemon `coturn` is available from a variety of sources such as native package managers, or installation from source.
+
+#### Debian installation
+
+    # apt install coturn
+
+#### Source installation
+
+1. Download the [latest release](https://github.com/coturn/coturn/releases/latest) from github.  Unpack it and `cd` into the directory.
+
+1.  Configure it:
+
+        ./configure
+
+    > You may need to install `libevent2`: if so, you should do so in
+    > the way recommended by your operating system. You can ignore
+    > warnings about lack of database support: a database is unnecessary
+    > for this purpose.
+
+1.  Build and install it:
+
+        make
+        make install
+
+1.  Create or edit the config file in `/etc/turnserver.conf`. The relevant
+    lines, with example values, are:
+
+        use-auth-secret
+        static-auth-secret=[your secret key here]
+        realm=turn.myserver.org
+
+    See `turnserver.conf` for explanations of the options. One way to generate
+    the `static-auth-secret` is with `pwgen`:
+
+        pwgen -s 64 1
+
+1.  Consider your security settings. TURN lets users request a relay which will
+    connect to arbitrary IP addresses and ports. The following configuration is
+    suggested as a minimum starting point:
+    
+        # VoIP traffic is all UDP. There is no reason to let users connect to arbitrary TCP endpoints via the relay.
+        no-tcp-relay
+        
+        # don't let the relay ever try to connect to private IP address ranges within your network (if any)
+        # given the turn server is likely behind your firewall, remember to include any privileged public IPs too.
+        denied-peer-ip=10.0.0.0-10.255.255.255
+        denied-peer-ip=192.168.0.0-192.168.255.255
+        denied-peer-ip=172.16.0.0-172.31.255.255
+        
+        # special case the turn server itself so that client->TURN->TURN->client flows work
+        allowed-peer-ip=10.0.0.1
+        
+        # consider whether you want to limit the quota of relayed streams per user (or total) to avoid risk of DoS.
+        user-quota=12 # 4 streams per video call, so 12 streams = 3 simultaneous relayed calls per user.
+        total-quota=1200
+
+    Ideally coturn should refuse to relay traffic which isn't SRTP; see
+    <https://github.com/matrix-org/synapse/issues/2009>
+
+1.  Ensure your firewall allows traffic into the TURN server on the ports
+    you've configured it to listen on (remember to allow both TCP and UDP TURN
+    traffic)
+
+1.  If you've configured coturn to support TLS/DTLS, generate or import your
+    private key and certificate.
+
+1.  Start the turn server:
+
+         bin/turnserver -o
+
+## synapse Setup
+
+Your home server configuration file needs the following extra keys:
+
+1.  "`turn_uris`": This needs to be a yaml list of public-facing URIs
+    for your TURN server to be given out to your clients. Add separate
+    entries for each transport your TURN server supports.
+2.  "`turn_shared_secret`": This is the secret shared between your
+    Home server and your TURN server, so you should set it to the same
+    string you used in turnserver.conf.
+3.  "`turn_user_lifetime`": This is the amount of time credentials
+    generated by your Home Server are valid for (in milliseconds).
+    Shorter times offer less potential for abuse at the expense of
+    increased traffic between web clients and your home server to
+    refresh credentials. The TURN REST API specification recommends
+    one day (86400000).
+4.  "`turn_allow_guests`": Whether to allow guest users to use the
+    TURN server. This is enabled by default, as otherwise VoIP will
+    not work reliably for guests. However, it does introduce a
+    security risk as it lets guests connect to arbitrary endpoints
+    without having gone through a CAPTCHA or similar to register a
+    real account.
+
+As an example, here is the relevant section of the config file for matrix.org:
+
+    turn_uris: [ "turn:turn.matrix.org:3478?transport=udp", "turn:turn.matrix.org:3478?transport=tcp" ]
+    turn_shared_secret: n0t4ctuAllymatr1Xd0TorgSshar3d5ecret4obvIousreAsons
+    turn_user_lifetime: 86400000
+    turn_allow_guests: True
+
+After updating the homeserver configuration, you must restart synapse:
+
+    cd /where/you/run/synapse
+    ./synctl restart
+
+..and your Home Server now supports VoIP relaying!
diff --git a/docs/turn-howto.rst b/docs/turn-howto.rst
deleted file mode 100644
index a2fc5c882..000000000
--- a/docs/turn-howto.rst
+++ /dev/null
@@ -1,127 +0,0 @@
-How to enable VoIP relaying on your Home Server with TURN
-
-Overview
---------
-The synapse Matrix Home Server supports integration with TURN server via the
-TURN server REST API
-(http://tools.ietf.org/html/draft-uberti-behave-turn-rest-00). This allows
-the Home Server to generate credentials that are valid for use on the TURN
-server through the use of a secret shared between the Home Server and the
-TURN server.
-
-This document describes how to install coturn
-(https://github.com/coturn/coturn) which also supports the TURN REST API,
-and integrate it with synapse.
-
-coturn Setup
-============
-
-You may be able to setup coturn via your package manager,  or set it up manually using the usual ``configure, make, make install`` process.  
-
- 1. Check out coturn::
- 
-      git clone https://github.com/coturn/coturn.git coturn
-      cd coturn
-
- 2. Configure it::
- 
-      ./configure
-
-    You may need to install ``libevent2``: if so, you should do so
-    in the way recommended by your operating system.
-    You can ignore warnings about lack of database support: a
-    database is unnecessary for this purpose.
-
- 3. Build and install it::
- 
-      make
-      make install
-
- 4. Create or edit the config file in ``/etc/turnserver.conf``. The relevant
-    lines, with example values, are::
-
-      use-auth-secret
-      static-auth-secret=[your secret key here]
-      realm=turn.myserver.org
-
-    See turnserver.conf for explanations of the options.
-    One way to generate the static-auth-secret is with pwgen::
-
-       pwgen -s 64 1
-
- 5. Consider your security settings.  TURN lets users request a relay
-    which will connect to arbitrary IP addresses and ports.  At the least
-    we recommend::
-
-       # VoIP traffic is all UDP. There is no reason to let users connect to arbitrary TCP endpoints via the relay.
-       no-tcp-relay
-
-       # don't let the relay ever try to connect to private IP address ranges within your network (if any)
-       # given the turn server is likely behind your firewall, remember to include any privileged public IPs too.
-       denied-peer-ip=10.0.0.0-10.255.255.255
-       denied-peer-ip=192.168.0.0-192.168.255.255
-       denied-peer-ip=172.16.0.0-172.31.255.255
-
-       # special case the turn server itself so that client->TURN->TURN->client flows work
-       allowed-peer-ip=10.0.0.1
-
-       # consider whether you want to limit the quota of relayed streams per user (or total) to avoid risk of DoS.
-       user-quota=12 # 4 streams per video call, so 12 streams = 3 simultaneous relayed calls per user.
-       total-quota=1200
-
-    Ideally coturn should refuse to relay traffic which isn't SRTP;
-    see https://github.com/matrix-org/synapse/issues/2009
-
- 6. Ensure your firewall allows traffic into the TURN server on
-    the ports you've configured it to listen on (remember to allow
-    both TCP and UDP TURN traffic)
-
- 7. If you've configured coturn to support TLS/DTLS, generate or
-    import your private key and certificate.
-
- 8. Start the turn server::
- 
-       bin/turnserver -o
-
-
-synapse Setup
-=============
-
-Your home server configuration file needs the following extra keys:
-
- 1. "turn_uris": This needs to be a yaml list
-    of public-facing URIs for your TURN server to be given out 
-    to your clients. Add separate entries for each transport your
-    TURN server supports.
-
- 2. "turn_shared_secret": This is the secret shared between your Home
-    server and your TURN server, so you should set it to the same
-    string you used in turnserver.conf.
-
- 3. "turn_user_lifetime": This is the amount of time credentials
-    generated by your Home Server are valid for (in milliseconds).
-    Shorter times offer less potential for abuse at the expense
-    of increased traffic between web clients and your home server
-    to refresh credentials. The TURN REST API specification recommends
-    one day (86400000).
-
- 4. "turn_allow_guests": Whether to allow guest users to use the TURN
-    server.  This is enabled by default, as otherwise VoIP will not
-    work reliably for guests.  However, it does introduce a security risk
-    as it lets guests connect to arbitrary endpoints without having gone
-    through a CAPTCHA or similar to register a real account.
-
-As an example, here is the relevant section of the config file for
-matrix.org::
-
-    turn_uris: [ "turn:turn.matrix.org:3478?transport=udp", "turn:turn.matrix.org:3478?transport=tcp" ]
-    turn_shared_secret: n0t4ctuAllymatr1Xd0TorgSshar3d5ecret4obvIousreAsons
-    turn_user_lifetime: 86400000
-    turn_allow_guests: True
-
-Now, restart synapse::
-
-    cd /where/you/run/synapse
-    ./synctl restart
-
-...and your Home Server now supports VoIP relaying!
diff --git a/docs/workers.rst b/docs/workers.md
similarity index 78%
rename from docs/workers.rst
rename to docs/workers.md
index e11e11741..4bd60ba0a 100644
--- a/docs/workers.rst
+++ b/docs/workers.md
@@ -1,5 +1,4 @@
-Scaling synapse via workers
-===========================
+# Scaling synapse via workers
 
 Synapse has experimental support for splitting out functionality into
 multiple separate python processes, helping greatly with scalability.  These
@@ -20,17 +19,16 @@ TCP protocol called 'replication' - analogous to MySQL or Postgres style
 database replication; feeding a stream of relevant data to the workers so they
 can be kept in sync with the main synapse process and database state.
 
-Configuration
--------------
+## Configuration
 
 To make effective use of the workers, you will need to configure an HTTP
 reverse-proxy such as nginx or haproxy, which will direct incoming requests to
 the correct worker, or to the main synapse instance. Note that this includes
-requests made to the federation port. See `<reverse_proxy.rst>`_ for
-information on setting up a reverse proxy.
+requests made to the federation port. See [reverse_proxy.md](reverse_proxy.md)
+for information on setting up a reverse proxy.
 
 To enable workers, you need to add two replication listeners to the master
-synapse, e.g.::
+synapse, e.g.:
 
     listeners:
       # The TCP replication port
@@ -56,7 +54,7 @@ You then create a set of configs for the various worker processes.  These
 should be worker configuration files, and should be stored in a dedicated
 subdirectory, to allow synctl to manipulate them. An additional configuration
 for the master synapse process will need to be created because the process will
-not be started automatically. That configuration should look like this::
+not be started automatically. That configuration should look like this:
 
     worker_app: synapse.app.homeserver
     daemonize: true
@@ -66,17 +64,17 @@ configuration file.  You can then override configuration specific to that worker
 e.g. the HTTP listener that it provides (if any); logging configuration; etc.
 You should minimise the number of overrides though to maintain a usable config.
 
-You must specify the type of worker application (``worker_app``). The currently
+You must specify the type of worker application (`worker_app`). The currently
 available worker applications are listed below. You must also specify the
 replication endpoints that it's talking to on the main synapse process.
-``worker_replication_host`` should specify the host of the main synapse,
-``worker_replication_port`` should point to the TCP replication listener port and
-``worker_replication_http_port`` should point to the HTTP replication port.
+`worker_replication_host` should specify the host of the main synapse,
+`worker_replication_port` should point to the TCP replication listener port and
+`worker_replication_http_port` should point to the HTTP replication port.
 
-Currently, the ``event_creator`` and ``federation_reader`` workers require specifying
-``worker_replication_http_port``.
+Currently, the `event_creator` and `federation_reader` workers require specifying
+`worker_replication_http_port`.
 
-For instance::
+For instance:
 
     worker_app: synapse.app.synchrotron
 
@@ -97,15 +95,15 @@ For instance::
     worker_log_config: /home/matrix/synapse/config/synchrotron_log_config.yaml
 
 ...is a full configuration for a synchrotron worker instance, which will expose a
-plain HTTP ``/sync`` endpoint on port 8083 separately from the ``/sync`` endpoint provided
+plain HTTP `/sync` endpoint on port 8083 separately from the `/sync` endpoint provided
 by the main synapse.
 
 Obviously you should configure your reverse-proxy to route the relevant
-endpoints to the worker (``localhost:8083`` in the above example).
+endpoints to the worker (`localhost:8083` in the above example).
 
 Finally, to actually run your worker-based synapse, you must pass synctl the -a
 commandline option to tell it to operate on all the worker configurations found
-in the given directory, e.g.::
+in the given directory, e.g.:
 
     synctl -a $CONFIG/workers start
 
@@ -114,28 +112,24 @@ synapse, unless you explicitly know it's safe not to.  For instance, restarting
 synapse without restarting all the synchrotrons may result in broken typing
 notifications.
 
-To manipulate a specific worker, you pass the -w option to synctl::
+To manipulate a specific worker, you pass the -w option to synctl:
 
     synctl -w $CONFIG/workers/synchrotron.yaml restart
 
+## Available worker applications
 
-Available worker applications
------------------------------
-
-``synapse.app.pusher``
-~~~~~~~~~~~~~~~~~~~~~~
+### `synapse.app.pusher`
 
 Handles sending push notifications to sygnal and email. Doesn't handle any
-REST endpoints itself, but you should set ``start_pushers: False`` in the
+REST endpoints itself, but you should set `start_pushers: False` in the
 shared configuration file to stop the main synapse sending these notifications.
 
 Note this worker cannot be load-balanced: only one instance should be active.
 
-``synapse.app.synchrotron``
-~~~~~~~~~~~~~~~~~~~~~~~~~~~
+### `synapse.app.synchrotron`
 
-The synchrotron handles ``sync`` requests from clients. In particular, it can
-handle REST endpoints matching the following regular expressions::
+The synchrotron handles `sync` requests from clients. In particular, it can
+handle REST endpoints matching the following regular expressions:
 
     ^/_matrix/client/(v2_alpha|r0)/sync$
     ^/_matrix/client/(api/v1|v2_alpha|r0)/events$
@@ -151,20 +145,18 @@ load-balance across the instances, though it will be more efficient if all
 requests from a particular user are routed to a single instance. Extracting
 a userid from the access token is currently left as an exercise for the reader.
 
-``synapse.app.appservice``
-~~~~~~~~~~~~~~~~~~~~~~~~~~
+### `synapse.app.appservice`
 
 Handles sending output traffic to Application Services. Doesn't handle any
-REST endpoints itself, but you should set ``notify_appservices: False`` in the
+REST endpoints itself, but you should set `notify_appservices: False` in the
 shared configuration file to stop the main synapse sending these notifications.
 
 Note this worker cannot be load-balanced: only one instance should be active.
 
-``synapse.app.federation_reader``
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+### `synapse.app.federation_reader`
 
 Handles a subset of federation endpoints. In particular, it can handle REST
-endpoints matching the following regular expressions::
+endpoints matching the following regular expressions:
 
     ^/_matrix/federation/v1/event/
     ^/_matrix/federation/v1/state/
@@ -190,40 +182,36 @@ reverse-proxy configuration.
 The `^/_matrix/federation/v1/send/` endpoint must only be handled by a single
 instance.
 
-``synapse.app.federation_sender``
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+### `synapse.app.federation_sender`
 
 Handles sending federation traffic to other servers. Doesn't handle any
-REST endpoints itself, but you should set ``send_federation: False`` in the
+REST endpoints itself, but you should set `send_federation: False` in the
 shared configuration file to stop the main synapse sending this traffic.
 
 Note this worker cannot be load-balanced: only one instance should be active.
 
-``synapse.app.media_repository``
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+### `synapse.app.media_repository`
 
-Handles the media repository. It can handle all endpoints starting with::
+Handles the media repository. It can handle all endpoints starting with:
 
     /_matrix/media/
 
-And the following regular expressions matching media-specific administration
-APIs::
+And the following regular expressions matching media-specific administration APIs:
 
     ^/_synapse/admin/v1/purge_media_cache$
     ^/_synapse/admin/v1/room/.*/media$
     ^/_synapse/admin/v1/quarantine_media/.*$
 
-You should also set ``enable_media_repo: False`` in the shared configuration
+You should also set `enable_media_repo: False` in the shared configuration
 file to stop the main synapse running background jobs related to managing the
 media repository.
 
 Note this worker cannot be load-balanced: only one instance should be active.
 
-``synapse.app.client_reader``
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+### `synapse.app.client_reader`
 
 Handles client API endpoints. It can handle REST endpoints matching the
-following regular expressions::
+following regular expressions:
 
     ^/_matrix/client/(api/v1|r0|unstable)/publicRooms$
     ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/joined_members$
@@ -237,60 +225,55 @@ following regular expressions::
     ^/_matrix/client/versions$
     ^/_matrix/client/(api/v1|r0|unstable)/voip/turnServer$
 
-Additionally, the following REST endpoints can be handled for GET requests::
+Additionally, the following REST endpoints can be handled for GET requests:
 
     ^/_matrix/client/(api/v1|r0|unstable)/pushrules/.*$
 
 Additionally, the following REST endpoints can be handled, but all requests must
-be routed to the same instance::
+be routed to the same instance:
 
     ^/_matrix/client/(r0|unstable)/register$
 
 Pagination requests can also be handled, but all requests with the same path
 room must be routed to the same instance. Additionally, care must be taken to
 ensure that the purge history admin API is not used while pagination requests
-for the room are in flight::
+for the room are in flight:
 
     ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/messages$
 
-
-``synapse.app.user_dir``
-~~~~~~~~~~~~~~~~~~~~~~~~
+### `synapse.app.user_dir`
 
 Handles searches in the user directory. It can handle REST endpoints matching
-the following regular expressions::
+the following regular expressions:
 
     ^/_matrix/client/(api/v1|r0|unstable)/user_directory/search$
 
-``synapse.app.frontend_proxy``
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+### `synapse.app.frontend_proxy`
 
 Proxies some frequently-requested client endpoints to add caching and remove
 load from the main synapse. It can handle REST endpoints matching the following
-regular expressions::
+regular expressions:
 
     ^/_matrix/client/(api/v1|r0|unstable)/keys/upload
 
-If ``use_presence`` is False in the homeserver config, it can also handle REST
-endpoints matching the following regular expressions::
+If `use_presence` is False in the homeserver config, it can also handle REST
+endpoints matching the following regular expressions:
 
     ^/_matrix/client/(api/v1|r0|unstable)/presence/[^/]+/status
 
-This "stub" presence handler will pass through ``GET`` request but make the
-``PUT`` effectively a no-op.
+This "stub" presence handler will pass through `GET` requests but make the
+`PUT` effectively a no-op.
 
 It will proxy any requests it cannot handle to the main synapse instance. It
 must therefore be configured with the location of the main instance, via
-the ``worker_main_http_uri`` setting in the frontend_proxy worker configuration
-file. For example::
+the `worker_main_http_uri` setting in the `frontend_proxy` worker configuration
+file. For example:
 
     worker_main_http_uri: http://127.0.0.1:8008
 
+### `synapse.app.event_creator`
 
-``synapse.app.event_creator``
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Handles some event creation. It can handle REST endpoints matching::
+Handles some event creation. It can handle REST endpoints matching:
 
     ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/send
     ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/(join|invite|leave|ban|unban|kick)$
diff --git a/synapse/config/server.py b/synapse/config/server.py
index c8b9fe2d0..7f8d31595 100644
--- a/synapse/config/server.py
+++ b/synapse/config/server.py
@@ -338,7 +338,7 @@ class ServerConfig(Config):
                 (
                     "The metrics_port configuration option is deprecated in Synapse 0.31 "
                     "in favour of a listener. Please see "
-                    "http://github.com/matrix-org/synapse/blob/master/docs/metrics-howto.rst"
+                    "http://github.com/matrix-org/synapse/blob/master/docs/metrics-howto.md"
                     " on how to configure the new listener."
                 )
             )
@@ -571,8 +571,8 @@ class ServerConfig(Config):
         #
         #   type: the type of listener. Normally 'http', but other valid options are:
         #       'manhole' (see docs/manhole.md),
-        #       'metrics' (see docs/metrics-howto.rst),
-        #       'replication' (see docs/workers.rst).
+        #       'metrics' (see docs/metrics-howto.md),
+        #       'replication' (see docs/workers.md).
         #
         #   tls: set to true to enable TLS for this listener. Will use the TLS
         #       key/cert specified in tls_private_key_path / tls_certificate_path.
@@ -607,12 +607,12 @@ class ServerConfig(Config):
         #
         #   media: the media API (/_matrix/media).
         #
-        #   metrics: the metrics interface. See docs/metrics-howto.rst.
+        #   metrics: the metrics interface. See docs/metrics-howto.md.
         #
         #   openid: OpenID authentication.
         #
         #   replication: the HTTP replication API (/_synapse/replication). See
-        #       docs/workers.rst.
+        #       docs/workers.md.
         #
         #   static: static resources under synapse/static (/_matrix/static). (Mostly
         #       useful for 'fallback authentication'.)
@@ -632,7 +632,7 @@ class ServerConfig(Config):
           # that unwraps TLS.
           #
           # If you plan to use a reverse proxy, please see
-          # https://github.com/matrix-org/synapse/blob/master/docs/reverse_proxy.rst.
+          # https://github.com/matrix-org/synapse/blob/master/docs/reverse_proxy.md.
           #
           %(unsecure_http_bindings)s
 

From 6670bd407201f331353a4d402369da75b61ceca9 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Tue, 17 Sep 2019 18:05:13 +0100
Subject: [PATCH 052/276] v2 3PID Invites (part of MSC2140) (#5979)

3PID invites require making a request to an identity server to check that the invited 3PID has a Matrix ID linked, and if so, what it is.

These requests are being made on behalf of a user. The user will supply an identity server and an access token for that identity server. The homeserver will then forward this request with the access token (using an `Authorization` header) and, if the given identity server doesn't support v2 endpoints, will fall back to v1 (which doesn't require any access tokens).

Requires: ~~#5976~~
---
 changelog.d/5979.feature        |   1 +
 synapse/handlers/room_member.py | 104 +++++++++++++++++++++++++-------
 2 files changed, 82 insertions(+), 23 deletions(-)
 create mode 100644 changelog.d/5979.feature

diff --git a/changelog.d/5979.feature b/changelog.d/5979.feature
new file mode 100644
index 000000000..94888aa2d
--- /dev/null
+++ b/changelog.d/5979.feature
@@ -0,0 +1 @@
+Use the v2 Identity Service API for 3PID invites.
\ No newline at end of file
diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py
index 43d10a530..35450feb6 100644
--- a/synapse/handlers/room_member.py
+++ b/synapse/handlers/room_member.py
@@ -684,7 +684,14 @@ class RoomMemberHandler(object):
             )
         else:
             yield self._make_and_store_3pid_invite(
-                requester, id_server, medium, address, room_id, inviter, txn_id=txn_id
+                requester,
+                id_server,
+                medium,
+                address,
+                room_id,
+                inviter,
+                txn_id=txn_id,
+                id_access_token=id_access_token,
             )
 
     @defer.inlineCallbacks
@@ -885,7 +892,15 @@ class RoomMemberHandler(object):
 
     @defer.inlineCallbacks
     def _make_and_store_3pid_invite(
-        self, requester, id_server, medium, address, room_id, user, txn_id
+        self,
+        requester,
+        id_server,
+        medium,
+        address,
+        room_id,
+        user,
+        txn_id,
+        id_access_token=None,
     ):
         room_state = yield self.state_handler.get_current_state(room_id)
 
@@ -934,6 +949,7 @@ class RoomMemberHandler(object):
                 room_name=room_name,
                 inviter_display_name=inviter_display_name,
                 inviter_avatar_url=inviter_avatar_url,
+                id_access_token=id_access_token,
             )
         )
 
@@ -971,6 +987,7 @@ class RoomMemberHandler(object):
         room_name,
         inviter_display_name,
         inviter_avatar_url,
+        id_access_token=None,
     ):
         """
         Asks an identity server for a third party invite.
@@ -990,6 +1007,8 @@ class RoomMemberHandler(object):
             inviter_display_name (str): The current display name of the
                 inviter.
             inviter_avatar_url (str): The URL of the inviter's avatar.
+            id_access_token (str|None): The access token to authenticate to the identity
+                server with
 
         Returns:
             A deferred tuple containing:
@@ -1000,11 +1019,6 @@ class RoomMemberHandler(object):
                 display_name (str): A user-friendly name to represent the invited
                     user.
         """
-        is_url = "%s%s/_matrix/identity/api/v1/store-invite" % (
-            id_server_scheme,
-            id_server,
-        )
-
         invite_config = {
             "medium": medium,
             "address": address,
@@ -1017,22 +1031,67 @@ class RoomMemberHandler(object):
             "sender_display_name": inviter_display_name,
             "sender_avatar_url": inviter_avatar_url,
         }
-        try:
-            data = yield self.simple_http_client.post_json_get_json(
-                is_url, invite_config
+
+        # Send the identity server access token in the Authorization header and use
+        # the v2 Identity Service endpoints if id_access_token is present
+        data = None
+        base_url = "%s%s/_matrix/identity" % (id_server_scheme, id_server)
+
+        if id_access_token:
+            key_validity_url = "%s%s/_matrix/identity/v2/pubkey/isvalid" % (
+                id_server_scheme,
+                id_server,
             )
-        except HttpResponseException as e:
-            # Some identity servers may only support application/x-www-form-urlencoded
-            # types. This is especially true with old instances of Sydent, see
-            # https://github.com/matrix-org/sydent/pull/170
-            logger.info(
-                "Failed to POST %s with JSON, falling back to urlencoded form: %s",
-                is_url,
-                e,
-            )
-            data = yield self.simple_http_client.post_urlencoded_get_json(
-                is_url, invite_config
+
+            # Attempt a v2 lookup
+            url = base_url + "/v2/store-invite"
+            try:
+                data = yield self.simple_http_client.post_json_get_json(
+                    url,
+                    invite_config,
+                    {"Authorization": create_id_access_token_header(id_access_token)},
+                )
+            except HttpResponseException as e:
+                if e.code != 404:
+                    logger.info("Failed to POST %s with JSON: %s", url, e)
+                    raise e
+
+        if data is None:
+            key_validity_url = "%s%s/_matrix/identity/api/v1/pubkey/isvalid" % (
+                id_server_scheme,
+                id_server,
             )
+            url = base_url + "/api/v1/store-invite"
+
+            try:
+                data = yield self.simple_http_client.post_json_get_json(
+                    url, invite_config
+                )
+            except HttpResponseException as e:
+                logger.warning(
+                    "Error trying to call /store-invite on %s%s: %s",
+                    id_server_scheme,
+                    id_server,
+                    e,
+                )
+
+            if data is None:
+                # Some identity servers may only support application/x-www-form-urlencoded
+                # types. This is especially true with old instances of Sydent, see
+                # https://github.com/matrix-org/sydent/pull/170
+                try:
+                    data = yield self.simple_http_client.post_urlencoded_get_json(
+                        url, invite_config
+                    )
+                except HttpResponseException as e:
+                    logger.warning(
+                        "Error calling /store-invite on %s%s with fallback "
+                        "encoding: %s",
+                        id_server_scheme,
+                        id_server,
+                        e,
+                    )
+                    raise e
 
         # TODO: Check for success
         token = data["token"]
@@ -1040,8 +1099,7 @@ class RoomMemberHandler(object):
         if "public_key" in data:
             fallback_public_key = {
                 "public_key": data["public_key"],
-                "key_validity_url": "%s%s/_matrix/identity/api/v1/pubkey/isvalid"
-                % (id_server_scheme, id_server),
+                "key_validity_url": key_validity_url,
             }
         else:
             fallback_public_key = public_keys[0]

From 7100b5cc9de1620945eeba3d2725bd4239c4a3c6 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Wed, 18 Sep 2019 10:16:00 +0100
Subject: [PATCH 053/276] fix sample config

this was apparently broken by #6040.
---
 docs/sample_config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml
index d5a8d24c2..1ee0ba8c3 100644
--- a/docs/sample_config.yaml
+++ b/docs/sample_config.yaml
@@ -1520,7 +1520,7 @@ opentracing:
     #enabled: true
 
     # The list of homeservers we wish to send and receive span contexts and span baggage.
-    # See docs/opentracing.md
+    # See docs/opentracing.rst
     # This is a list of regexes which are matched against the server_name of the
     # homeserver.
     #

From a86a290850dad40c1ac38c4e20b2da039f246922 Mon Sep 17 00:00:00 2001
From: "J. Ryan Stinnett" <jryans@gmail.com>
Date: Wed, 18 Sep 2019 21:55:37 +0100
Subject: [PATCH 054/276] Fix logcontext spam on non-Linux platforms (#6059)

This checks whether the current platform supports thread resource usage tracking
before logging a warning to avoid log spam.

Fixes https://github.com/matrix-org/synapse/issues/6055
---
 changelog.d/6059.bugfix    |  1 +
 synapse/logging/context.py | 13 +++++++++++--
 2 files changed, 12 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/6059.bugfix

diff --git a/changelog.d/6059.bugfix b/changelog.d/6059.bugfix
new file mode 100644
index 000000000..49d5bd3fa
--- /dev/null
+++ b/changelog.d/6059.bugfix
@@ -0,0 +1 @@
+Fix logcontext spam on non-Linux platforms.
diff --git a/synapse/logging/context.py b/synapse/logging/context.py
index 63379bfb9..370000e37 100644
--- a/synapse/logging/context.py
+++ b/synapse/logging/context.py
@@ -1,4 +1,5 @@
 # Copyright 2014-2016 OpenMarket Ltd
+# Copyright 2019 The Matrix.org Foundation C.I.C.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -42,13 +43,17 @@ try:
     # exception.
     resource.getrusage(RUSAGE_THREAD)
 
+    is_thread_resource_usage_supported = True
+
     def get_thread_resource_usage():
         return resource.getrusage(RUSAGE_THREAD)
 
 
 except Exception:
     # If the system doesn't support resource.getrusage(RUSAGE_THREAD) then we
-    # won't track resource usage by returning None.
+    # won't track resource usage.
+    is_thread_resource_usage_supported = False
+
     def get_thread_resource_usage():
         return None
 
@@ -359,7 +364,11 @@ class LoggingContext(object):
 
         # When we stop, let's record the cpu used since we started
         if not self.usage_start:
-            logger.warning("Called stop on logcontext %s without calling start", self)
+            # Log a warning on platforms that support thread usage tracking
+            if is_thread_resource_usage_supported:
+                logger.warning(
+                    "Called stop on logcontext %s without calling start", self
+                )
             return
 
         utime_delta, stime_delta = self._get_cputime()

From d58cad635e27f58bc4823ba5b0a1bb69aa8b8bbc Mon Sep 17 00:00:00 2001
From: Jorik Schellekens <joriksch@gmail.com>
Date: Wed, 18 Sep 2019 22:27:59 +0100
Subject: [PATCH 055/276] Give appropriate exit codes when synctl fails (#5992)

---
 changelog.d/5992.feature |  1 +
 synctl                   | 43 ++++++++++++++++++++++++++++++++++++----
 2 files changed, 40 insertions(+), 4 deletions(-)
 create mode 100644 changelog.d/5992.feature

diff --git a/changelog.d/5992.feature b/changelog.d/5992.feature
new file mode 100644
index 000000000..31866c292
--- /dev/null
+++ b/changelog.d/5992.feature
@@ -0,0 +1 @@
+Give appropriate exit codes when synctl fails.
diff --git a/synctl b/synctl
index a9629cf0e..45acece30 100755
--- a/synctl
+++ b/synctl
@@ -71,7 +71,20 @@ def abort(message, colour=RED, stream=sys.stderr):
     sys.exit(1)
 
 
-def start(configfile, daemonize=True):
+def start(configfile: str, daemonize: bool = True) -> bool:
+    """Attempts to start synapse.
+    Args:
+        configfile: path to a yaml synapse config file
+        daemonize: whether to daemonize synapse or keep it attached to the current
+            session
+
+    Returns:
+        True if the process started successfully
+        False if there was an error starting the process
+
+        If daemonize is False it will only return once synapse exits.
+    """
+
     write("Starting ...")
     args = SYNAPSE
 
@@ -83,25 +96,40 @@ def start(configfile, daemonize=True):
     try:
         subprocess.check_call(args)
         write("started synapse.app.homeserver(%r)" % (configfile,), colour=GREEN)
+        return True
     except subprocess.CalledProcessError as e:
         write(
             "error starting (exit code: %d); see above for logs" % e.returncode,
             colour=RED,
         )
+        return False
 
 
-def start_worker(app, configfile, worker_configfile):
+def start_worker(app: str, configfile: str, worker_configfile: str) -> bool:
+    """Attempts to start a synapse worker.
+    Args:
+        app: python module name of the worker application to run
+        configfile: path to a yaml synapse config file
+        worker_configfile: path to worker specific yaml synapse file
+
+    Returns:
+        True if the process started successfully
+        False if there was an error starting the process
+    """
+
     args = [sys.executable, "-B", "-m", app, "-c", configfile, "-c", worker_configfile]
 
     try:
         subprocess.check_call(args)
         write("started %s(%r)" % (app, worker_configfile), colour=GREEN)
+        return True
     except subprocess.CalledProcessError as e:
         write(
             "error starting %s(%r) (exit code: %d); see above for logs"
             % (app, worker_configfile, e.returncode),
             colour=RED,
         )
+        return False
 
 
 def stop(pidfile, app):
@@ -292,11 +320,14 @@ def main():
             write("All processes exited; now restarting...")
 
     if action == "start" or action == "restart":
+        error = False
         if start_stop_synapse:
             # Check if synapse is already running
             if os.path.exists(pidfile) and pid_running(int(open(pidfile).read())):
                 abort("synapse.app.homeserver already running")
-            start(configfile, bool(options.daemonize))
+
+            if not start(configfile, bool(options.daemonize)):
+                error = True
 
         for worker in workers:
             env = os.environ.copy()
@@ -307,12 +338,16 @@ def main():
             for cache_name, factor in iteritems(worker.cache_factors):
                 os.environ["SYNAPSE_CACHE_FACTOR_" + cache_name.upper()] = str(factor)
 
-            start_worker(worker.app, configfile, worker.configfile)
+            if not start_worker(worker.app, configfile, worker.configfile):
+                error = True
 
             # Reset env back to the original
             os.environ.clear()
             os.environ.update(env)
 
+        if error:
+            exit(1)
+
 
 if __name__ == "__main__":
     main()

From 38fd1f8e3faeffbd4bb3084012bb2c17a953625f Mon Sep 17 00:00:00 2001
From: Jorik Schellekens <joriksch@gmail.com>
Date: Wed, 18 Sep 2019 22:30:44 +0100
Subject: [PATCH 056/276] Fix typo in account_threepid_delegates config (#6028)

---
 changelog.d/6028.feature       | 1 +
 docs/sample_config.yaml        | 2 +-
 synapse/config/registration.py | 2 +-
 3 files changed, 3 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/6028.feature

diff --git a/changelog.d/6028.feature b/changelog.d/6028.feature
new file mode 100644
index 000000000..cf603fa0c
--- /dev/null
+++ b/changelog.d/6028.feature
@@ -0,0 +1 @@
+Replace `trust_identity_server_for_password_resets` config option with `account_threepid_delegates`.
diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml
index 1ee0ba8c3..3e4edc6b0 100644
--- a/docs/sample_config.yaml
+++ b/docs/sample_config.yaml
@@ -938,7 +938,7 @@ uploads_path: "DATADIR/uploads"
 # https://matrix.org/docs/spec/identity_service/latest
 #
 account_threepid_delegates:
-    #email: https://example.com     # Delegate email sending to matrix.org
+    #email: https://example.com     # Delegate email sending to example.org
     #msisdn: http://localhost:8090  # Delegate SMS sending to this local process
 
 # Users who register on this homeserver will automatically be joined
diff --git a/synapse/config/registration.py b/synapse/config/registration.py
index 9548560ed..d4654e99b 100644
--- a/synapse/config/registration.py
+++ b/synapse/config/registration.py
@@ -294,7 +294,7 @@ class RegistrationConfig(Config):
         # https://matrix.org/docs/spec/identity_service/latest
         #
         account_threepid_delegates:
-            #email: https://example.com     # Delegate email sending to matrix.org
+            #email: https://example.com     # Delegate email sending to example.org
             #msisdn: http://localhost:8090  # Delegate SMS sending to this local process
 
         # Users who register on this homeserver will automatically be joined

From a136137b2efae0fa5b3344cb94759fe0f5913221 Mon Sep 17 00:00:00 2001
From: Pete <PMaynard@users.noreply.github.com>
Date: Thu, 19 Sep 2019 09:52:59 +0100
Subject: [PATCH 057/276] Update INSTALL.md with void-linux (#5873)

---
 INSTALL.md | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/INSTALL.md b/INSTALL.md
index 572888246..38c113b26 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -349,6 +349,13 @@ sudo pip uninstall py-bcrypt
 sudo pip install py-bcrypt
 ```
 
+### Void Linux
+
+Synapse can be found in the void repositories as 'synapse':
+
+    xbps-install -Su
+    xbps-install -S synapse
+
 ### FreeBSD
 
 Synapse can be installed via FreeBSD Ports or Packages contributed by Brendan Molloy from:

From 62e3ff92fd3228b5c34f6cee691e22f9b1f85c9e Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Thu, 19 Sep 2019 10:53:14 +0100
Subject: [PATCH 058/276] Remove POST method from password reset submit_token
 endpoint (#6056)

Removes the POST method from `/password_reset/<medium>/submit_token/` as it's only used by phone number verification which Synapse does not support yet.
---
 changelog.d/6056.bugfix                 |  1 +
 synapse/rest/client/v2_alpha/account.py | 17 -----------------
 2 files changed, 1 insertion(+), 17 deletions(-)
 create mode 100644 changelog.d/6056.bugfix

diff --git a/changelog.d/6056.bugfix b/changelog.d/6056.bugfix
new file mode 100644
index 000000000..4d9573a58
--- /dev/null
+++ b/changelog.d/6056.bugfix
@@ -0,0 +1 @@
+Remove POST method from password reset submit_token endpoint until we implement submit_url functionality.
\ No newline at end of file
diff --git a/synapse/rest/client/v2_alpha/account.py b/synapse/rest/client/v2_alpha/account.py
index 2ea515d2f..afaaeeacd 100644
--- a/synapse/rest/client/v2_alpha/account.py
+++ b/synapse/rest/client/v2_alpha/account.py
@@ -272,23 +272,6 @@ class PasswordResetSubmitTokenServlet(RestServlet):
         request.write(html.encode("utf-8"))
         finish_request(request)
 
-    @defer.inlineCallbacks
-    def on_POST(self, request, medium):
-        if medium != "email":
-            raise SynapseError(
-                400, "This medium is currently not supported for password resets"
-            )
-
-        body = parse_json_object_from_request(request)
-        assert_params_in_dict(body, ["sid", "client_secret", "token"])
-
-        valid, _ = yield self.store.validate_threepid_session(
-            body["sid"], body["client_secret"], body["token"], self.clock.time_msec()
-        )
-        response_code = 200 if valid else 400
-
-        return response_code, {"success": valid}
-
 
 class PasswordRestServlet(RestServlet):
     PATTERNS = client_patterns("/account/password$")

From 84a2743e2eaf5402cef8b68327efaf54daf64150 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Thu, 19 Sep 2019 10:55:43 +0100
Subject: [PATCH 059/276] Add changelog

---
 changelog.d/6064.misc | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/6064.misc

diff --git a/changelog.d/6064.misc b/changelog.d/6064.misc
new file mode 100644
index 000000000..28dc89111
--- /dev/null
+++ b/changelog.d/6064.misc
@@ -0,0 +1 @@
+Clean up the sample config for SAML authentication.

From bcd91328692555d85df346c4571085c9b41b8f6a Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Thu, 19 Sep 2019 15:06:27 +0100
Subject: [PATCH 060/276] Undo the deletion of some tables (#6047)

This is a partial revert of #5893. The problem is that if we drop these tables
in the same release as removing the code that writes to them, it prevents
users from being able to roll back to a previous release.

So let's leave the tables in place for now, and remember to drop them in a
subsequent release.

(Note that these tables haven't been *read* for *years*, so any missing rows
resulting from a temporary upgrade to vNext won't cause a problem.)
---
 changelog.d/5893.misc                         |  2 +-
 changelog.d/6047.misc                         |  2 ++
 .../delta/56/drop_unused_event_tables.sql     | 20 -------------------
 3 files changed, 3 insertions(+), 21 deletions(-)
 create mode 100644 changelog.d/6047.misc
 delete mode 100644 synapse/storage/schema/delta/56/drop_unused_event_tables.sql

diff --git a/changelog.d/5893.misc b/changelog.d/5893.misc
index 07ee4888d..5ef171cb3 100644
--- a/changelog.d/5893.misc
+++ b/changelog.d/5893.misc
@@ -1 +1 @@
-Drop some unused tables.
+Stop populating some unused tables.
diff --git a/changelog.d/6047.misc b/changelog.d/6047.misc
new file mode 100644
index 000000000..a4cdb8abb
--- /dev/null
+++ b/changelog.d/6047.misc
@@ -0,0 +1,2 @@
+Stop populating some unused tables.
+
diff --git a/synapse/storage/schema/delta/56/drop_unused_event_tables.sql b/synapse/storage/schema/delta/56/drop_unused_event_tables.sql
deleted file mode 100644
index 9f09922c6..000000000
--- a/synapse/storage/schema/delta/56/drop_unused_event_tables.sql
+++ /dev/null
@@ -1,20 +0,0 @@
-/* Copyright 2019 The Matrix.org Foundation C.I.C.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
--- these tables are never used.
-DROP TABLE IF EXISTS room_names;
-DROP TABLE IF EXISTS topics;
-DROP TABLE IF EXISTS history_visibility;
-DROP TABLE IF EXISTS guest_access;

From 35ce3bda7aaa6281f02123225ca63d913fa12df1 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Thu, 19 Sep 2019 15:06:48 +0100
Subject: [PATCH 061/276] Add some notes on rolling back to v1.3.1. (#6049)

---
 UPGRADE.rst          | 25 +++++++++++++++++++++++++
 changelog.d/6049.doc |  1 +
 2 files changed, 26 insertions(+)
 create mode 100644 changelog.d/6049.doc

diff --git a/UPGRADE.rst b/UPGRADE.rst
index 5aaf80490..53f3af4ed 100644
--- a/UPGRADE.rst
+++ b/UPGRADE.rst
@@ -99,6 +99,31 @@ Synapse will expect these files to exist inside the configured template director
 default templates, see `synapse/res/templates
 <https://github.com/matrix-org/synapse/tree/master/synapse/res/templates>`_.
 
+Rolling back to v1.3.1
+----------------------
+
+If you encounter problems with v1.4.0, it should be possible to roll back to
+v1.3.1, subject to the following:
+
+* The 'room statistics' engine was heavily reworked in this release (see
+  `#5971 <https://github.com/matrix-org/synapse/pull/5971>`_), including
+  significant changes to the database schema, which are not easily
+  reverted. This will cause the room statistics engine to stop updating when
+  you downgrade.
+
+  The room statistics are essentially unused in v1.3.1 (in future versions of
+  Synapse, they will be used to populate the room directory), so there should
+  be no loss of functionality. However, the statistics engine will write errors
+  to the logs, which can be avoided by setting the following in `homeserver.yaml`:
+
+  .. code:: yaml
+
+    stats:
+      enabled: false
+
+  Don't forget to re-enable it when you upgrade again, in preparation for its
+  use in the room directory!
+
 Upgrading to v1.2.0
 ===================
 
diff --git a/changelog.d/6049.doc b/changelog.d/6049.doc
new file mode 100644
index 000000000..e0307bf5c
--- /dev/null
+++ b/changelog.d/6049.doc
@@ -0,0 +1 @@
+Add some notes on rolling back to v1.3.1.

From 466866a1d9dd1fcf82348a36c0532cb0c6614767 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Thu, 19 Sep 2019 15:08:14 +0100
Subject: [PATCH 062/276] Update the issue template for new way of getting
 server version (#6051)

cf #4878
---
 .github/ISSUE_TEMPLATE/BUG_REPORT.md | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/.github/ISSUE_TEMPLATE/BUG_REPORT.md b/.github/ISSUE_TEMPLATE/BUG_REPORT.md
index 5cf844bfb..9dd05bcba 100644
--- a/.github/ISSUE_TEMPLATE/BUG_REPORT.md
+++ b/.github/ISSUE_TEMPLATE/BUG_REPORT.md
@@ -7,7 +7,7 @@ about: Create a report to help us improve
 <!--
 
 **IF YOU HAVE SUPPORT QUESTIONS ABOUT RUNNING OR CONFIGURING YOUR OWN HOME SERVER**:
-You will likely get better support more quickly if you ask in ** #matrix:matrix.org ** ;)
+You will likely get better support more quickly if you ask in ** #synapse:matrix.org ** ;)
 
 
 This is a bug report template. By following the instructions below and
@@ -44,22 +44,26 @@ those (please be careful to remove any personal or private data). Please surroun
 <!-- IMPORTANT: please answer the following questions, to help us narrow down the problem -->
 
 <!-- Was this issue identified on matrix.org or another homeserver? -->
-- **Homeserver**: 
+- **Homeserver**:
 
 If not matrix.org:
 
 <!--
-What version of Synapse is running?
-You can find the Synapse version by inspecting the server headers (replace matrix.org with
-your own homeserver domain):
-$ curl -v https://matrix.org/_matrix/client/versions 2>&1 | grep "Server:"
--->
-- **Version**: 
+ What version of Synapse is running?
 
-- **Install method**: 
+You can find the Synapse version with this command:
+
+$ curl http://localhost:8008/_synapse/admin/v1/server_version
+
+(You may need to replace `localhost:8008` if Synapse is not configured to
+listen on that port.)
+-->
+- **Version**:
+
+- **Install method**:
 <!-- examples: package manager/git clone/pip  -->
 
-- **Platform**: 
+- **Platform**:
 <!--
 Tell us about the environment in which your homeserver is operating
 distro, hardware, if it's running in a vm/container, etc.

From 7423fade92d98d4246947bc8491af5827cd9e0dd Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Thu, 19 Sep 2019 17:16:50 +0100
Subject: [PATCH 063/276] better logging

---
 synapse/handlers/saml_handler.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/synapse/handlers/saml_handler.py b/synapse/handlers/saml_handler.py
index 5fa8272dc..f000d2a00 100644
--- a/synapse/handlers/saml_handler.py
+++ b/synapse/handlers/saml_handler.py
@@ -111,6 +111,8 @@ class SamlHandler:
             logger.warning("SAML2 response was not signed")
             raise SynapseError(400, "SAML2 response was not signed")
 
+        logger.info("Got SAML2 reponse with attributes: %s", saml2_auth.ava)
+
         try:
             remote_user_id = saml2_auth.ava["uid"][0]
         except KeyError:

From fe349b497e4b22bb409eb199b77479c5895af525 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Thu, 19 Sep 2019 18:20:01 +0100
Subject: [PATCH 064/276] Update the upgrade notes (#6050)

 * make it clear that if you installed from a package manager, you should use
   that to upgrade

 * Document the new way of getting the server version (cf #4878)

 * Write some words about downgrading.
---
 UPGRADE.rst          | 82 +++++++++++++++++++++++++++++---------------
 changelog.d/6050.doc |  1 +
 2 files changed, 55 insertions(+), 28 deletions(-)
 create mode 100644 changelog.d/6050.doc

diff --git a/UPGRADE.rst b/UPGRADE.rst
index 53f3af4ed..4ede973a0 100644
--- a/UPGRADE.rst
+++ b/UPGRADE.rst
@@ -2,52 +2,78 @@ Upgrading Synapse
 =================
 
 Before upgrading check if any special steps are required to upgrade from the
-what you currently have installed to current version of synapse. The extra
+what you currently have installed to current version of Synapse. The extra
 instructions that may be required are listed later in this document.
 
-1. If synapse was installed in a virtualenv then activate that virtualenv before
-   upgrading. If synapse is installed in a virtualenv in ``~/synapse/env`` then
-   run:
+* If Synapse was installed using `prebuilt packages
+  <INSTALL.md#prebuilt-packages>`_, you will need to follow the normal process
+  for upgrading those packages.
 
-   .. code:: bash
+* If Synapse was installed from source, then:
+
+  1. Activate the virtualenv before upgrading. For example, if Synapse is
+     installed in a virtualenv in ``~/synapse/env`` then run:
+
+     .. code:: bash
 
        source ~/synapse/env/bin/activate
 
-2. If synapse was installed using pip then upgrade to the latest version by
-   running:
+  2. If Synapse was installed using pip then upgrade to the latest version by
+     running:
 
-   .. code:: bash
+     .. code:: bash
 
-       pip install --upgrade matrix-synapse[all]
+       pip install --upgrade matrix-synapse
 
-       # restart synapse
-       synctl restart
+     If Synapse was installed using git then upgrade to the latest version by
+     running:
 
-
-   If synapse was installed using git then upgrade to the latest version by
-   running:
-
-   .. code:: bash
-
-       # Pull the latest version of the master branch.
+     .. code:: bash
+     
        git pull
+       pip install --upgrade .
 
-       # Update synapse and its python dependencies.
-       pip install --upgrade .[all]
+  3. Restart Synapse:
+
+     .. code:: bash
 
-       # restart synapse
        ./synctl restart
 
-
-To check whether your update was successful, you can check the Server header
-returned by the Client-Server API:
+To check whether your update was successful, you can check the running server
+version with:
 
 .. code:: bash
 
-    # replace <host.name> with the hostname of your synapse homeserver.
-    # You may need to specify a port (eg, :8448) if your server is not
-    # configured on port 443.
-    curl -kv https://<host.name>/_matrix/client/versions 2>&1 | grep "Server:"
+    # you may need to replace 'localhost:8008' if synapse is not configured
+    # to listen on port 8008.
+
+    curl http://localhost:8008/_synapse/admin/v1/server_version
+
+Rolling back to older versions
+------------------------------
+
+Rolling back to previous releases can be difficult, due to database schema
+changes between releases. Where we have been able to test the rollback process,
+this will be noted below.
+
+In general, you will need to undo any changes made during the upgrade process,
+for example:
+
+* pip:
+
+  .. code:: bash
+
+     source env/bin/activate
+     # replace `1.3.0` accordingly:
+     pip install matrix-synapse==1.3.0
+
+* Debian:
+
+  .. code:: bash
+
+     # replace `1.3.0` and `stretch` accordingly:
+     wget https://packages.matrix.org/debian/pool/main/m/matrix-synapse-py3/matrix-synapse-py3_1.3.0+stretch1_amd64.deb
+     dpkg -i matrix-synapse-py3_1.3.0+stretch1_amd64.deb
 
 Upgrading to v1.4.0
 ===================
diff --git a/changelog.d/6050.doc b/changelog.d/6050.doc
new file mode 100644
index 000000000..3d19c69bc
--- /dev/null
+++ b/changelog.d/6050.doc
@@ -0,0 +1 @@
+Update the upgrade notes.

From 599f786e4ee98050f399ff7f530a7208ce14468d Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Thu, 19 Sep 2019 18:52:17 +0100
Subject: [PATCH 065/276] Update 6037.feature

---
 changelog.d/6037.feature | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/changelog.d/6037.feature b/changelog.d/6037.feature
index 95d82bd4d..85553d2da 100644
--- a/changelog.d/6037.feature
+++ b/changelog.d/6037.feature
@@ -1 +1 @@
-Handle userid clashes when authenticating via SAML by appending an integer suffix.
\ No newline at end of file
+Make the process for mapping SAML2 users to matrix IDs more flexible.

From b74606ea2262a717193f08bb6876459c1ee2d97d Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Thu, 19 Sep 2019 20:29:11 +0100
Subject: [PATCH 066/276] Fix a bug with saml attribute maps.

Fixes a bug where the default attribute maps were prioritised over
user-specified ones, resulting in incorrect mappings.

The problem is that if you call SPConfig.load() multiple times, it adds new
attribute mappers to a list. So by calling it with the default config first,
and then the user-specified config, we would always get the default mappers
before the user-specified mappers.

To solve this, let's merge the config dicts first, and then pass them to
SPConfig.
---
 changelog.d/6069.bugfix        |  1 +
 synapse/config/saml2_config.py | 34 ++++++++++++++++++++++++++++------
 synapse/util/module_loader.py  | 20 +++++++++++++++++++-
 3 files changed, 48 insertions(+), 7 deletions(-)
 create mode 100644 changelog.d/6069.bugfix

diff --git a/changelog.d/6069.bugfix b/changelog.d/6069.bugfix
new file mode 100644
index 000000000..a437ac41a
--- /dev/null
+++ b/changelog.d/6069.bugfix
@@ -0,0 +1 @@
+Fix a bug which caused SAML attribute maps to be overridden by defaults.
diff --git a/synapse/config/saml2_config.py b/synapse/config/saml2_config.py
index 6a8161547..14539fdb2 100644
--- a/synapse/config/saml2_config.py
+++ b/synapse/config/saml2_config.py
@@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
 # Copyright 2018 New Vector Ltd
+# Copyright 2019 The Matrix.org Foundation C.I.C.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,11 +13,29 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 from synapse.python_dependencies import DependencyException, check_requirements
+from synapse.util.module_loader import load_python_module
 
 from ._base import Config, ConfigError
 
 
+def _dict_merge(merge_dict, into_dct):
+    for k, v in merge_dict.items():
+        if k not in into_dct:
+            into_dct[k] = v
+            continue
+
+        current_val = into_dct[k]
+
+        if isinstance(v, dict) and isinstance(current_val, dict):
+            _dict_merge(v, current_val)
+            continue
+
+        # otherwise we just overwrite
+        into_dct[k] = v
+
+
 class SAML2Config(Config):
     def read_config(self, config, **kwargs):
         self.saml2_enabled = False
@@ -33,15 +52,18 @@ class SAML2Config(Config):
 
         self.saml2_enabled = True
 
-        import saml2.config
-
-        self.saml2_sp_config = saml2.config.SPConfig()
-        self.saml2_sp_config.load(self._default_saml_config_dict())
-        self.saml2_sp_config.load(saml2_config.get("sp_config", {}))
+        saml2_config_dict = self._default_saml_config_dict()
+        _dict_merge(saml2_config.get("sp_config", {}), saml2_config_dict)
 
         config_path = saml2_config.get("config_path", None)
         if config_path is not None:
-            self.saml2_sp_config.load_file(config_path)
+            mod = load_python_module(config_path)
+            _dict_merge(mod.CONFIG, saml2_config_dict)
+
+        import saml2.config
+
+        self.saml2_sp_config = saml2.config.SPConfig()
+        self.saml2_sp_config.load(saml2_config_dict)
 
         # session lifetime: in milliseconds
         self.saml2_session_lifetime = self.parse_duration(
diff --git a/synapse/util/module_loader.py b/synapse/util/module_loader.py
index 522acd5aa..7ff7eb1e4 100644
--- a/synapse/util/module_loader.py
+++ b/synapse/util/module_loader.py
@@ -14,12 +14,13 @@
 # limitations under the License.
 
 import importlib
+import importlib.util
 
 from synapse.config._base import ConfigError
 
 
 def load_module(provider):
-    """ Loads a module with its config
+    """ Loads a synapse module with its config
     Take a dict with keys 'module' (the module name) and 'config'
     (the config dict).
 
@@ -38,3 +39,20 @@ def load_module(provider):
         raise ConfigError("Failed to parse config for %r: %r" % (provider["module"], e))
 
     return provider_class, provider_config
+
+
+def load_python_module(location: str):
+    """Load a python module, and return a reference to its global namespace
+
+    Args:
+        location (str): path to the module
+
+    Returns:
+        python module object
+    """
+    spec = importlib.util.spec_from_file_location(location, location)
+    if spec is None:
+        raise Exception("Unable to load module at %s" % (location,))
+    mod = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(mod)
+    return mod

From 36015d68efccd2520ac0a569a5f8714544f6568c Mon Sep 17 00:00:00 2001
From: "J. Ryan Stinnett" <jryans@gmail.com>
Date: Thu, 19 Sep 2019 22:28:30 +0100
Subject: [PATCH 067/276] Use unstable prefix for 3PID unbind API (#6062)

---
 changelog.d/5980.feature                | 2 +-
 changelog.d/6062.bugfix                 | 1 +
 synapse/rest/client/v2_alpha/account.py | 2 +-
 3 files changed, 3 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/6062.bugfix

diff --git a/changelog.d/5980.feature b/changelog.d/5980.feature
index f25d8d81d..e20117cf1 100644
--- a/changelog.d/5980.feature
+++ b/changelog.d/5980.feature
@@ -1 +1 @@
-Add POST /_matrix/client/r0/account/3pid/unbind endpoint from MSC2140 for unbinding a 3PID from an identity server without removing it from the homeserver user account.
\ No newline at end of file
+Add POST /_matrix/client/unstable/account/3pid/unbind endpoint from MSC2140 for unbinding a 3PID from an identity server without removing it from the homeserver user account.
diff --git a/changelog.d/6062.bugfix b/changelog.d/6062.bugfix
new file mode 100644
index 000000000..e20117cf1
--- /dev/null
+++ b/changelog.d/6062.bugfix
@@ -0,0 +1 @@
+Add POST /_matrix/client/unstable/account/3pid/unbind endpoint from MSC2140 for unbinding a 3PID from an identity server without removing it from the homeserver user account.
diff --git a/synapse/rest/client/v2_alpha/account.py b/synapse/rest/client/v2_alpha/account.py
index afaaeeacd..ce1487dbc 100644
--- a/synapse/rest/client/v2_alpha/account.py
+++ b/synapse/rest/client/v2_alpha/account.py
@@ -529,7 +529,7 @@ class ThreepidRestServlet(RestServlet):
 
 
 class ThreepidUnbindRestServlet(RestServlet):
-    PATTERNS = client_patterns("/account/3pid/unbind$")
+    PATTERNS = client_patterns("/account/3pid/unbind$", releases=(), unstable=True)
 
     def __init__(self, hs):
         super(ThreepidUnbindRestServlet, self).__init__()

From 2def5ea0da4b8134384adcd48e1e312f2f7e65c9 Mon Sep 17 00:00:00 2001
From: Michael Kaye <1917473+michaelkaye@users.noreply.github.com>
Date: Thu, 19 Sep 2019 22:29:47 +0100
Subject: [PATCH 068/276] Docker: support SYNAPSE_WORKER envvar (#6058)

* Allow passing SYNAPSE_WORKER envvar

* changelog.d

* Document SYNAPSE_WORKER.

Attempting to imply that you don't need to change this default
unless you're in worker mode.

Also aware that there's a bigger problem of attempting to document
a complete working configuration of workers using docker, as we
currently only document to use `synctl` for worker mode, and synctl
doesn't work that way in docker.
---
 changelog.d/6058.docker | 1 +
 docker/README.md        | 2 ++
 docker/start.py         | 3 ++-
 3 files changed, 5 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/6058.docker

diff --git a/changelog.d/6058.docker b/changelog.d/6058.docker
new file mode 100644
index 000000000..30be6933c
--- /dev/null
+++ b/changelog.d/6058.docker
@@ -0,0 +1 @@
+Provide SYNAPSE_WORKER envvar to specify python module.
diff --git a/docker/README.md b/docker/README.md
index d5879c2f2..4b712f3f5 100644
--- a/docker/README.md
+++ b/docker/README.md
@@ -89,6 +89,8 @@ The following environment variables are supported in run mode:
   `/data`.
 * `SYNAPSE_CONFIG_PATH`: path to the config file. Defaults to
   `<SYNAPSE_CONFIG_DIR>/homeserver.yaml`.
+* `SYNAPSE_WORKER`: module to execute, used when running synapse with workers.
+   Defaults to `synapse.app.homeserver`, which is suitable for non-worker mode.
 * `UID`, `GID`: the user and group id to run Synapse as. Defaults to `991`, `991`.
 * `TZ`: the [timezone](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones) the container will run with. Defaults to `UTC`.
 
diff --git a/docker/start.py b/docker/start.py
index 260f2d994..e41ea20e7 100755
--- a/docker/start.py
+++ b/docker/start.py
@@ -182,6 +182,7 @@ def main(args, environ):
     mode = args[1] if len(args) > 1 else None
     desired_uid = int(environ.get("UID", "991"))
     desired_gid = int(environ.get("GID", "991"))
+    synapse_worker = environ.get("SYNAPSE_WORKER", "synapse.app.homeserver")
     if (desired_uid == os.getuid()) and (desired_gid == os.getgid()):
         ownership = None
     else:
@@ -245,7 +246,7 @@ def main(args, environ):
 
     log("Starting synapse with config file " + config_path)
 
-    args = ["python", "-m", "synapse.app.homeserver", "--config-path", config_path]
+    args = ["python", "-m", synapse_worker, "--config-path", config_path]
     if ownership is not None:
         args = ["su-exec", ownership] + args
         os.execv("/sbin/su-exec", args)

From 3ac614eb6c294b7f77dde123f85ddaf3a389e3b8 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Fri, 20 Sep 2019 10:46:34 +0100
Subject: [PATCH 069/276] Drop support for bind param on POST /account/3pid
 (MSC2290) (#6067)

As per [MSC2290](https://github.com/matrix-org/matrix-doc/pull/2290/files#diff-05cde9463e9209b701312b3baf2fb2ebR151), we're dropping the bind parameter from `/account/3pid`. This endpoint can now only be used for adding threepids to the user's account on the homeserver.
---
 changelog.d/6067.feature                | 1 +
 synapse/rest/client/v2_alpha/account.py | 4 ----
 sytest-blacklist                        | 9 +++++++++
 3 files changed, 10 insertions(+), 4 deletions(-)
 create mode 100644 changelog.d/6067.feature

diff --git a/changelog.d/6067.feature b/changelog.d/6067.feature
new file mode 100644
index 000000000..72685961c
--- /dev/null
+++ b/changelog.d/6067.feature
@@ -0,0 +1 @@
+Remove `bind` parameter from Client Server POST `/account` endpoint as per [MSC2290](https://github.com/matrix-org/matrix-doc/pull/2290/).
\ No newline at end of file
diff --git a/synapse/rest/client/v2_alpha/account.py b/synapse/rest/client/v2_alpha/account.py
index ce1487dbc..1791f4d79 100644
--- a/synapse/rest/client/v2_alpha/account.py
+++ b/synapse/rest/client/v2_alpha/account.py
@@ -521,10 +521,6 @@ class ThreepidRestServlet(RestServlet):
             user_id, threepid["medium"], threepid["address"], threepid["validated_at"]
         )
 
-        if "bind" in body and body["bind"]:
-            logger.debug("Binding threepid %s to %s", threepid, user_id)
-            yield self.identity_handler.bind_threepid(threepid_creds, user_id)
-
         return 200, {}
 
 
diff --git a/sytest-blacklist b/sytest-blacklist
index 11785fd43..04698cb06 100644
--- a/sytest-blacklist
+++ b/sytest-blacklist
@@ -29,3 +29,12 @@ Enabling an unknown default rule fails with 404
 
 # Blacklisted due to https://github.com/matrix-org/synapse/issues/1663
 New federated private chats get full presence information (SYN-115)
+
+# Blacklisted temporarily due to https://github.com/matrix-org/matrix-doc/pull/2290
+# These sytests need to be updated with new endpoints, which will come in a later PR
+# That PR will also remove this blacklist
+Can bind 3PID via home server
+Can bind and unbind 3PID via homeserver
+3PIDs are unbound after account deactivation
+Can bind and unbind 3PID via /unbind by specifying the identity server
+Can bind and unbind 3PID via /unbind without specifying the identity server

From aeb40f355c8590855eeca05b49bfff2b91faa85b Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Fri, 20 Sep 2019 10:46:59 +0100
Subject: [PATCH 070/276] Ensure email validation link parameters are
 URL-encoded (#6063)

The validation links sent via email had their query parameters inserted without any URL-encoding. Surprisingly this didn't seem to cause any issues, but if a user were to put a `/` in their client_secret it could lead to problems.
---
 changelog.d/6063.bugfix |  1 +
 synapse/push/mailer.py  | 10 ++++++----
 2 files changed, 7 insertions(+), 4 deletions(-)
 create mode 100644 changelog.d/6063.bugfix

diff --git a/changelog.d/6063.bugfix b/changelog.d/6063.bugfix
new file mode 100644
index 000000000..7485e32a2
--- /dev/null
+++ b/changelog.d/6063.bugfix
@@ -0,0 +1 @@
+Ensure query parameters in email validation links are URL-encoded.
\ No newline at end of file
diff --git a/synapse/push/mailer.py b/synapse/push/mailer.py
index 3dfd52784..2437235dc 100644
--- a/synapse/push/mailer.py
+++ b/synapse/push/mailer.py
@@ -136,10 +136,11 @@ class Mailer(object):
                 group together multiple email sending attempts
             sid (str): The generated session ID
         """
+        params = {"token": token, "client_secret": client_secret, "sid": sid}
         link = (
             self.hs.config.public_baseurl
-            + "_matrix/client/unstable/password_reset/email/submit_token"
-            "?token=%s&client_secret=%s&sid=%s" % (token, client_secret, sid)
+            + "_matrix/client/unstable/password_reset/email/submit_token?%s"
+            % urllib.parse.urlencode(params)
         )
 
         template_vars = {"link": link}
@@ -163,10 +164,11 @@ class Mailer(object):
                 group together multiple email sending attempts
             sid (str): The generated session ID
         """
+        params = {"token": token, "client_secret": client_secret, "sid": sid}
         link = (
             self.hs.config.public_baseurl
-            + "_matrix/client/unstable/registration/email/submit_token"
-            "?token=%s&client_secret=%s&sid=%s" % (token, client_secret, sid)
+            + "_matrix/client/unstable/registration/email/submit_token?%s"
+            % urllib.parse.urlencode(params)
         )
 
         template_vars = {"link": link}

From 33757bad19a19adaef211a0d58fc369c68dfeb3c Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Fri, 20 Sep 2019 11:15:14 +0100
Subject: [PATCH 071/276] More better logging

---
 synapse/handlers/saml_handler.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/synapse/handlers/saml_handler.py b/synapse/handlers/saml_handler.py
index f000d2a00..cc9e6b9bd 100644
--- a/synapse/handlers/saml_handler.py
+++ b/synapse/handlers/saml_handler.py
@@ -111,7 +111,8 @@ class SamlHandler:
             logger.warning("SAML2 response was not signed")
             raise SynapseError(400, "SAML2 response was not signed")
 
-        logger.info("Got SAML2 reponse with attributes: %s", saml2_auth.ava)
+        logger.info("SAML2 response: %s", saml2_auth.origxml)
+        logger.info("SAML2 mapped attributes: %s", saml2_auth.ava)
 
         try:
             remote_user_id = saml2_auth.ava["uid"][0]

From 9d94313209fdb2141189c927cb1f81fea6feb5e4 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Fri, 20 Sep 2019 12:05:00 +0100
Subject: [PATCH 072/276] Fix exception when resetting retry timings

Fixes:
> TypeError: set_destination_retry_timings() missing 1 required positional
argument: 'retry_interval'

Introduced in #6016.
---
 changelog.d/6072.misc                  | 1 +
 synapse/federation/transport/server.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/6072.misc

diff --git a/changelog.d/6072.misc b/changelog.d/6072.misc
new file mode 100644
index 000000000..91cf16471
--- /dev/null
+++ b/changelog.d/6072.misc
@@ -0,0 +1 @@
+Add a 'failure_ts' column to the 'destinations' database table.
diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py
index 7dc696c7a..7f8a16e35 100644
--- a/synapse/federation/transport/server.py
+++ b/synapse/federation/transport/server.py
@@ -165,7 +165,7 @@ class Authenticator(object):
     async def _reset_retry_timings(self, origin):
         try:
             logger.info("Marking origin %r as up", origin)
-            await self.store.set_destination_retry_timings(origin, 0, 0)
+            await self.store.set_destination_retry_timings(origin, None, 0, 0)
         except Exception:
             logger.exception("Error resetting retry timings on %s", origin)
 

From 7763dd3e9592909cfe3d7763f4a68b8135fc2bdc Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Fri, 20 Sep 2019 14:58:37 +0100
Subject: [PATCH 073/276] Remove trailing slash ability from password reset's
 submit_token endpoint (#6074)

Remove the ability to use a trailing slash on the password reset submit_token endpoint. Since we provide the link in an email, and have never sent it with a trailing slash, there is no point in accepting one on the endpoint.
---
 changelog.d/6074.feature                | 1 +
 synapse/rest/client/v2_alpha/account.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/6074.feature

diff --git a/changelog.d/6074.feature b/changelog.d/6074.feature
new file mode 100644
index 000000000..b7aa9c99d
--- /dev/null
+++ b/changelog.d/6074.feature
@@ -0,0 +1 @@
+Prevent password reset's submit_token endpoint from accepting trailing slashes.
\ No newline at end of file
diff --git a/synapse/rest/client/v2_alpha/account.py b/synapse/rest/client/v2_alpha/account.py
index 1791f4d79..3c5b23dc8 100644
--- a/synapse/rest/client/v2_alpha/account.py
+++ b/synapse/rest/client/v2_alpha/account.py
@@ -200,7 +200,7 @@ class PasswordResetSubmitTokenServlet(RestServlet):
     """Handles 3PID validation token submission"""
 
     PATTERNS = client_patterns(
-        "/password_reset/(?P<medium>[^/]*)/submit_token/*$", releases=(), unstable=True
+        "/password_reset/(?P<medium>[^/]*)/submit_token$", releases=(), unstable=True
     )
 
     def __init__(self, hs):

From df3401a71d78088da36a03c73d35bc116c712df6 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Fri, 20 Sep 2019 15:21:30 +0100
Subject: [PATCH 074/276] Allow HS to send emails when adding an email to the
 HS (#6042)

---
 changelog.d/6042.feature                      |   1 +
 docs/sample_config.yaml                       |  12 +
 synapse/config/emailconfig.py                 |  36 +++
 synapse/handlers/identity.py                  |  17 +-
 synapse/push/mailer.py                        |  29 ++
 synapse/res/templates/add_threepid.html       |   9 +
 synapse/res/templates/add_threepid.txt        |   6 +
 .../res/templates/add_threepid_failure.html   |   8 +
 .../res/templates/add_threepid_success.html   |   6 +
 synapse/rest/client/v2_alpha/account.py       | 252 +++++++++++++++---
 synapse/rest/client/v2_alpha/register.py      |  24 +-
 synapse/storage/registration.py               |  31 ++-
 12 files changed, 359 insertions(+), 72 deletions(-)
 create mode 100644 changelog.d/6042.feature
 create mode 100644 synapse/res/templates/add_threepid.html
 create mode 100644 synapse/res/templates/add_threepid.txt
 create mode 100644 synapse/res/templates/add_threepid_failure.html
 create mode 100644 synapse/res/templates/add_threepid_success.html

diff --git a/changelog.d/6042.feature b/changelog.d/6042.feature
new file mode 100644
index 000000000..a73776036
--- /dev/null
+++ b/changelog.d/6042.feature
@@ -0,0 +1 @@
+Allow homeserver to handle or delegate email validation when adding an email to a user's account.
diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml
index 3e4edc6b0..61d9f09a9 100644
--- a/docs/sample_config.yaml
+++ b/docs/sample_config.yaml
@@ -1261,6 +1261,12 @@ password_config:
 #   #registration_template_html: registration.html
 #   #registration_template_text: registration.txt
 #
+#   # Templates for validation emails sent by the homeserver when adding an email to
+#   # your user account
+#   #
+#   #add_threepid_template_html: add_threepid.html
+#   #add_threepid_template_text: add_threepid.txt
+#
 #   # Templates for password reset success and failure pages that a user
 #   # will see after attempting to reset their password
 #   #
@@ -1272,6 +1278,12 @@ password_config:
 #   #
 #   #registration_template_success_html: registration_success.html
 #   #registration_template_failure_html: registration_failure.html
+#
+#   # Templates for success and failure pages that a user will see after attempting
+#   # to add an email or phone to their account
+#   #
+#   #add_threepid_template_success_html: add_threepid_success.html
+#   #add_threepid_template_failure_html: add_threepid_failure.html
 
 
 #password_providers:
diff --git a/synapse/config/emailconfig.py b/synapse/config/emailconfig.py
index e5de768b0..d9b43de66 100644
--- a/synapse/config/emailconfig.py
+++ b/synapse/config/emailconfig.py
@@ -169,12 +169,22 @@ class EmailConfig(Config):
             self.email_registration_template_text = email_config.get(
                 "registration_template_text", "registration.txt"
             )
+            self.email_add_threepid_template_html = email_config.get(
+                "add_threepid_template_html", "add_threepid.html"
+            )
+            self.email_add_threepid_template_text = email_config.get(
+                "add_threepid_template_text", "add_threepid.txt"
+            )
+
             self.email_password_reset_template_failure_html = email_config.get(
                 "password_reset_template_failure_html", "password_reset_failure.html"
             )
             self.email_registration_template_failure_html = email_config.get(
                 "registration_template_failure_html", "registration_failure.html"
             )
+            self.email_add_threepid_template_failure_html = email_config.get(
+                "add_threepid_template_failure_html", "add_threepid_failure.html"
+            )
 
             # These templates do not support any placeholder variables, so we
             # will read them from disk once during setup
@@ -184,6 +194,9 @@ class EmailConfig(Config):
             email_registration_template_success_html = email_config.get(
                 "registration_template_success_html", "registration_success.html"
             )
+            email_add_threepid_template_success_html = email_config.get(
+                "add_threepid_template_success_html", "add_threepid_success.html"
+            )
 
             # Check templates exist
             for f in [
@@ -191,9 +204,14 @@ class EmailConfig(Config):
                 self.email_password_reset_template_text,
                 self.email_registration_template_html,
                 self.email_registration_template_text,
+                self.email_add_threepid_template_html,
+                self.email_add_threepid_template_text,
                 self.email_password_reset_template_failure_html,
+                self.email_registration_template_failure_html,
+                self.email_add_threepid_template_failure_html,
                 email_password_reset_template_success_html,
                 email_registration_template_success_html,
+                email_add_threepid_template_success_html,
             ]:
                 p = os.path.join(self.email_template_dir, f)
                 if not os.path.isfile(p):
@@ -212,6 +230,12 @@ class EmailConfig(Config):
             self.email_registration_template_success_html_content = self.read_file(
                 filepath, "email.registration_template_success_html"
             )
+            filepath = os.path.join(
+                self.email_template_dir, email_add_threepid_template_success_html
+            )
+            self.email_add_threepid_template_success_html_content = self.read_file(
+                filepath, "email.add_threepid_template_success_html"
+            )
 
         if self.email_enable_notifs:
             required = [
@@ -328,6 +352,12 @@ class EmailConfig(Config):
         #   #registration_template_html: registration.html
         #   #registration_template_text: registration.txt
         #
+        #   # Templates for validation emails sent by the homeserver when adding an email to
+        #   # your user account
+        #   #
+        #   #add_threepid_template_html: add_threepid.html
+        #   #add_threepid_template_text: add_threepid.txt
+        #
         #   # Templates for password reset success and failure pages that a user
         #   # will see after attempting to reset their password
         #   #
@@ -339,6 +369,12 @@ class EmailConfig(Config):
         #   #
         #   #registration_template_success_html: registration_success.html
         #   #registration_template_failure_html: registration_failure.html
+        #
+        #   # Templates for success and failure pages that a user will see after attempting
+        #   # to add an email or phone to their account
+        #   #
+        #   #add_threepid_template_success_html: add_threepid_success.html
+        #   #add_threepid_template_failure_html: add_threepid_failure.html
         """
 
 
diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py
index 512f38e5a..156719e30 100644
--- a/synapse/handlers/identity.py
+++ b/synapse/handlers/identity.py
@@ -81,11 +81,10 @@ class IdentityHandler(BaseHandler):
         given identity server
 
         Args:
-            id_server (str|None): The identity server to validate 3PIDs against. If None,
-                we will attempt to extract id_server creds
+            id_server (str): The identity server to validate 3PIDs against. Must be a
+                complete URL including the protocol (http(s)://)
 
             creds (dict[str, str]): Dictionary containing the following keys:
-                * id_server|idServer: An optional domain name of an identity server
                 * client_secret|clientSecret: A unique secret str provided by the client
                 * sid: The ID of the validation session
 
@@ -104,20 +103,10 @@ class IdentityHandler(BaseHandler):
             raise SynapseError(
                 400, "Missing param session_id in creds", errcode=Codes.MISSING_PARAM
             )
-        if not id_server:
-            # Attempt to get the id_server from the creds dict
-            id_server = creds.get("id_server") or creds.get("idServer")
-            if not id_server:
-                raise SynapseError(
-                    400, "Missing param id_server in creds", errcode=Codes.MISSING_PARAM
-                )
 
         query_params = {"sid": session_id, "client_secret": client_secret}
 
-        url = "https://%s%s" % (
-            id_server,
-            "/_matrix/identity/api/v1/3pid/getValidated3pid",
-        )
+        url = id_server + "/_matrix/identity/api/v1/3pid/getValidated3pid"
 
         data = yield self.http_client.get_json(url, query_params)
         return data if "medium" in data else None
diff --git a/synapse/push/mailer.py b/synapse/push/mailer.py
index 2437235dc..5a4fc78b4 100644
--- a/synapse/push/mailer.py
+++ b/synapse/push/mailer.py
@@ -179,6 +179,35 @@ class Mailer(object):
             template_vars,
         )
 
+    @defer.inlineCallbacks
+    def send_add_threepid_mail(self, email_address, token, client_secret, sid):
+        """Send an email with a validation link to a user for adding a 3pid to their account
+
+        Args:
+            email_address (str): Email address we're sending the validation link to
+
+            token (str): Unique token generated by the server to verify the email was received
+
+            client_secret (str): Unique token generated by the client to group together
+                multiple email sending attempts
+
+            sid (str): The generated session ID
+        """
+        params = {"token": token, "client_secret": client_secret, "sid": sid}
+        link = (
+            self.hs.config.public_baseurl
+            + "_matrix/client/unstable/add_threepid/email/submit_token?%s"
+            % urllib.parse.urlencode(params)
+        )
+
+        template_vars = {"link": link}
+
+        yield self.send_email(
+            email_address,
+            "[%s] Validate Your Email" % self.hs.config.server_name,
+            template_vars,
+        )
+
     @defer.inlineCallbacks
     def send_notification_mail(
         self, app_id, user_id, email_address, push_actions, reason
diff --git a/synapse/res/templates/add_threepid.html b/synapse/res/templates/add_threepid.html
new file mode 100644
index 000000000..cc4ab07e0
--- /dev/null
+++ b/synapse/res/templates/add_threepid.html
@@ -0,0 +1,9 @@
+<html>
+<body>
+    <p>A request to add an email address to your Matrix account has been received. If this was you, please click the link below to confirm adding this email:</p>
+
+    <a href="{{ link }}">{{ link }}</a>
+
+    <p>If this was not you, you can safely ignore this email. Thank you.</p>
+</body>
+</html>
diff --git a/synapse/res/templates/add_threepid.txt b/synapse/res/templates/add_threepid.txt
new file mode 100644
index 000000000..a60c1ff65
--- /dev/null
+++ b/synapse/res/templates/add_threepid.txt
@@ -0,0 +1,6 @@
+A request to add an email address to your Matrix account has been received. If this was you,
+please click the link below to confirm adding this email:
+
+{{ link }}
+
+If this was not you, you can safely ignore this email. Thank you.
diff --git a/synapse/res/templates/add_threepid_failure.html b/synapse/res/templates/add_threepid_failure.html
new file mode 100644
index 000000000..441d11c84
--- /dev/null
+++ b/synapse/res/templates/add_threepid_failure.html
@@ -0,0 +1,8 @@
+<html>
+<head></head>
+<body>
+<p>The request failed for the following reason: {{ failure_reason }}.</p>
+
+<p>No changes have been made to your account.</p>
+</body>
+</html>
diff --git a/synapse/res/templates/add_threepid_success.html b/synapse/res/templates/add_threepid_success.html
new file mode 100644
index 000000000..fbd6e4018
--- /dev/null
+++ b/synapse/res/templates/add_threepid_success.html
@@ -0,0 +1,6 @@
+<html>
+<head></head>
+<body>
+<p>Your email has now been validated, please return to your client. You may now close this window.</p>
+</body>
+</html>
diff --git a/synapse/rest/client/v2_alpha/account.py b/synapse/rest/client/v2_alpha/account.py
index 3c5b23dc8..1139bb156 100644
--- a/synapse/rest/client/v2_alpha/account.py
+++ b/synapse/rest/client/v2_alpha/account.py
@@ -21,7 +21,12 @@ from six.moves import http_client
 from twisted.internet import defer
 
 from synapse.api.constants import LoginType
-from synapse.api.errors import Codes, SynapseError, ThreepidValidationError
+from synapse.api.errors import (
+    Codes,
+    HttpResponseException,
+    SynapseError,
+    ThreepidValidationError,
+)
 from synapse.config.emailconfig import ThreepidBehaviour
 from synapse.http.server import finish_request
 from synapse.http.servlet import (
@@ -103,16 +108,9 @@ class EmailPasswordRequestTokenRestServlet(RestServlet):
             raise SynapseError(400, "Email not found", Codes.THREEPID_NOT_FOUND)
 
         if self.config.threepid_behaviour_email == ThreepidBehaviour.REMOTE:
-            # Have the configured identity server handle the request
-            if not self.hs.config.account_threepid_delegate_email:
-                logger.warn(
-                    "No upstream email account_threepid_delegate configured on the server to "
-                    "handle this request"
-                )
-                raise SynapseError(
-                    400, "Password reset by email is not supported on this homeserver"
-                )
+            assert self.hs.config.account_threepid_delegate_email
 
+            # Have the configured identity server handle the request
             ret = yield self.identity_handler.requestEmailToken(
                 self.hs.config.account_threepid_delegate_email,
                 email,
@@ -214,6 +212,11 @@ class PasswordResetSubmitTokenServlet(RestServlet):
         self.config = hs.config
         self.clock = hs.get_clock()
         self.store = hs.get_datastore()
+        if self.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL:
+            self.failure_email_template, = load_jinja2_templates(
+                self.config.email_template_dir,
+                [self.config.email_password_reset_template_failure_html],
+            )
 
     @defer.inlineCallbacks
     def on_GET(self, request, medium):
@@ -261,13 +264,8 @@ class PasswordResetSubmitTokenServlet(RestServlet):
             request.setResponseCode(e.code)
 
             # Show a failure page with a reason
-            html_template, = load_jinja2_templates(
-                self.config.email_template_dir,
-                [self.config.email_password_reset_template_failure_html],
-            )
-
             template_vars = {"failure_reason": e.msg}
-            html = html_template.render(**template_vars)
+            html = self.failure_email_template.render(**template_vars)
 
         request.write(html.encode("utf-8"))
         finish_request(request)
@@ -399,13 +397,35 @@ class EmailThreepidRequestTokenRestServlet(RestServlet):
         self.identity_handler = hs.get_handlers().identity_handler
         self.store = self.hs.get_datastore()
 
+        if self.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL:
+            template_html, template_text = load_jinja2_templates(
+                self.config.email_template_dir,
+                [
+                    self.config.email_add_threepid_template_html,
+                    self.config.email_add_threepid_template_text,
+                ],
+                public_baseurl=self.config.public_baseurl,
+            )
+            self.mailer = Mailer(
+                hs=self.hs,
+                app_name=self.config.email_app_name,
+                template_html=template_html,
+                template_text=template_text,
+            )
+
     @defer.inlineCallbacks
     def on_POST(self, request):
+        if self.config.threepid_behaviour_email == ThreepidBehaviour.OFF:
+            if self.config.local_threepid_handling_disabled_due_to_email_config:
+                logger.warn(
+                    "Adding emails have been disabled due to lack of an email config"
+                )
+            raise SynapseError(
+                400, "Adding an email to your account is disabled on this server"
+            )
+
         body = parse_json_object_from_request(request)
-        assert_params_in_dict(
-            body, ["id_server", "client_secret", "email", "send_attempt"]
-        )
-        id_server = "https://" + body["id_server"]  # Assume https
+        assert_params_in_dict(body, ["client_secret", "email", "send_attempt"])
         client_secret = body["client_secret"]
         email = body["email"]
         send_attempt = body["send_attempt"]
@@ -425,9 +445,30 @@ class EmailThreepidRequestTokenRestServlet(RestServlet):
         if existing_user_id is not None:
             raise SynapseError(400, "Email is already in use", Codes.THREEPID_IN_USE)
 
-        ret = yield self.identity_handler.requestEmailToken(
-            id_server, email, client_secret, send_attempt, next_link
-        )
+        if self.config.threepid_behaviour_email == ThreepidBehaviour.REMOTE:
+            assert self.hs.config.account_threepid_delegate_email
+
+            # Have the configured identity server handle the request
+            ret = yield self.identity_handler.requestEmailToken(
+                self.hs.config.account_threepid_delegate_email,
+                email,
+                client_secret,
+                send_attempt,
+                next_link,
+            )
+        else:
+            # Send threepid validation emails from Synapse
+            sid = yield self.identity_handler.send_threepid_validation(
+                email,
+                client_secret,
+                send_attempt,
+                self.mailer.send_add_threepid_mail,
+                next_link,
+            )
+
+            # Wrap the session id in a JSON object
+            ret = {"sid": sid}
+
         return 200, ret
 
 
@@ -471,9 +512,86 @@ class MsisdnThreepidRequestTokenRestServlet(RestServlet):
         ret = yield self.identity_handler.requestMsisdnToken(
             id_server, country, phone_number, client_secret, send_attempt, next_link
         )
+
         return 200, ret
 
 
+class AddThreepidSubmitTokenServlet(RestServlet):
+    """Handles 3PID validation token submission for adding an email to a user's account"""
+
+    PATTERNS = client_patterns(
+        "/add_threepid/email/submit_token$", releases=(), unstable=True
+    )
+
+    def __init__(self, hs):
+        """
+        Args:
+            hs (synapse.server.HomeServer): server
+        """
+        super().__init__()
+        self.config = hs.config
+        self.clock = hs.get_clock()
+        self.store = hs.get_datastore()
+        if self.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL:
+            self.failure_email_template, = load_jinja2_templates(
+                self.config.email_template_dir,
+                [self.config.email_add_threepid_template_failure_html],
+            )
+
+    @defer.inlineCallbacks
+    def on_GET(self, request):
+        if self.config.threepid_behaviour_email == ThreepidBehaviour.OFF:
+            if self.config.local_threepid_handling_disabled_due_to_email_config:
+                logger.warn(
+                    "Adding emails have been disabled due to lack of an email config"
+                )
+            raise SynapseError(
+                400, "Adding an email to your account is disabled on this server"
+            )
+        elif self.config.threepid_behaviour_email == ThreepidBehaviour.REMOTE:
+            raise SynapseError(
+                400,
+                "This homeserver is not validating threepids. Use an identity server "
+                "instead.",
+            )
+
+        sid = parse_string(request, "sid", required=True)
+        client_secret = parse_string(request, "client_secret", required=True)
+        token = parse_string(request, "token", required=True)
+
+        # Attempt to validate a 3PID session
+        try:
+            # Mark the session as valid
+            next_link = yield self.store.validate_threepid_session(
+                sid, client_secret, token, self.clock.time_msec()
+            )
+
+            # Perform a 302 redirect if next_link is set
+            if next_link:
+                if next_link.startswith("file:///"):
+                    logger.warn(
+                        "Not redirecting to next_link as it is a local file: address"
+                    )
+                else:
+                    request.setResponseCode(302)
+                    request.setHeader("Location", next_link)
+                    finish_request(request)
+                    return None
+
+            # Otherwise show the success template
+            html = self.config.email_add_threepid_template_success_html_content
+            request.setResponseCode(200)
+        except ThreepidValidationError as e:
+            request.setResponseCode(e.code)
+
+            # Show a failure page with a reason
+            template_vars = {"failure_reason": e.msg}
+            html = self.failure_email_template.render(**template_vars)
+
+        request.write(html.encode("utf-8"))
+        finish_request(request)
+
+
 class ThreepidRestServlet(RestServlet):
     PATTERNS = client_patterns("/account/3pid$")
 
@@ -495,6 +613,8 @@ class ThreepidRestServlet(RestServlet):
 
     @defer.inlineCallbacks
     def on_POST(self, request):
+        requester = yield self.auth.get_user_by_req(request)
+        user_id = requester.user.to_string()
         body = parse_json_object_from_request(request)
 
         threepid_creds = body.get("threePidCreds") or body.get("three_pid_creds")
@@ -502,26 +622,85 @@ class ThreepidRestServlet(RestServlet):
             raise SynapseError(
                 400, "Missing param three_pid_creds", Codes.MISSING_PARAM
             )
+        assert_params_in_dict(threepid_creds, ["client_secret", "sid"])
 
-        requester = yield self.auth.get_user_by_req(request)
-        user_id = requester.user.to_string()
+        client_secret = threepid_creds["client_secret"]
+        sid = threepid_creds["sid"]
 
-        # Specify None as the identity server to retrieve it from the request body instead
-        threepid = yield self.identity_handler.threepid_from_creds(None, threepid_creds)
+        # We don't actually know which medium this 3PID is. Thus we first assume it's email,
+        # and if validation fails we try msisdn
+        validation_session = None
 
-        if not threepid:
-            raise SynapseError(400, "Failed to auth 3pid", Codes.THREEPID_AUTH_FAILED)
+        # Try to validate as email
+        if self.hs.config.threepid_behaviour_email == ThreepidBehaviour.REMOTE:
+            # Ask our delegated email identity server
+            try:
+                validation_session = yield self.identity_handler.threepid_from_creds(
+                    self.hs.config.account_threepid_delegate_email, threepid_creds
+                )
+            except HttpResponseException:
+                logger.debug(
+                    "%s reported non-validated threepid: %s",
+                    self.hs.config.account_threepid_delegate_email,
+                    threepid_creds,
+                )
+        elif self.hs.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL:
+            # Get a validated session matching these details
+            validation_session = yield self.datastore.get_threepid_validation_session(
+                "email", client_secret, sid=sid, validated=True
+            )
 
-        for reqd in ["medium", "address", "validated_at"]:
-            if reqd not in threepid:
-                logger.warn("Couldn't add 3pid: invalid response from ID server")
-                raise SynapseError(500, "Invalid response from ID Server")
+        # Old versions of Sydent return a 200 http code even on a failed validation check.
+        # Thus, in addition to the HttpResponseException check above (which checks for
+        # non-200 errors), we need to make sure validation_session isn't actually an error,
+        # identified by containing an "error" key
+        # See https://github.com/matrix-org/sydent/issues/215 for details
+        if validation_session and "error" not in validation_session:
+            yield self._add_threepid_to_account(user_id, validation_session)
+            return 200, {}
 
-        yield self.auth_handler.add_threepid(
-            user_id, threepid["medium"], threepid["address"], threepid["validated_at"]
+        # Try to validate as msisdn
+        if self.hs.config.account_threepid_delegate_msisdn:
+            # Ask our delegated msisdn identity server
+            try:
+                validation_session = yield self.identity_handler.threepid_from_creds(
+                    self.hs.config.account_threepid_delegate_msisdn, threepid_creds
+                )
+            except HttpResponseException:
+                logger.debug(
+                    "%s reported non-validated threepid: %s",
+                    self.hs.config.account_threepid_delegate_msisdn,
+                    threepid_creds,
+                )
+
+            # Check that validation_session isn't actually an error due to old Sydent instances
+            # See explanatory comment above
+            if validation_session and "error" not in validation_session:
+                yield self._add_threepid_to_account(user_id, validation_session)
+                return 200, {}
+
+        raise SynapseError(
+            400, "No validated 3pid session found", Codes.THREEPID_AUTH_FAILED
         )
 
-        return 200, {}
+    @defer.inlineCallbacks
+    def _add_threepid_to_account(self, user_id, validation_session):
+        """Add a threepid wrapped in a validation_session dict to an account
+
+        Args:
+            user_id (str): The mxid of the user to add this 3PID to
+
+            validation_session (dict): A dict containing the following:
+                * medium       - medium of the threepid
+                * address      - address of the threepid
+                * validated_at - timestamp of when the validation occurred
+        """
+        yield self.auth_handler.add_threepid(
+            user_id,
+            validation_session["medium"],
+            validation_session["address"],
+            validation_session["validated_at"],
+        )
 
 
 class ThreepidUnbindRestServlet(RestServlet):
@@ -613,6 +792,7 @@ def register_servlets(hs, http_server):
     DeactivateAccountRestServlet(hs).register(http_server)
     EmailThreepidRequestTokenRestServlet(hs).register(http_server)
     MsisdnThreepidRequestTokenRestServlet(hs).register(http_server)
+    AddThreepidSubmitTokenServlet(hs).register(http_server)
     ThreepidRestServlet(hs).register(http_server)
     ThreepidUnbindRestServlet(hs).register(http_server)
     ThreepidDeleteRestServlet(hs).register(http_server)
diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py
index 5c7a5f357..34276ea3f 100644
--- a/synapse/rest/client/v2_alpha/register.py
+++ b/synapse/rest/client/v2_alpha/register.py
@@ -131,15 +131,9 @@ class EmailRegisterRequestTokenRestServlet(RestServlet):
             raise SynapseError(400, "Email is already in use", Codes.THREEPID_IN_USE)
 
         if self.config.threepid_behaviour_email == ThreepidBehaviour.REMOTE:
-            if not self.hs.config.account_threepid_delegate_email:
-                logger.warn(
-                    "No upstream email account_threepid_delegate configured on the server to "
-                    "handle this request"
-                )
-                raise SynapseError(
-                    400, "Registration by email is not supported on this homeserver"
-                )
+            assert self.hs.config.account_threepid_delegate_email
 
+            # Have the configured identity server handle the request
             ret = yield self.identity_handler.requestEmailToken(
                 self.hs.config.account_threepid_delegate_email,
                 email,
@@ -246,6 +240,12 @@ class RegistrationSubmitTokenServlet(RestServlet):
         self.clock = hs.get_clock()
         self.store = hs.get_datastore()
 
+        if self.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL:
+            self.failure_email_template, = load_jinja2_templates(
+                self.config.email_template_dir,
+                [self.config.email_registration_template_failure_html],
+            )
+
     @defer.inlineCallbacks
     def on_GET(self, request, medium):
         if medium != "email":
@@ -289,17 +289,11 @@ class RegistrationSubmitTokenServlet(RestServlet):
 
             request.setResponseCode(200)
         except ThreepidValidationError as e:
-            # Show a failure page with a reason
             request.setResponseCode(e.code)
 
             # Show a failure page with a reason
-            html_template, = load_jinja2_templates(
-                self.config.email_template_dir,
-                [self.config.email_registration_template_failure_html],
-            )
-
             template_vars = {"failure_reason": e.msg}
-            html = html_template.render(**template_vars)
+            html = self.failure_email_template.render(**template_vars)
 
         request.write(html.encode("utf-8"))
         finish_request(request)
diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py
index 109052fa4..da27ad76b 100644
--- a/synapse/storage/registration.py
+++ b/synapse/storage/registration.py
@@ -24,7 +24,7 @@ from six.moves import range
 from twisted.internet import defer
 
 from synapse.api.constants import UserTypes
-from synapse.api.errors import Codes, StoreError, ThreepidValidationError
+from synapse.api.errors import Codes, StoreError, SynapseError, ThreepidValidationError
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.storage import background_updates
 from synapse.storage._base import SQLBaseStore
@@ -661,18 +661,31 @@ class RegistrationWorkerStore(SQLBaseStore):
             medium (str|None): The medium of the 3PID
             address (str|None): The address of the 3PID
             sid (str|None): The ID of the validation session
-            client_secret (str|None): A unique string provided by the client to
-                help identify this validation attempt
+            client_secret (str): A unique string provided by the client to help identify this
+                validation attempt
             validated (bool|None): Whether sessions should be filtered by
                 whether they have been validated already or not. None to
                 perform no filtering
 
         Returns:
-            deferred {str, int}|None: A dict containing the
-                latest session_id and send_attempt count for this 3PID.
-                Otherwise None if there hasn't been a previous attempt
+            Deferred[dict|None]: A dict containing the following:
+                * address - address of the 3pid
+                * medium - medium of the 3pid
+                * client_secret - a secret provided by the client for this validation session
+                * session_id - ID of the validation session
+                * send_attempt - a number serving to dedupe send attempts for this session
+                * validated_at - timestamp of when this session was validated if so
+
+                Otherwise None if a validation session is not found
         """
-        keyvalues = {"medium": medium, "client_secret": client_secret}
+        if not client_secret:
+            raise SynapseError(
+                400, "Missing parameter: client_secret", errcode=Codes.MISSING_PARAM
+            )
+
+        keyvalues = {"client_secret": client_secret}
+        if medium:
+            keyvalues["medium"] = medium
         if address:
             keyvalues["address"] = address
         if sid:
@@ -1209,6 +1222,10 @@ class RegistrationStore(
             current_ts (int): The current unix time in milliseconds. Used for
                 checking token expiry status
 
+        Raises:
+            ThreepidValidationError: if a matching validation token was not found or has
+                expired
+
         Returns:
             deferred str|None: A str representing a link to redirect the user
             to if there is one.

From 7ef319aefe6a44ee0bdfa4bdc4c05ce8ef96cb18 Mon Sep 17 00:00:00 2001
From: Matthew Hodgson <matthew@matrix.org>
Date: Mon, 23 Sep 2019 12:28:01 +0100
Subject: [PATCH 075/276] fix broken copyrights

---
 synapse/config/repository.py | 2 +-
 synapse/config/workers.py    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/synapse/config/repository.py b/synapse/config/repository.py
index 34f1a9a92..52e014608 100644
--- a/synapse/config/repository.py
+++ b/synapse/config/repository.py
@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-# Copyright 2014, 2015 matrix.org
+# Copyright 2014, 2015 OpenMarket Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/synapse/config/workers.py b/synapse/config/workers.py
index bc0fc165e..1ec499862 100644
--- a/synapse/config/workers.py
+++ b/synapse/config/workers.py
@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-# Copyright 2016 matrix.org
+# Copyright 2016 OpenMarket Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

From 885a4726b7f9cdf02187b92b43f639e2cbfbb12e Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Mon, 23 Sep 2019 15:37:23 +0200
Subject: [PATCH 076/276] Return timeout error to user for identity server
 calls (#6073)

---
 changelog.d/6073.feature        |  1 +
 synapse/handlers/identity.py    | 16 ++++++++++++++--
 synapse/handlers/room_member.py | 32 +++++++++++++++++++++++---------
 3 files changed, 38 insertions(+), 11 deletions(-)
 create mode 100644 changelog.d/6073.feature

diff --git a/changelog.d/6073.feature b/changelog.d/6073.feature
new file mode 100644
index 000000000..15d993389
--- /dev/null
+++ b/changelog.d/6073.feature
@@ -0,0 +1 @@
+Return a clearer error message when a timeout occurs while attempting to contact an identity server.
\ No newline at end of file
diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py
index 156719e30..cd4700b52 100644
--- a/synapse/handlers/identity.py
+++ b/synapse/handlers/identity.py
@@ -22,6 +22,7 @@ import logging
 from canonicaljson import json
 
 from twisted.internet import defer
+from twisted.internet.error import TimeoutError
 
 from synapse.api.errors import (
     CodeMessageException,
@@ -108,7 +109,10 @@ class IdentityHandler(BaseHandler):
 
         url = id_server + "/_matrix/identity/api/v1/3pid/getValidated3pid"
 
-        data = yield self.http_client.get_json(url, query_params)
+        try:
+            data = yield self.http_client.get_json(url, query_params)
+        except TimeoutError:
+            raise SynapseError(500, "Timed out contacting identity server")
         return data if "medium" in data else None
 
     @defer.inlineCallbacks
@@ -171,6 +175,8 @@ class IdentityHandler(BaseHandler):
             if e.code != 404 or not use_v2:
                 logger.error("3PID bind failed with Matrix error: %r", e)
                 raise e.to_synapse_error()
+        except TimeoutError:
+            raise SynapseError(500, "Timed out contacting identity server")
         except CodeMessageException as e:
             data = json.loads(e.msg)  # XXX WAT?
             return data
@@ -261,7 +267,9 @@ class IdentityHandler(BaseHandler):
                 logger.warn("Received %d response while unbinding threepid", e.code)
             else:
                 logger.error("Failed to unbind threepid on identity server: %s", e)
-                raise SynapseError(502, "Failed to contact identity server")
+                raise SynapseError(500, "Failed to contact identity server")
+        except TimeoutError:
+            raise SynapseError(500, "Timed out contacting identity server")
 
         yield self.store.remove_user_bound_threepid(
             user_id=mxid,
@@ -394,6 +402,8 @@ class IdentityHandler(BaseHandler):
         except HttpResponseException as e:
             logger.info("Proxied requestToken failed: %r", e)
             raise e.to_synapse_error()
+        except TimeoutError:
+            raise SynapseError(500, "Timed out contacting identity server")
 
     @defer.inlineCallbacks
     def requestMsisdnToken(
@@ -446,6 +456,8 @@ class IdentityHandler(BaseHandler):
         except HttpResponseException as e:
             logger.info("Proxied requestToken failed: %r", e)
             raise e.to_synapse_error()
+        except TimeoutError:
+            raise SynapseError(500, "Timed out contacting identity server")
 
 
 def create_id_access_token_header(id_access_token):
diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py
index 35450feb6..39df0f128 100644
--- a/synapse/handlers/room_member.py
+++ b/synapse/handlers/room_member.py
@@ -25,6 +25,7 @@ from signedjson.sign import verify_signed_json
 from unpaddedbase64 import decode_base64
 
 from twisted.internet import defer
+from twisted.internet.error import TimeoutError
 
 from synapse import types
 from synapse.api.constants import EventTypes, Membership
@@ -756,7 +757,8 @@ class RoomMemberHandler(object):
                     raise AuthError(401, "No signatures on 3pid binding")
                 yield self._verify_any_signature(data, id_server)
                 return data["mxid"]
-
+        except TimeoutError:
+            raise SynapseError(500, "Timed out contacting identity server")
         except IOError as e:
             logger.warning("Error from v1 identity server lookup: %s" % (e,))
 
@@ -777,10 +779,13 @@ class RoomMemberHandler(object):
             Deferred[str|None]: the matrix ID of the 3pid, or None if it is not recognised.
         """
         # Check what hashing details are supported by this identity server
-        hash_details = yield self.simple_http_client.get_json(
-            "%s%s/_matrix/identity/v2/hash_details" % (id_server_scheme, id_server),
-            {"access_token": id_access_token},
-        )
+        try:
+            hash_details = yield self.simple_http_client.get_json(
+                "%s%s/_matrix/identity/v2/hash_details" % (id_server_scheme, id_server),
+                {"access_token": id_access_token},
+            )
+        except TimeoutError:
+            raise SynapseError(500, "Timed out contacting identity server")
 
         if not isinstance(hash_details, dict):
             logger.warning(
@@ -851,6 +856,8 @@ class RoomMemberHandler(object):
                 },
                 headers=headers,
             )
+        except TimeoutError:
+            raise SynapseError(500, "Timed out contacting identity server")
         except Exception as e:
             logger.warning("Error when performing a v2 3pid lookup: %s", e)
             raise SynapseError(
@@ -873,10 +880,13 @@ class RoomMemberHandler(object):
         if server_hostname not in data["signatures"]:
             raise AuthError(401, "No signature from server %s" % (server_hostname,))
         for key_name, signature in data["signatures"][server_hostname].items():
-            key_data = yield self.simple_http_client.get_json(
-                "%s%s/_matrix/identity/api/v1/pubkey/%s"
-                % (id_server_scheme, server_hostname, key_name)
-            )
+            try:
+                key_data = yield self.simple_http_client.get_json(
+                    "%s%s/_matrix/identity/api/v1/pubkey/%s"
+                    % (id_server_scheme, server_hostname, key_name)
+                )
+            except TimeoutError:
+                raise SynapseError(500, "Timed out contacting identity server")
             if "public_key" not in key_data:
                 raise AuthError(
                     401, "No public key named %s from %s" % (key_name, server_hostname)
@@ -1051,6 +1061,8 @@ class RoomMemberHandler(object):
                     invite_config,
                     {"Authorization": create_id_access_token_header(id_access_token)},
                 )
+            except TimeoutError:
+                raise SynapseError(500, "Timed out contacting identity server")
             except HttpResponseException as e:
                 if e.code != 404:
                     logger.info("Failed to POST %s with JSON: %s", url, e)
@@ -1067,6 +1079,8 @@ class RoomMemberHandler(object):
                 data = yield self.simple_http_client.post_json_get_json(
                     url, invite_config
                 )
+            except TimeoutError:
+                raise SynapseError(500, "Timed out contacting identity server")
             except HttpResponseException as e:
                 logger.warning(
                     "Error trying to call /store-invite on %s%s: %s",

From 1c9feadf4bf0755162d0d210bea398a3fb690ab6 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Mon, 23 Sep 2019 15:38:19 +0200
Subject: [PATCH 077/276] Generalize email sending logging (#6075)

In ancient times Synapse would only send emails when it was notifying a user about a message they received...

Now it can do all sorts of neat things!

Change the logging so it's not just about notifications.
---
 changelog.d/6075.misc  | 1 +
 synapse/push/mailer.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/6075.misc

diff --git a/changelog.d/6075.misc b/changelog.d/6075.misc
new file mode 100644
index 000000000..914e56bcf
--- /dev/null
+++ b/changelog.d/6075.misc
@@ -0,0 +1 @@
+Change mailer logging to reflect that Synapse no longer sends only chat notifications by email.
\ No newline at end of file
diff --git a/synapse/push/mailer.py b/synapse/push/mailer.py
index 5a4fc78b4..5b16ab4ae 100644
--- a/synapse/push/mailer.py
+++ b/synapse/push/mailer.py
@@ -311,7 +311,7 @@ class Mailer(object):
         multipart_msg.attach(text_part)
         multipart_msg.attach(html_part)
 
-        logger.info("Sending email notification to %s" % email_address)
+        logger.info("Sending email to %s" % email_address)
 
         yield make_deferred_yieldable(
             self.sendmail(

From 1b519e0272a13649d442aad2a10c9a3b39c2d200 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Mon, 23 Sep 2019 16:38:38 +0200
Subject: [PATCH 078/276] Disable /register/available if registration is
 disabled (#6082)

Fixes #6066

This register endpoint should be disabled if registration is disabled; otherwise we're giving anyone the ability to check whether a username exists on a server when we don't need to.

Error code is 403 (Forbidden) as that's the same returned by /register when registration is disabled.
---
 changelog.d/6082.feature                 | 1 +
 synapse/rest/client/v2_alpha/register.py | 5 +++++
 2 files changed, 6 insertions(+)
 create mode 100644 changelog.d/6082.feature

diff --git a/changelog.d/6082.feature b/changelog.d/6082.feature
new file mode 100644
index 000000000..c30662b60
--- /dev/null
+++ b/changelog.d/6082.feature
@@ -0,0 +1 @@
+Return 403 on `/register/available` if registration has been disabled.
\ No newline at end of file
diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py
index 34276ea3f..e99b1f5c4 100644
--- a/synapse/rest/client/v2_alpha/register.py
+++ b/synapse/rest/client/v2_alpha/register.py
@@ -328,6 +328,11 @@ class UsernameAvailabilityRestServlet(RestServlet):
 
     @defer.inlineCallbacks
     def on_GET(self, request):
+        if not self.hs.config.enable_registration:
+            raise SynapseError(
+                403, "Registration has been disabled", errcode=Codes.FORBIDDEN
+            )
+
         ip = self.hs.get_ip_from_request(request)
         with self.ratelimiter.ratelimit(ip) as wait_deferred:
             yield wait_deferred

From 30af161af27146cc44152292060c7005a6b8546b Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Mon, 23 Sep 2019 17:50:27 +0200
Subject: [PATCH 079/276] Implement MSC2290 (#6043)

Implements MSC2290. This PR adds two new endpoints, /unstable/account/3pid/add and /unstable/account/3pid/bind. Depending on the progress of that MSC the unstable prefix may go away.

This PR also removes the blacklisting of some 3PID tests that occurs in #6042, as the corresponding Sytest PR changes them to use the new endpoints.

Finally, it also modifies the account deactivation code such that it doesn't just try to unbind 3PIDs that were added to the user's account, but any 3PIDs that were bound to an identity server through the homeserver for that user's account.
---
 changelog.d/6043.feature                 |   1 +
 synapse/handlers/deactivate_account.py   |   4 +-
 synapse/handlers/identity.py             | 134 ++++++++++++-------
 synapse/rest/client/v2_alpha/account.py  | 163 +++++++++++++----------
 synapse/rest/client/v2_alpha/register.py |   6 +
 synapse/storage/registration.py          |  22 ++-
 sytest-blacklist                         |   9 --
 7 files changed, 204 insertions(+), 135 deletions(-)
 create mode 100644 changelog.d/6043.feature

diff --git a/changelog.d/6043.feature b/changelog.d/6043.feature
new file mode 100644
index 000000000..cd27b0400
--- /dev/null
+++ b/changelog.d/6043.feature
@@ -0,0 +1 @@
+Implement new Client Server API endpoints `/account/3pid/add` and `/account/3pid/bind` as per [MSC2290](https://github.com/matrix-org/matrix-doc/pull/2290).
\ No newline at end of file
diff --git a/synapse/handlers/deactivate_account.py b/synapse/handlers/deactivate_account.py
index 5f804d1f1..d83912c9a 100644
--- a/synapse/handlers/deactivate_account.py
+++ b/synapse/handlers/deactivate_account.py
@@ -73,7 +73,9 @@ class DeactivateAccountHandler(BaseHandler):
         # unbinding
         identity_server_supports_unbinding = True
 
-        threepids = yield self.store.user_get_threepids(user_id)
+        # Retrieve the 3PIDs this user has bound to an identity server
+        threepids = yield self.store.user_get_bound_threepids(user_id)
+
         for threepid in threepids:
             try:
                 result = yield self._identity_handler.try_unbind_threepid(
diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py
index cd4700b52..d50d485e0 100644
--- a/synapse/handlers/identity.py
+++ b/synapse/handlers/identity.py
@@ -30,6 +30,7 @@ from synapse.api.errors import (
     HttpResponseException,
     SynapseError,
 )
+from synapse.config.emailconfig import ThreepidBehaviour
 from synapse.util.stringutils import random_string
 
 from ._base import BaseHandler
@@ -45,36 +46,6 @@ class IdentityHandler(BaseHandler):
         self.federation_http_client = hs.get_http_client()
         self.hs = hs
 
-    def _extract_items_from_creds_dict(self, creds):
-        """
-        Retrieve entries from a "credentials" dictionary
-
-        Args:
-            creds (dict[str, str]): Dictionary of credentials that contain the following keys:
-                * client_secret|clientSecret: A unique secret str provided by the client
-                * id_server|idServer: the domain of the identity server to query
-                * id_access_token: The access token to authenticate to the identity
-                    server with.
-
-        Returns:
-            tuple(str, str, str|None): A tuple containing the client_secret, the id_server,
-                and the id_access_token value if available.
-        """
-        client_secret = creds.get("client_secret") or creds.get("clientSecret")
-        if not client_secret:
-            raise SynapseError(
-                400, "No client_secret in creds", errcode=Codes.MISSING_PARAM
-            )
-
-        id_server = creds.get("id_server") or creds.get("idServer")
-        if not id_server:
-            raise SynapseError(
-                400, "No id_server in creds", errcode=Codes.MISSING_PARAM
-            )
-
-        id_access_token = creds.get("id_access_token")
-        return client_secret, id_server, id_access_token
-
     @defer.inlineCallbacks
     def threepid_from_creds(self, id_server, creds):
         """
@@ -113,35 +84,50 @@ class IdentityHandler(BaseHandler):
             data = yield self.http_client.get_json(url, query_params)
         except TimeoutError:
             raise SynapseError(500, "Timed out contacting identity server")
-        return data if "medium" in data else None
+        except HttpResponseException as e:
+            logger.info(
+                "%s returned %i for threepid validation for: %s",
+                id_server,
+                e.code,
+                creds,
+            )
+            return None
+
+        # Old versions of Sydent return a 200 http code even on a failed validation
+        # check. Thus, in addition to the HttpResponseException check above (which
+        # checks for non-200 errors), we need to make sure validation_session isn't
+        # actually an error, identified by the absence of a "medium" key
+        # See https://github.com/matrix-org/sydent/issues/215 for details
+        if "medium" in data:
+            return data
+
+        logger.info("%s reported non-validated threepid: %s", id_server, creds)
+        return None
 
     @defer.inlineCallbacks
-    def bind_threepid(self, creds, mxid, use_v2=True):
+    def bind_threepid(
+        self, client_secret, sid, mxid, id_server, id_access_token=None, use_v2=True
+    ):
         """Bind a 3PID to an identity server
 
         Args:
-            creds (dict[str, str]): Dictionary of credentials that contain the following keys:
-                * client_secret|clientSecret: A unique secret str provided by the client
-                * id_server|idServer: the domain of the identity server to query
-                * id_access_token: The access token to authenticate to the identity
-                    server with. Required if use_v2 is true
+            client_secret (str): A unique secret provided by the client
+
+            sid (str): The ID of the validation session
+
             mxid (str): The MXID to bind the 3PID to
-            use_v2 (bool): Whether to use v2 Identity Service API endpoints
+
+            id_server (str): The domain of the identity server to query
+
+            id_access_token (str): The access token to authenticate to the identity
+                server with, if necessary. Required if use_v2 is true
+
+            use_v2 (bool): Whether to use v2 Identity Service API endpoints. Defaults to True
 
         Returns:
             Deferred[dict]: The response from the identity server
         """
-        logger.debug("binding threepid %r to %s", creds, mxid)
-
-        client_secret, id_server, id_access_token = self._extract_items_from_creds_dict(
-            creds
-        )
-
-        sid = creds.get("sid")
-        if not sid:
-            raise SynapseError(
-                400, "No sid in three_pid_creds", errcode=Codes.MISSING_PARAM
-            )
+        logger.debug("Proxying threepid bind request for %s to %s", mxid, id_server)
 
         # If an id_access_token is not supplied, force usage of v1
         if id_access_token is None:
@@ -160,7 +146,6 @@ class IdentityHandler(BaseHandler):
             data = yield self.http_client.post_json_get_json(
                 bind_url, bind_data, headers=headers
             )
-            logger.debug("bound threepid %r to %s", creds, mxid)
 
             # Remember where we bound the threepid
             yield self.store.add_user_bound_threepid(
@@ -182,7 +167,10 @@ class IdentityHandler(BaseHandler):
             return data
 
         logger.info("Got 404 when POSTing JSON %s, falling back to v1 URL", bind_url)
-        return (yield self.bind_threepid(creds, mxid, use_v2=False))
+        res = yield self.bind_threepid(
+            client_secret, sid, mxid, id_server, id_access_token, use_v2=False
+        )
+        return res
 
     @defer.inlineCallbacks
     def try_unbind_threepid(self, mxid, threepid):
@@ -459,6 +447,50 @@ class IdentityHandler(BaseHandler):
         except TimeoutError:
             raise SynapseError(500, "Timed out contacting identity server")
 
+    @defer.inlineCallbacks
+    def validate_threepid_session(self, client_secret, sid):
+        """Validates a threepid session with only the client secret and session ID
+        Tries validating against any configured account_threepid_delegates as well as locally.
+
+        Args:
+            client_secret (str): A secret provided by the client
+
+            sid (str): The ID of the session
+
+        Returns:
+            Dict[str, str|int] if validation was successful, otherwise None
+        """
+        # XXX: We shouldn't need to keep wrapping and unwrapping this value
+        threepid_creds = {"client_secret": client_secret, "sid": sid}
+
+        # We don't actually know which medium this 3PID is. Thus we first assume it's email,
+        # and if validation fails we try msisdn
+        validation_session = None
+
+        # Try to validate as email
+        if self.hs.config.threepid_behaviour_email == ThreepidBehaviour.REMOTE:
+            # Ask our delegated email identity server
+            validation_session = yield self.threepid_from_creds(
+                self.hs.config.account_threepid_delegate_email, threepid_creds
+            )
+        elif self.hs.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL:
+            # Get a validated session matching these details
+            validation_session = yield self.store.get_threepid_validation_session(
+                "email", client_secret, sid=sid, validated=True
+            )
+
+        if validation_session:
+            return validation_session
+
+        # Try to validate as msisdn
+        if self.hs.config.account_threepid_delegate_msisdn:
+            # Ask our delegated msisdn identity server
+            validation_session = yield self.threepid_from_creds(
+                self.hs.config.account_threepid_delegate_msisdn, threepid_creds
+            )
+
+        return validation_session
+
 
 def create_id_access_token_header(id_access_token):
     """Create an Authorization header for passing to SimpleHttpClient as the header value
diff --git a/synapse/rest/client/v2_alpha/account.py b/synapse/rest/client/v2_alpha/account.py
index 1139bb156..b8c48dc8f 100644
--- a/synapse/rest/client/v2_alpha/account.py
+++ b/synapse/rest/client/v2_alpha/account.py
@@ -21,12 +21,7 @@ from six.moves import http_client
 from twisted.internet import defer
 
 from synapse.api.constants import LoginType
-from synapse.api.errors import (
-    Codes,
-    HttpResponseException,
-    SynapseError,
-    ThreepidValidationError,
-)
+from synapse.api.errors import Codes, SynapseError, ThreepidValidationError
 from synapse.config.emailconfig import ThreepidBehaviour
 from synapse.http.server import finish_request
 from synapse.http.servlet import (
@@ -485,10 +480,8 @@ class MsisdnThreepidRequestTokenRestServlet(RestServlet):
     def on_POST(self, request):
         body = parse_json_object_from_request(request)
         assert_params_in_dict(
-            body,
-            ["id_server", "client_secret", "country", "phone_number", "send_attempt"],
+            body, ["client_secret", "country", "phone_number", "send_attempt"]
         )
-        id_server = "https://" + body["id_server"]  # Assume https
         client_secret = body["client_secret"]
         country = body["country"]
         phone_number = body["phone_number"]
@@ -509,8 +502,23 @@ class MsisdnThreepidRequestTokenRestServlet(RestServlet):
         if existing_user_id is not None:
             raise SynapseError(400, "MSISDN is already in use", Codes.THREEPID_IN_USE)
 
+        if not self.hs.config.account_threepid_delegate_msisdn:
+            logger.warn(
+                "No upstream msisdn account_threepid_delegate configured on the server to "
+                "handle this request"
+            )
+            raise SynapseError(
+                400,
+                "Adding phone numbers to user account is not supported by this homeserver",
+            )
+
         ret = yield self.identity_handler.requestMsisdnToken(
-            id_server, country, phone_number, client_secret, send_attempt, next_link
+            self.hs.config.account_threepid_delegate_msisdn,
+            country,
+            phone_number,
+            client_secret,
+            send_attempt,
+            next_link,
         )
 
         return 200, ret
@@ -627,80 +635,87 @@ class ThreepidRestServlet(RestServlet):
         client_secret = threepid_creds["client_secret"]
         sid = threepid_creds["sid"]
 
-        # We don't actually know which medium this 3PID is. Thus we first assume it's email,
-        # and if validation fails we try msisdn
-        validation_session = None
-
-        # Try to validate as email
-        if self.hs.config.threepid_behaviour_email == ThreepidBehaviour.REMOTE:
-            # Ask our delegated email identity server
-            try:
-                validation_session = yield self.identity_handler.threepid_from_creds(
-                    self.hs.config.account_threepid_delegate_email, threepid_creds
-                )
-            except HttpResponseException:
-                logger.debug(
-                    "%s reported non-validated threepid: %s",
-                    self.hs.config.account_threepid_delegate_email,
-                    threepid_creds,
-                )
-        elif self.hs.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL:
-            # Get a validated session matching these details
-            validation_session = yield self.datastore.get_threepid_validation_session(
-                "email", client_secret, sid=sid, validated=True
+        validation_session = yield self.identity_handler.validate_threepid_session(
+            client_secret, sid
+        )
+        if validation_session:
+            yield self.auth_handler.add_threepid(
+                user_id,
+                validation_session["medium"],
+                validation_session["address"],
+                validation_session["validated_at"],
             )
-
-        # Old versions of Sydent return a 200 http code even on a failed validation check.
-        # Thus, in addition to the HttpResponseException check above (which checks for
-        # non-200 errors), we need to make sure validation_session isn't actually an error,
-        # identified by containing an "error" key
-        # See https://github.com/matrix-org/sydent/issues/215 for details
-        if validation_session and "error" not in validation_session:
-            yield self._add_threepid_to_account(user_id, validation_session)
             return 200, {}
 
-        # Try to validate as msisdn
-        if self.hs.config.account_threepid_delegate_msisdn:
-            # Ask our delegated msisdn identity server
-            try:
-                validation_session = yield self.identity_handler.threepid_from_creds(
-                    self.hs.config.account_threepid_delegate_msisdn, threepid_creds
-                )
-            except HttpResponseException:
-                logger.debug(
-                    "%s reported non-validated threepid: %s",
-                    self.hs.config.account_threepid_delegate_email,
-                    threepid_creds,
-                )
-
-            # Check that validation_session isn't actually an error due to old Sydent instances
-            # See explanatory comment above
-            if validation_session and "error" not in validation_session:
-                yield self._add_threepid_to_account(user_id, validation_session)
-                return 200, {}
-
         raise SynapseError(
             400, "No validated 3pid session found", Codes.THREEPID_AUTH_FAILED
         )
 
+
+class ThreepidAddRestServlet(RestServlet):
+    PATTERNS = client_patterns("/account/3pid/add$", releases=(), unstable=True)
+
+    def __init__(self, hs):
+        super(ThreepidAddRestServlet, self).__init__()
+        self.hs = hs
+        self.identity_handler = hs.get_handlers().identity_handler
+        self.auth = hs.get_auth()
+        self.auth_handler = hs.get_auth_handler()
+
     @defer.inlineCallbacks
-    def _add_threepid_to_account(self, user_id, validation_session):
-        """Add a threepid wrapped in a validation_session dict to an account
+    def on_POST(self, request):
+        requester = yield self.auth.get_user_by_req(request)
+        user_id = requester.user.to_string()
+        body = parse_json_object_from_request(request)
 
-        Args:
-            user_id (str): The mxid of the user to add this 3PID to
+        assert_params_in_dict(body, ["client_secret", "sid"])
+        client_secret = body["client_secret"]
+        sid = body["sid"]
 
-            validation_session (dict): A dict containing the following:
-                * medium       - medium of the threepid
-                * address      - address of the threepid
-                * validated_at - timestamp of when the validation occurred
-        """
-        yield self.auth_handler.add_threepid(
-            user_id,
-            validation_session["medium"],
-            validation_session["address"],
-            validation_session["validated_at"],
+        validation_session = yield self.identity_handler.validate_threepid_session(
+            client_secret, sid
         )
+        if validation_session:
+            yield self.auth_handler.add_threepid(
+                user_id,
+                validation_session["medium"],
+                validation_session["address"],
+                validation_session["validated_at"],
+            )
+            return 200, {}
+
+        raise SynapseError(
+            400, "No validated 3pid session found", Codes.THREEPID_AUTH_FAILED
+        )
+
+
+class ThreepidBindRestServlet(RestServlet):
+    PATTERNS = client_patterns("/account/3pid/bind$", releases=(), unstable=True)
+
+    def __init__(self, hs):
+        super(ThreepidBindRestServlet, self).__init__()
+        self.hs = hs
+        self.identity_handler = hs.get_handlers().identity_handler
+        self.auth = hs.get_auth()
+
+    @defer.inlineCallbacks
+    def on_POST(self, request):
+        body = parse_json_object_from_request(request)
+
+        assert_params_in_dict(body, ["id_server", "sid", "client_secret"])
+        id_server = body["id_server"]
+        sid = body["sid"]
+        client_secret = body["client_secret"]
+        id_access_token = body.get("id_access_token")  # optional
+
+        requester = yield self.auth.get_user_by_req(request)
+        user_id = requester.user.to_string()
+
+        yield self.identity_handler.bind_threepid(
+            client_secret, sid, user_id, id_server, id_access_token
+        )
+
+        return 200, {}
 
 
 class ThreepidUnbindRestServlet(RestServlet):
@@ -794,6 +809,8 @@ def register_servlets(hs, http_server):
     MsisdnThreepidRequestTokenRestServlet(hs).register(http_server)
     AddThreepidSubmitTokenServlet(hs).register(http_server)
     ThreepidRestServlet(hs).register(http_server)
+    ThreepidAddRestServlet(hs).register(http_server)
+    ThreepidBindRestServlet(hs).register(http_server)
     ThreepidUnbindRestServlet(hs).register(http_server)
     ThreepidDeleteRestServlet(hs).register(http_server)
     WhoamiRestServlet(hs).register(http_server)
diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py
index e99b1f5c4..135a70808 100644
--- a/synapse/rest/client/v2_alpha/register.py
+++ b/synapse/rest/client/v2_alpha/register.py
@@ -246,6 +246,12 @@ class RegistrationSubmitTokenServlet(RestServlet):
                 [self.config.email_registration_template_failure_html],
             )
 
+        if self.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL:
+            self.failure_email_template, = load_jinja2_templates(
+                self.config.email_template_dir,
+                [self.config.email_registration_template_failure_html],
+            )
+
     @defer.inlineCallbacks
     def on_GET(self, request, medium):
         if medium != "email":
diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py
index da27ad76b..805411a6b 100644
--- a/synapse/storage/registration.py
+++ b/synapse/storage/registration.py
@@ -586,6 +586,26 @@ class RegistrationWorkerStore(SQLBaseStore):
             desc="add_user_bound_threepid",
         )
 
+    def user_get_bound_threepids(self, user_id):
+        """Get the threepids that a user has bound to an identity server through the homeserver
+        The homeserver remembers where binds to an identity server occurred. Using this
+        method can retrieve those threepids.
+
+        Args:
+            user_id (str): The ID of the user to retrieve threepids for
+
+        Returns:
+            Deferred[list[dict]]: List of dictionaries containing the following:
+                medium (str): The medium of the threepid (e.g "email")
+                address (str): The address of the threepid (e.g "bob@example.com")
+        """
+        return self._simple_select_list(
+            table="user_threepid_id_server",
+            keyvalues={"user_id": user_id},
+            retcols=["medium", "address"],
+            desc="user_get_bound_threepids",
+        )
+
     def remove_user_bound_threepid(self, user_id, medium, address, id_server):
         """The server proxied an unbind request to the given identity server on
         behalf of the given user, so we remove the mapping of threepid to
@@ -655,7 +675,7 @@ class RegistrationWorkerStore(SQLBaseStore):
         self, medium, client_secret, address=None, sid=None, validated=True
     ):
         """Gets a session_id and last_send_attempt (if available) for a
-        client_secret/medium/(address|session_id) combo
+        combination of validation metadata
 
         Args:
             medium (str|None): The medium of the 3PID
diff --git a/sytest-blacklist b/sytest-blacklist
index 04698cb06..11785fd43 100644
--- a/sytest-blacklist
+++ b/sytest-blacklist
@@ -29,12 +29,3 @@ Enabling an unknown default rule fails with 404
 
 # Blacklisted due to https://github.com/matrix-org/synapse/issues/1663
 New federated private chats get full presence information (SYN-115)
-
-# Blacklisted temporarily due to https://github.com/matrix-org/matrix-doc/pull/2290
-# These sytests need to be updated with new endpoints, which will come in a later PR
-# That PR will also remove this blacklist
-Can bind 3PID via home server
-Can bind and unbind 3PID via homeserver
-3PIDs are unbound after account deactivation
-Can bind and unbind 3PID via /unbind by specifying the identity server
-Can bind and unbind 3PID via /unbind without specifying the identity server

From 2ade05dca3d6da67e35c3a8ccdd278221f2566ed Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Mon, 23 Sep 2019 14:16:10 +0100
Subject: [PATCH 080/276] Add last seen info to devices table.

This allows us to purge old user_ips entries without having to preserve
the latest last seen info for active devices.
---
 synapse/storage/client_ips.py                 | 15 +++++++++++++
 .../schema/delta/56/devices_last_seen.sql     | 21 +++++++++++++++++++
 2 files changed, 36 insertions(+)
 create mode 100644 synapse/storage/schema/delta/56/devices_last_seen.sql

diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py
index 6db8c5407..4db2e7f48 100644
--- a/synapse/storage/client_ips.py
+++ b/synapse/storage/client_ips.py
@@ -354,6 +354,21 @@ class ClientIpStore(background_updates.BackgroundUpdateStore):
                     },
                     lock=False,
                 )
+
+                # Technically an access token might not be associated with
+                # a device so we need to check.
+                if device_id:
+                    self._simple_upsert_txn(
+                        txn,
+                        table="devices",
+                        keyvalues={"user_id": user_id, "device_id": device_id},
+                        values={
+                            "user_agent": user_agent,
+                            "last_seen": last_seen,
+                            "ip": ip,
+                        },
+                        lock=False,
+                    )
             except Exception as e:
                 # Failed to upsert, log and continue
                 logger.error("Failed to insert client IP %r: %r", entry, e)
diff --git a/synapse/storage/schema/delta/56/devices_last_seen.sql b/synapse/storage/schema/delta/56/devices_last_seen.sql
new file mode 100644
index 000000000..8818eeeb7
--- /dev/null
+++ b/synapse/storage/schema/delta/56/devices_last_seen.sql
@@ -0,0 +1,21 @@
+/* Copyright 2019 Matrix.org Foundation CIC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Track last seen information for a device in the devices table, rather
+-- than relying on it being in the user_ips table (which we want to be able
+-- to purge old entries from)
+ALTER TABLE devices ADD COLUMN last_seen BIGINT;
+ALTER TABLE devices ADD COLUMN ip TEXT;
+ALTER TABLE devices ADD COLUMN user_agent TEXT;

From ed80231ade20ce7881bb2026692fe3a6252f1c02 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Mon, 23 Sep 2019 15:59:43 +0100
Subject: [PATCH 081/276] Add BG update to populate devices last seen info

---
 synapse/storage/client_ips.py                 | 52 +++++++++++++++++++
 .../schema/delta/56/devices_last_seen.sql     |  3 ++
 2 files changed, 55 insertions(+)

diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py
index 4db2e7f48..883956226 100644
--- a/synapse/storage/client_ips.py
+++ b/synapse/storage/client_ips.py
@@ -85,6 +85,11 @@ class ClientIpStore(background_updates.BackgroundUpdateStore):
             "user_ips_drop_nonunique_index", self._remove_user_ip_nonunique
         )
 
+        # Update the last seen info in devices.
+        self.register_background_update_handler(
+            "devices_last_seen", self._devices_last_seen_update
+        )
+
         # (user_id, access_token, ip,) -> (user_agent, device_id, last_seen)
         self._batch_row_update = {}
 
@@ -485,3 +490,50 @@ class ClientIpStore(background_updates.BackgroundUpdateStore):
             }
             for (access_token, ip), (user_agent, last_seen) in iteritems(results)
         )
+
+    @defer.inlineCallbacks
+    def _devices_last_seen_update(self, progress, batch_size):
+        """Background update to insert last seen info into devices table
+        """
+
+        last_user_id = progress.get("last_user_id", "")
+        last_device_id = progress.get("last_device_id", "")
+
+        def _devices_last_seen_update_txn(txn):
+            sql = """
+                SELECT u.last_seen, u.ip, u.user_agent, user_id, device_id FROM devices
+                INNER JOIN user_ips AS u USING (user_id, device_id)
+                WHERE user_id > ? OR (user_id = ? AND device_id > ?)
+                ORDER BY user_id ASC, device_id ASC
+                LIMIT ?
+            """
+            txn.execute(sql, (last_user_id, last_user_id, last_device_id, batch_size))
+
+            rows = txn.fetchall()
+            if not rows:
+                return 0
+
+            sql = """
+                UPDATE devices
+                SET last_seen = ?, ip = ?, user_agent = ?
+                WHERE user_id = ? AND device_id = ?
+            """
+            txn.execute_batch(sql, rows)
+
+            _, _, _, user_id, device_id = rows[-1]
+            self._background_update_progress_txn(
+                txn,
+                "devices_last_seen",
+                {"last_user_id": user_id, "last_device_id": device_id},
+            )
+
+            return len(rows)
+
+        updated = yield self.runInteraction(
+            "_devices_last_seen_update", _devices_last_seen_update_txn
+        )
+
+        if not updated:
+            yield self._end_background_update("devices_last_seen")
+
+        return updated
diff --git a/synapse/storage/schema/delta/56/devices_last_seen.sql b/synapse/storage/schema/delta/56/devices_last_seen.sql
index 8818eeeb7..dfa902d0b 100644
--- a/synapse/storage/schema/delta/56/devices_last_seen.sql
+++ b/synapse/storage/schema/delta/56/devices_last_seen.sql
@@ -19,3 +19,6 @@
 ALTER TABLE devices ADD COLUMN last_seen BIGINT;
 ALTER TABLE devices ADD COLUMN ip TEXT;
 ALTER TABLE devices ADD COLUMN user_agent TEXT;
+
+INSERT INTO background_updates (update_name, progress_json) VALUES
+  ('devices_last_seen', '{}');

From 51d28272e20d799b2e35a8a14b3c1d9d5f555d10 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Mon, 23 Sep 2019 16:00:18 +0100
Subject: [PATCH 082/276] Query devices table for last seen info.

This a) is simpler than querying user_ips directly and b) means we can
purge older entries from user_ips without losing the required info.

The storage functions now no longer return the access_token, since it
was unused.
---
 synapse/storage/client_ips.py    | 57 +++++---------------------------
 tests/storage/test_client_ips.py |  1 -
 2 files changed, 8 insertions(+), 50 deletions(-)

diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py
index 883956226..a4e6d9dbe 100644
--- a/synapse/storage/client_ips.py
+++ b/synapse/storage/client_ips.py
@@ -392,19 +392,14 @@ class ClientIpStore(background_updates.BackgroundUpdateStore):
             keys giving the column names
         """
 
-        res = yield self.runInteraction(
-            "get_last_client_ip_by_device",
-            self._get_last_client_ip_by_device_txn,
-            user_id,
-            device_id,
-            retcols=(
-                "user_id",
-                "access_token",
-                "ip",
-                "user_agent",
-                "device_id",
-                "last_seen",
-            ),
+        keyvalues = {"user_id": user_id}
+        if device_id:
+            keyvalues["device_id"] = device_id
+
+        res = yield self._simple_select_list(
+            table="devices",
+            keyvalues=keyvalues,
+            retcols=("user_id", "ip", "user_agent", "device_id", "last_seen"),
         )
 
         ret = {(d["user_id"], d["device_id"]): d for d in res}
@@ -423,42 +418,6 @@ class ClientIpStore(background_updates.BackgroundUpdateStore):
                     }
         return ret
 
-    @classmethod
-    def _get_last_client_ip_by_device_txn(cls, txn, user_id, device_id, retcols):
-        where_clauses = []
-        bindings = []
-        if device_id is None:
-            where_clauses.append("user_id = ?")
-            bindings.extend((user_id,))
-        else:
-            where_clauses.append("(user_id = ? AND device_id = ?)")
-            bindings.extend((user_id, device_id))
-
-        if not where_clauses:
-            return []
-
-        inner_select = (
-            "SELECT MAX(last_seen) mls, user_id, device_id FROM user_ips "
-            "WHERE %(where)s "
-            "GROUP BY user_id, device_id"
-        ) % {"where": " OR ".join(where_clauses)}
-
-        sql = (
-            "SELECT %(retcols)s FROM user_ips "
-            "JOIN (%(inner_select)s) ips ON"
-            "    user_ips.last_seen = ips.mls AND"
-            "    user_ips.user_id = ips.user_id AND"
-            "    (user_ips.device_id = ips.device_id OR"
-            "         (user_ips.device_id IS NULL AND ips.device_id IS NULL)"
-            "    )"
-        ) % {
-            "retcols": ",".join("user_ips." + c for c in retcols),
-            "inner_select": inner_select,
-        }
-
-        txn.execute(sql, bindings)
-        return cls.cursor_to_dict(txn)
-
     @defer.inlineCallbacks
     def get_user_ip_and_agents(self, user):
         user_id = user.to_string()
diff --git a/tests/storage/test_client_ips.py b/tests/storage/test_client_ips.py
index 09305c3bf..6ac465408 100644
--- a/tests/storage/test_client_ips.py
+++ b/tests/storage/test_client_ips.py
@@ -55,7 +55,6 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase):
             {
                 "user_id": user_id,
                 "device_id": "device_id",
-                "access_token": "access_token",
                 "ip": "ip",
                 "user_agent": "user_agent",
                 "last_seen": 12345678000,

From 691a70190b76aa29481f6299580b71160068ef8e Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Mon, 23 Sep 2019 16:04:41 +0100
Subject: [PATCH 083/276] Newsfile

---
 changelog.d/6089.misc | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/6089.misc

diff --git a/changelog.d/6089.misc b/changelog.d/6089.misc
new file mode 100644
index 000000000..fa3c197c5
--- /dev/null
+++ b/changelog.d/6089.misc
@@ -0,0 +1 @@
+Move last seen info into devices table.

From acb62a7cc6973618397a868289b5881f1c3c1ec3 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Mon, 23 Sep 2019 16:50:31 +0100
Subject: [PATCH 084/276] Test background update

---
 tests/storage/test_client_ips.py | 79 ++++++++++++++++++++++++++++++++
 1 file changed, 79 insertions(+)

diff --git a/tests/storage/test_client_ips.py b/tests/storage/test_client_ips.py
index 6ac465408..76fe65b59 100644
--- a/tests/storage/test_client_ips.py
+++ b/tests/storage/test_client_ips.py
@@ -200,6 +200,85 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase):
         active = self.get_success(self.store.user_last_seen_monthly_active(user_id))
         self.assertTrue(active)
 
+    def test_devices_last_seen_bg_update(self):
+        # First make sure we have completed all updates.
+        while not self.get_success(self.store.has_completed_background_updates()):
+            self.get_success(self.store.do_next_background_update(100), by=0.1)
+
+        # Insert a user IP
+        user_id = "@user:id"
+        self.get_success(
+            self.store.insert_client_ip(
+                user_id, "access_token", "ip", "user_agent", "device_id"
+            )
+        )
+
+        # Force persisting to disk
+        self.reactor.advance(200)
+
+        # But clear the associated entry in devices table
+        self.get_success(
+            self.store._simple_update(
+                table="devices",
+                keyvalues={"user_id": user_id, "device_id": "device_id"},
+                updatevalues={"last_seen": None, "ip": None, "user_agent": None},
+                desc="test_devices_last_seen_bg_update",
+            )
+        )
+
+        # We should now get nulls when querying
+        result = self.get_success(
+            self.store.get_last_client_ip_by_device(user_id, "device_id")
+        )
+
+        r = result[(user_id, "device_id")]
+        self.assertDictContainsSubset(
+            {
+                "user_id": user_id,
+                "device_id": "device_id",
+                "ip": None,
+                "user_agent": None,
+                "last_seen": None,
+            },
+            r,
+        )
+
+        # Register the background update to run again.
+        self.get_success(
+            self.store._simple_insert(
+                table="background_updates",
+                values={
+                    "update_name": "devices_last_seen",
+                    "progress_json": "{}",
+                    "depends_on": None,
+                },
+            )
+        )
+
+        # ... and tell the DataStore that it hasn't finished all updates yet
+        self.store._all_done = False
+
+        # Now let's actually drive the updates to completion
+        while not self.get_success(self.store.has_completed_background_updates()):
+            self.get_success(self.store.do_next_background_update(100), by=0.1)
+
+        # We should now get the correct result again
+        result = self.get_success(
+            self.store.get_last_client_ip_by_device(user_id, "device_id")
+        )
+
+        r = result[(user_id, "device_id")]
+        self.assertDictContainsSubset(
+            {
+                "user_id": user_id,
+                "device_id": "device_id",
+                "ip": "ip",
+                "user_agent": "user_agent",
+                "last_seen": 0,
+            },
+            r,
+        )
+
 
 class ClientIpAuthTestCase(unittest.HomeserverTestCase):
 

From 2858d10671f889796ca79712e29b8f35b8c9cd98 Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Mon, 23 Sep 2019 17:22:01 +0100
Subject: [PATCH 085/276] Fix the return value in the
 users_set_deactivated_flag background job

---
 synapse/storage/registration.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py
index 805411a6b..92dd42fb8 100644
--- a/synapse/storage/registration.py
+++ b/synapse/storage/registration.py
@@ -860,18 +860,18 @@ class RegistrationStore(
             )
 
             if batch_size > len(rows):
-                return True
+                return (True, rows_processed_nb)
             else:
-                return False
+                return (True, rows_processed_nb)
 
-        end = yield self.runInteraction(
+        end, nb_processed = yield self.runInteraction(
             "users_set_deactivated_flag", _background_update_set_deactivated_flag_txn
         )
 
         if end:
             yield self._end_background_update("users_set_deactivated_flag")
 
-        return batch_size
+        return nb_processed
 
     @defer.inlineCallbacks
     def add_access_token_to_user(self, user_id, token, device_id, valid_until_ms):

From a2a09d42dd0ac45eb56ea1df6d1416baf961228b Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Mon, 23 Sep 2019 17:22:55 +0100
Subject: [PATCH 086/276] Changelog

---
 changelog.d/6092.bugfix | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/6092.bugfix

diff --git a/changelog.d/6092.bugfix b/changelog.d/6092.bugfix
new file mode 100644
index 000000000..01a7498ec
--- /dev/null
+++ b/changelog.d/6092.bugfix
@@ -0,0 +1 @@
+Fix the logged number of updated items for the users_set_deactivated_flag background update.

From 323d685bf743a660339be574aba68c0a63cb6483 Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Mon, 23 Sep 2019 17:23:49 +0100
Subject: [PATCH 087/276] Typo

---
 synapse/storage/registration.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py
index 92dd42fb8..4c84d804f 100644
--- a/synapse/storage/registration.py
+++ b/synapse/storage/registration.py
@@ -862,7 +862,7 @@ class RegistrationStore(
             if batch_size > len(rows):
                 return (True, rows_processed_nb)
             else:
-                return (True, rows_processed_nb)
+                return (False, rows_processed_nb)
 
         end, nb_processed = yield self.runInteraction(
             "users_set_deactivated_flag", _background_update_set_deactivated_flag_txn

From 2c99c634532a62fa3479c1f90929b3eabe7880bc Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Mon, 23 Sep 2019 18:49:00 +0200
Subject: [PATCH 088/276] Add POST submit_token endpoint for MSISDN (#6078)

First part of solving #6076
---
 changelog.d/6078.feature                |  1 +
 synapse/handlers/identity.py            | 34 ++++++++++++++++++
 synapse/rest/client/v2_alpha/account.py | 47 +++++++++++++++++++++++--
 3 files changed, 80 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/6078.feature

diff --git a/changelog.d/6078.feature b/changelog.d/6078.feature
new file mode 100644
index 000000000..fae1e5232
--- /dev/null
+++ b/changelog.d/6078.feature
@@ -0,0 +1 @@
+Add `POST /add_threepid/msisdn/submit_token` endpoint for proxying submitToken on an account_threepid_handler.
\ No newline at end of file
diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py
index d50d485e0..af6f59194 100644
--- a/synapse/handlers/identity.py
+++ b/synapse/handlers/identity.py
@@ -491,6 +491,40 @@ class IdentityHandler(BaseHandler):
 
         return validation_session
 
+    @defer.inlineCallbacks
+    def proxy_msisdn_submit_token(self, id_server, client_secret, sid, token):
+        """Proxy a POST submitToken request to an identity server for verification purposes
+
+        Args:
+            id_server (str): The identity server URL to contact
+
+            client_secret (str): Secret provided by the client
+
+            sid (str): The ID of the session
+
+            token (str): The verification token
+
+        Raises:
+            SynapseError: If we failed to contact the identity server
+
+        Returns:
+            Deferred[dict]: The response dict from the identity server
+        """
+        body = {"client_secret": client_secret, "sid": sid, "token": token}
+
+        try:
+            return (
+                yield self.http_client.post_json_get_json(
+                    id_server + "/_matrix/identity/api/v1/validate/msisdn/submitToken",
+                    body,
+                )
+            )
+        except TimeoutError:
+            raise SynapseError(500, "Timed out contacting identity server")
+        except HttpResponseException as e:
+            logger.warning("Error contacting msisdn account_threepid_delegate: %s", e)
+            raise SynapseError(400, "Error contacting the identity server")
+
 
 def create_id_access_token_header(id_access_token):
     """Create an Authorization header for passing to SimpleHttpClient as the header value
diff --git a/synapse/rest/client/v2_alpha/account.py b/synapse/rest/client/v2_alpha/account.py
index b8c48dc8f..f99676fd3 100644
--- a/synapse/rest/client/v2_alpha/account.py
+++ b/synapse/rest/client/v2_alpha/account.py
@@ -524,7 +524,7 @@ class MsisdnThreepidRequestTokenRestServlet(RestServlet):
         return 200, ret
 
 
-class AddThreepidSubmitTokenServlet(RestServlet):
+class AddThreepidEmailSubmitTokenServlet(RestServlet):
     """Handles 3PID validation token submission for adding an email to a user's account"""
 
     PATTERNS = client_patterns(
@@ -600,6 +600,48 @@ class AddThreepidSubmitTokenServlet(RestServlet):
         finish_request(request)
 
 
+class AddThreepidMsisdnSubmitTokenServlet(RestServlet):
+    """Handles 3PID validation token submission for adding a phone number to a user's
+    account
+    """
+
+    PATTERNS = client_patterns(
+        "/add_threepid/msisdn/submit_token$", releases=(), unstable=True
+    )
+
+    def __init__(self, hs):
+        """
+        Args:
+            hs (synapse.server.HomeServer): server
+        """
+        super().__init__()
+        self.config = hs.config
+        self.clock = hs.get_clock()
+        self.store = hs.get_datastore()
+        self.identity_handler = hs.get_handlers().identity_handler
+
+    @defer.inlineCallbacks
+    def on_POST(self, request):
+        if not self.config.account_threepid_delegate_msisdn:
+            raise SynapseError(
+                400,
+                "This homeserver is not validating phone numbers. Use an identity server "
+                "instead.",
+            )
+
+        body = parse_json_object_from_request(request)
+        assert_params_in_dict(body, ["client_secret", "sid", "token"])
+
+        # Proxy submit_token request to msisdn threepid delegate
+        response = yield self.identity_handler.proxy_msisdn_submit_token(
+            self.config.account_threepid_delegate_msisdn,
+            body["client_secret"],
+            body["sid"],
+            body["token"],
+        )
+        return 200, response
+
+
 class ThreepidRestServlet(RestServlet):
     PATTERNS = client_patterns("/account/3pid$")
 
@@ -807,7 +849,8 @@ def register_servlets(hs, http_server):
     DeactivateAccountRestServlet(hs).register(http_server)
     EmailThreepidRequestTokenRestServlet(hs).register(http_server)
     MsisdnThreepidRequestTokenRestServlet(hs).register(http_server)
-    AddThreepidSubmitTokenServlet(hs).register(http_server)
+    AddThreepidEmailSubmitTokenServlet(hs).register(http_server)
+    AddThreepidMsisdnSubmitTokenServlet(hs).register(http_server)
     ThreepidRestServlet(hs).register(http_server)
     ThreepidAddRestServlet(hs).register(http_server)
     ThreepidBindRestServlet(hs).register(http_server)

From b38aa82b83334573e40cb56f076eaf820c51c9ba Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Mon, 23 Sep 2019 19:52:43 +0200
Subject: [PATCH 089/276] Add m.require_identity_server to /versions
 unstable_features (#5972)

As MSC2263 states, m.require_identity_server must be set to false when the homeserver does not require an identity server to be provided by the client for the purposes of email registration or password reset.

Adds an m.require_identity_server flag to the unstable_features section of /versions. This will advertise that Synapse no longer needs id_server as a parameter.
---
 changelog.d/5972.misc           |  1 +
 synapse/rest/client/versions.py | 14 +++++++++++++-
 2 files changed, 14 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/5972.misc

diff --git a/changelog.d/5972.misc b/changelog.d/5972.misc
new file mode 100644
index 000000000..1dc217e89
--- /dev/null
+++ b/changelog.d/5972.misc
@@ -0,0 +1 @@
+Add m.require_identity_server flag to /version's unstable_features.
\ No newline at end of file
diff --git a/synapse/rest/client/versions.py b/synapse/rest/client/versions.py
index 0058b6b45..3c9ec59d7 100644
--- a/synapse/rest/client/versions.py
+++ b/synapse/rest/client/versions.py
@@ -48,7 +48,19 @@ class VersionsRestServlet(RestServlet):
                     "r0.5.0",
                 ],
                 # as per MSC1497:
-                "unstable_features": {"m.lazy_load_members": True},
+                "unstable_features": {
+                    "m.lazy_load_members": True,
+                    # Advertise to clients that they need not include an `id_server`
+                    # parameter during registration or password reset, as Synapse now decides
+                    # itself which identity server to use (or none at all).
+                    #
+                    # This is also used by a client when they wish to bind a 3PID to their
+                    # account, but not bind it to an identity server, the endpoint for which
+                    # also requires `id_server`. If the homeserver is handling 3PID
+                    # verification itself, there is no need to ask the user for `id_server` to
+                    # be supplied.
+                    "m.require_identity_server": False,
+                },
             },
         )
 

From 1ea3ed76201de678c8c19c568bb3456ae4989a97 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Mon, 23 Sep 2019 21:19:25 +0200
Subject: [PATCH 090/276] Add m.id_access_token to /versions unstable_features
 (MSC2264) (#5974)

Adds a flag to /versions' unstable_features section indicating that this Synapse understands what an id_access_token is, as per MSC2264.

Fixes #5927
---
 changelog.d/5974.feature        | 1 +
 synapse/rest/client/versions.py | 3 +++
 2 files changed, 4 insertions(+)
 create mode 100644 changelog.d/5974.feature

diff --git a/changelog.d/5974.feature b/changelog.d/5974.feature
new file mode 100644
index 000000000..387a444fc
--- /dev/null
+++ b/changelog.d/5974.feature
@@ -0,0 +1 @@
+Add m.id_access_token to unstable_features in /versions as per MSC2264.
\ No newline at end of file
diff --git a/synapse/rest/client/versions.py b/synapse/rest/client/versions.py
index 3c9ec59d7..fdab0ddb4 100644
--- a/synapse/rest/client/versions.py
+++ b/synapse/rest/client/versions.py
@@ -50,6 +50,9 @@ class VersionsRestServlet(RestServlet):
                 # as per MSC1497:
                 "unstable_features": {
                     "m.lazy_load_members": True,
+                    # as per MSC2190, as amended by MSC2264
+                    # to be removed in r0.6.0
+                    "m.id_access_token": True,
                     # Advertise to clients that they need not include an `id_server`
                     # parameter during registration or password reset, as Synapse now decides
                     # itself which identity server to use (or none at all).

From e08ea43463bacd5efacbf6c790c6be0f3cd06ce6 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Mon, 23 Sep 2019 21:23:20 +0200
Subject: [PATCH 091/276] Use the federation blacklist for requests to
 untrusted Identity Servers (#6000)

Uses a SimpleHttpClient instance equipped with the federation_ip_range_blacklist list for requests to identity servers provided by user input. Does not use a blacklist when contacting identity servers specified by account_threepid_delegates. The homeserver trusts the latter and we don't want to prevent homeserver admins from specifying delegates that are on internal IP addresses.

Fixes #5935
---
 changelog.d/6000.feature        |  1 +
 docs/sample_config.yaml         |  3 +++
 synapse/config/server.py        |  3 +++
 synapse/handlers/identity.py    | 18 +++++++++++++++---
 synapse/handlers/room_member.py |  7 ++++++-
 5 files changed, 28 insertions(+), 4 deletions(-)
 create mode 100644 changelog.d/6000.feature

diff --git a/changelog.d/6000.feature b/changelog.d/6000.feature
new file mode 100644
index 000000000..0a159bd10
--- /dev/null
+++ b/changelog.d/6000.feature
@@ -0,0 +1 @@
+Apply the federation blacklist to requests to identity servers.
\ No newline at end of file
diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml
index 61d9f09a9..e53b979c3 100644
--- a/docs/sample_config.yaml
+++ b/docs/sample_config.yaml
@@ -110,6 +110,9 @@ pid_file: DATADIR/homeserver.pid
 # blacklist IP address CIDR ranges. If this option is not specified, or
 # specified with an empty list, no ip range blacklist will be enforced.
 #
+# As of Synapse v1.4.0 this option also affects any outbound requests to identity
+# servers provided by user input.
+#
 # (0.0.0.0 and :: are always blacklisted, whether or not they are explicitly
 # listed here, since they correspond to unroutable addresses.)
 #
diff --git a/synapse/config/server.py b/synapse/config/server.py
index 7f8d31595..419787a89 100644
--- a/synapse/config/server.py
+++ b/synapse/config/server.py
@@ -545,6 +545,9 @@ class ServerConfig(Config):
         # blacklist IP address CIDR ranges. If this option is not specified, or
         # specified with an empty list, no ip range blacklist will be enforced.
         #
+        # As of Synapse v1.4.0 this option also affects any outbound requests to identity
+        # servers provided by user input.
+        #
         # (0.0.0.0 and :: are always blacklisted, whether or not they are explicitly
         # listed here, since they correspond to unroutable addresses.)
         #
diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py
index af6f59194..264bdc218 100644
--- a/synapse/handlers/identity.py
+++ b/synapse/handlers/identity.py
@@ -31,6 +31,7 @@ from synapse.api.errors import (
     SynapseError,
 )
 from synapse.config.emailconfig import ThreepidBehaviour
+from synapse.http.client import SimpleHttpClient
 from synapse.util.stringutils import random_string
 
 from ._base import BaseHandler
@@ -42,7 +43,12 @@ class IdentityHandler(BaseHandler):
     def __init__(self, hs):
         super(IdentityHandler, self).__init__(hs)
 
-        self.http_client = hs.get_simple_http_client()
+        self.http_client = SimpleHttpClient(hs)
+        # We create a blacklisting instance of SimpleHttpClient for contacting identity
+        # servers specified by clients
+        self.blacklisting_http_client = SimpleHttpClient(
+            hs, ip_blacklist=hs.config.federation_ip_range_blacklist
+        )
         self.federation_http_client = hs.get_http_client()
         self.hs = hs
 
@@ -143,7 +149,9 @@ class IdentityHandler(BaseHandler):
             bind_url = "https://%s/_matrix/identity/api/v1/3pid/bind" % (id_server,)
 
         try:
-            data = yield self.http_client.post_json_get_json(
+            # Use the blacklisting http client as this call is only to identity servers
+            # provided by a client
+            data = yield self.blacklisting_http_client.post_json_get_json(
                 bind_url, bind_data, headers=headers
             )
 
@@ -246,7 +254,11 @@ class IdentityHandler(BaseHandler):
         headers = {b"Authorization": auth_headers}
 
         try:
-            yield self.http_client.post_json_get_json(url, content, headers)
+            # Use the blacklisting http client as this call is only to identity servers
+            # provided by a client
+            yield self.blacklisting_http_client.post_json_get_json(
+                url, content, headers
+            )
             changed = True
         except HttpResponseException as e:
             changed = False
diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py
index 39df0f128..94cd0cf3e 100644
--- a/synapse/handlers/room_member.py
+++ b/synapse/handlers/room_member.py
@@ -31,6 +31,7 @@ from synapse import types
 from synapse.api.constants import EventTypes, Membership
 from synapse.api.errors import AuthError, Codes, HttpResponseException, SynapseError
 from synapse.handlers.identity import LookupAlgorithm, create_id_access_token_header
+from synapse.http.client import SimpleHttpClient
 from synapse.types import RoomID, UserID
 from synapse.util.async_helpers import Linearizer
 from synapse.util.distributor import user_joined_room, user_left_room
@@ -62,7 +63,11 @@ class RoomMemberHandler(object):
         self.auth = hs.get_auth()
         self.state_handler = hs.get_state_handler()
         self.config = hs.config
-        self.simple_http_client = hs.get_simple_http_client()
+        # We create a blacklisting instance of SimpleHttpClient for contacting identity
+        # servers specified by clients
+        self.simple_http_client = SimpleHttpClient(
+            hs, ip_blacklist=hs.config.federation_ip_range_blacklist
+        )
 
         self.federation_handler = hs.get_handlers().federation_handler
         self.directory_handler = hs.get_handlers().directory_handler

From 2b071a2ff1ce59c5b7a4930c471470c739c5efe2 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Mon, 23 Sep 2019 21:46:34 +0200
Subject: [PATCH 092/276] Add an unstable feature flag for separate add/bind
 3pid APIs (#6044)

Add an m.separate_add_and_bind flag set to True. See MSC2290's Backward Compatibility section for details.
---
 changelog.d/6044.feature        | 1 +
 synapse/rest/client/versions.py | 2 ++
 2 files changed, 3 insertions(+)
 create mode 100644 changelog.d/6044.feature

diff --git a/changelog.d/6044.feature b/changelog.d/6044.feature
new file mode 100644
index 000000000..7dc05d484
--- /dev/null
+++ b/changelog.d/6044.feature
@@ -0,0 +1 @@
+Add an unstable feature flag for separate add/bind 3pid APIs.
\ No newline at end of file
diff --git a/synapse/rest/client/versions.py b/synapse/rest/client/versions.py
index fdab0ddb4..1044ae7b4 100644
--- a/synapse/rest/client/versions.py
+++ b/synapse/rest/client/versions.py
@@ -63,6 +63,8 @@ class VersionsRestServlet(RestServlet):
                     # verification itself, there is no need to ask the user for `id_server` to
                     # be supplied.
                     "m.require_identity_server": False,
+                    # as per MSC2290
+                    "m.separate_add_and_bind": True,
                 },
             },
         )

From 50776261e1565afe45a1cfd4a991c24110c2e519 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Mon, 23 Sep 2019 22:21:03 +0200
Subject: [PATCH 093/276] Add submit_url response parameter to msisdn
 /requestToken (#6079)

Second part of solving #6076
Fixes #6076

We return a submit_url parameter on calls to POST */msisdn/requestToken so that clients know where to submit token information.
---
 changelog.d/6079.feature       |  1 +
 docs/sample_config.yaml        |  2 ++
 synapse/config/registration.py |  2 ++
 synapse/handlers/identity.py   | 12 +++++++++++-
 4 files changed, 16 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/6079.feature

diff --git a/changelog.d/6079.feature b/changelog.d/6079.feature
new file mode 100644
index 000000000..bcbb49ac5
--- /dev/null
+++ b/changelog.d/6079.feature
@@ -0,0 +1 @@
+Add `submit_url` response parameter to `*/msisdn/requestToken` endpoints.
diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml
index bd208b17d..46af6edf1 100644
--- a/docs/sample_config.yaml
+++ b/docs/sample_config.yaml
@@ -940,6 +940,8 @@ uploads_path: "DATADIR/uploads"
 # by the Matrix Identity Service API specification:
 # https://matrix.org/docs/spec/identity_service/latest
 #
+# If a delegate is specified, the config option public_baseurl must also be filled out.
+#
 account_threepid_delegates:
     #email: https://example.com     # Delegate email sending to example.org
     #msisdn: http://localhost:8090  # Delegate SMS sending to this local process
diff --git a/synapse/config/registration.py b/synapse/config/registration.py
index d4654e99b..bef89e2bf 100644
--- a/synapse/config/registration.py
+++ b/synapse/config/registration.py
@@ -293,6 +293,8 @@ class RegistrationConfig(Config):
         # by the Matrix Identity Service API specification:
         # https://matrix.org/docs/spec/identity_service/latest
         #
+        # If a delegate is specified, the config option public_baseurl must also be filled out.
+        #
         account_threepid_delegates:
             #email: https://example.com     # Delegate email sending to example.org
             #msisdn: http://localhost:8090  # Delegate SMS sending to this local process
diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py
index 264bdc218..1f16afd14 100644
--- a/synapse/handlers/identity.py
+++ b/synapse/handlers/identity.py
@@ -452,13 +452,23 @@ class IdentityHandler(BaseHandler):
                 id_server + "/_matrix/identity/api/v1/validate/msisdn/requestToken",
                 params,
             )
-            return data
         except HttpResponseException as e:
             logger.info("Proxied requestToken failed: %r", e)
             raise e.to_synapse_error()
         except TimeoutError:
             raise SynapseError(500, "Timed out contacting identity server")
 
+        assert self.hs.config.public_baseurl
+
+        # we need to tell the client to send the token back to us, since it doesn't
+        # otherwise know where to send it, so add submit_url response parameter
+        # (see also MSC2078)
+        data["submit_url"] = (
+            self.hs.config.public_baseurl
+            + "_matrix/client/unstable/add_threepid/msisdn/submit_token"
+        )
+        return data
+
     @defer.inlineCallbacks
     def validate_threepid_session(self, client_secret, sid):
         """Validates a threepid session with only the client secret and session ID

From a25b66d3f9a02b2cba5430339e9bf45f335becbb Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Tue, 24 Sep 2019 11:15:08 +0100
Subject: [PATCH 094/276] docstrings and comments

---
 synapse/config/saml2_config.py | 28 +++++++++++++++++++++-------
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/synapse/config/saml2_config.py b/synapse/config/saml2_config.py
index 14539fdb2..be9f04d78 100644
--- a/synapse/config/saml2_config.py
+++ b/synapse/config/saml2_config.py
@@ -20,20 +20,32 @@ from synapse.util.module_loader import load_python_module
 from ._base import Config, ConfigError
 
 
-def _dict_merge(merge_dict, into_dct):
+def _dict_merge(merge_dict, into_dict):
+    """Do a deep merge of two dicts
+
+    Recursively merges `merge_dict` into `into_dict`:
+      * For keys where both `merge_dict` and `into_dict` have a dict value, the values
+        are recursively merged
+      * For all other keys, the values in `into_dict` (if any) are overwritten with
+        the value from `merge_dict`.
+
+    Args:
+        merge_dict (dict): dict to merge
+        into_dict (dict): target dict
+    """
     for k, v in merge_dict.items():
-        if k not in into_dct:
-            into_dct[k] = v
+        if k not in into_dict:
+            into_dict[k] = v
             continue
 
-        current_val = into_dct[k]
+        current_val = into_dict[k]
 
         if isinstance(v, dict) and isinstance(current_val, dict):
             _dict_merge(v, current_val)
             continue
 
         # otherwise we just overwrite
-        into_dct[k] = v
+        into_dict[k] = v
 
 
 class SAML2Config(Config):
@@ -53,12 +65,14 @@ class SAML2Config(Config):
         self.saml2_enabled = True
 
         saml2_config_dict = self._default_saml_config_dict()
-        _dict_merge(saml2_config.get("sp_config", {}), saml2_config_dict)
+        _dict_merge(
+            merge_dict=saml2_config.get("sp_config", {}), into_dict=saml2_config_dict
+        )
 
         config_path = saml2_config.get("config_path", None)
         if config_path is not None:
             mod = load_python_module(config_path)
-            _dict_merge(mod.CONFIG, saml2_config_dict)
+            _dict_merge(merge_dict=mod.CONFIG, into_dict=saml2_config_dict)
 
         import saml2.config
 

From f02f14e09a9f4ce703fa1091ad40c64957811736 Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Tue, 24 Sep 2019 14:39:07 +0100
Subject: [PATCH 095/276] Fix logging

---
 synapse/storage/background_updates.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py
index e5f0668f0..9522acd97 100644
--- a/synapse/storage/background_updates.py
+++ b/synapse/storage/background_updates.py
@@ -218,7 +218,7 @@ class BackgroundUpdateStore(SQLBaseStore):
         duration_ms = time_stop - time_start
 
         logger.info(
-            "Updating %r. Updated %r items in %rms."
+            "Running background update %r. Processed %r items in %rms."
             " (total_rate=%r/ms, current_rate=%r/ms, total_updated=%r, batch_size=%r)",
             update_name,
             items_updated,

From 40fb00f5b7a8a9df15169900df218df19423b93e Mon Sep 17 00:00:00 2001
From: "J. Ryan Stinnett" <jryans@gmail.com>
Date: Tue, 24 Sep 2019 14:39:50 +0100
Subject: [PATCH 096/276] Add sid to next_link for email validation (#6097)

---
 changelog.d/6097.bugfix      |  1 +
 synapse/handlers/identity.py | 10 ++++++++++
 2 files changed, 11 insertions(+)
 create mode 100644 changelog.d/6097.bugfix

diff --git a/changelog.d/6097.bugfix b/changelog.d/6097.bugfix
new file mode 100644
index 000000000..750a8ecf0
--- /dev/null
+++ b/changelog.d/6097.bugfix
@@ -0,0 +1 @@
+Add sid to next_link for email validation.
diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py
index 1f16afd14..6d42a1aed 100644
--- a/synapse/handlers/identity.py
+++ b/synapse/handlers/identity.py
@@ -18,6 +18,7 @@
 """Utilities for interacting with Identity Servers"""
 
 import logging
+import urllib
 
 from canonicaljson import json
 
@@ -328,6 +329,15 @@ class IdentityHandler(BaseHandler):
             # Generate a session id
             session_id = random_string(16)
 
+        if next_link:
+            # Manipulate the next_link to add the sid, because the caller won't get
+            # it until we send a response, by which time we've sent the mail.
+            if "?" in next_link:
+                next_link += "&"
+            else:
+                next_link += "?"
+            next_link += "sid=" + urllib.parse.quote(session_id)
+
         # Generate a new validation token
         token = random_string(32)
 

From 12fe2a29bc4b15f667022c4ceb5428405ca3da9d Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Tue, 24 Sep 2019 14:43:38 +0100
Subject: [PATCH 097/276] Incorporate review

---
 synapse/storage/registration.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py
index 4c84d804f..c3acc8eca 100644
--- a/synapse/storage/registration.py
+++ b/synapse/storage/registration.py
@@ -844,7 +844,7 @@ class RegistrationStore(
             rows = self.cursor_to_dict(txn)
 
             if not rows:
-                return True
+                return True, 0
 
             rows_processed_nb = 0
 
@@ -860,9 +860,9 @@ class RegistrationStore(
             )
 
             if batch_size > len(rows):
-                return (True, rows_processed_nb)
+                return True, len(rows)
             else:
-                return (False, rows_processed_nb)
+                return False, len(rows)
 
         end, nb_processed = yield self.runInteraction(
             "users_set_deactivated_flag", _background_update_set_deactivated_flag_txn

From 367158a609d18b6dbd143f8bee0529e743d5b5a4 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Tue, 24 Sep 2019 14:16:16 +0100
Subject: [PATCH 098/276] Add wrap_as_background_process decorator.

This does the same thing as `run_as_background_process` but means we
don't need to create superfluous functions.
---
 synapse/metrics/background_process_metrics.py | 29 ++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/synapse/metrics/background_process_metrics.py b/synapse/metrics/background_process_metrics.py
index edd6b42db..b24e2fab4 100644
--- a/synapse/metrics/background_process_metrics.py
+++ b/synapse/metrics/background_process_metrics.py
@@ -15,6 +15,8 @@
 
 import logging
 import threading
+from asyncio import iscoroutine
+from functools import wraps
 
 import six
 
@@ -197,7 +199,15 @@ def run_as_background_process(desc, func, *args, **kwargs):
                 _background_processes.setdefault(desc, set()).add(proc)
 
             try:
-                yield func(*args, **kwargs)
+                # We ensureDeferred here to handle coroutines
+                result = func(*args, **kwargs)
+
+                # We need this check because ensureDeferred doesn't like when
+                # func doesn't return a Deferred or coroutine.
+                if iscoroutine(result):
+                    result = defer.ensureDeferred(result)
+
+                return (yield result)
             except Exception:
                 logger.exception("Background process '%s' threw an exception", desc)
             finally:
@@ -208,3 +218,20 @@ def run_as_background_process(desc, func, *args, **kwargs):
 
     with PreserveLoggingContext():
         return run()
+
+
+def wrap_as_background_process(desc):
+    """Decorator that wraps a function that gets called as a background
+    process.
+
+    Equivalent of calling the function with `run_as_background_process`
+    """
+
+    def wrap_as_background_process_inner(func):
+        @wraps(func)
+        def wrap_as_background_process_inner_2(*args, **kwargs):
+            return run_as_background_process(desc, func, *args, **kwargs)
+
+        return wrap_as_background_process_inner_2
+
+    return wrap_as_background_process_inner

From 2135c198d17b41297511a2fc3b39551d160069b2 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Tue, 24 Sep 2019 14:18:31 +0100
Subject: [PATCH 099/276] Add has_completed_background_update

This allows checking if a specific background update has completed.
---
 synapse/storage/background_updates.py | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py
index e5f0668f0..3fc25cd82 100644
--- a/synapse/storage/background_updates.py
+++ b/synapse/storage/background_updates.py
@@ -140,7 +140,7 @@ class BackgroundUpdateStore(SQLBaseStore):
             "background_updates",
             keyvalues=None,
             retcol="1",
-            desc="check_background_updates",
+            desc="has_completed_background_updates",
         )
         if not updates:
             self._all_done = True
@@ -148,6 +148,29 @@ class BackgroundUpdateStore(SQLBaseStore):
 
         return False
 
+    async def has_completed_background_update(self, update_name):
+        """Check if the given background update has finished running.
+
+        Returns:
+            Deferred[bool]
+        """
+
+        if self._all_done:
+            return True
+
+        if update_name in self._background_update_queue:
+            return False
+
+        update_exists = await self._simple_select_one_onecol(
+            "background_updates",
+            keyvalues={"update_name": update_name},
+            retcol="1",
+            desc="has_completed_background_update",
+            allow_none=True,
+        )
+
+        return not update_exists
+
     @defer.inlineCallbacks
     def do_next_background_update(self, desired_duration_ms):
         """Does some amount of work on the next queued background update

From 242017db8b7b57be28a019ecbba1619d75d54889 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Tue, 24 Sep 2019 15:20:40 +0100
Subject: [PATCH 100/276] Prune rows in user_ips older than configured period

Defaults to pruning everything older than 28d.
---
 docs/sample_config.yaml       |  6 ++++
 synapse/config/server.py      | 13 ++++++++
 synapse/storage/client_ips.py | 62 ++++++++++++++++++++++++++++++-----
 3 files changed, 73 insertions(+), 8 deletions(-)

diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml
index 61d9f09a9..cc6035c83 100644
--- a/docs/sample_config.yaml
+++ b/docs/sample_config.yaml
@@ -313,6 +313,12 @@ listeners:
 #
 redaction_retention_period: 7d
 
+# How long to track users' last seen time and IPs in the database.
+#
+# Defaults to `28d`. Set to `null` to disable.
+#
+#user_ips_max_age: 14d
+
 
 ## TLS ##
 
diff --git a/synapse/config/server.py b/synapse/config/server.py
index 7f8d31595..655e7487a 100644
--- a/synapse/config/server.py
+++ b/synapse/config/server.py
@@ -172,6 +172,13 @@ class ServerConfig(Config):
         else:
             self.redaction_retention_period = None
 
+        # How long to keep entries in the `users_ips` table.
+        user_ips_max_age = config.get("user_ips_max_age", "28d")
+        if user_ips_max_age is not None:
+            self.user_ips_max_age = self.parse_duration(user_ips_max_age)
+        else:
+            self.user_ips_max_age = None
+
         # Options to disable HS
         self.hs_disabled = config.get("hs_disabled", False)
         self.hs_disabled_message = config.get("hs_disabled_message", "")
@@ -735,6 +742,12 @@ class ServerConfig(Config):
         # Defaults to `7d`. Set to `null` to disable.
         #
         redaction_retention_period: 7d
+
+        # How long to track users' last seen time and IPs in the database.
+        #
+        # Defaults to `28d`. Set to `null` to disable.
+        #
+        #user_ips_max_age: 14d
         """
             % locals()
         )
diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py
index a4e6d9dbe..176c812b1 100644
--- a/synapse/storage/client_ips.py
+++ b/synapse/storage/client_ips.py
@@ -19,7 +19,7 @@ from six import iteritems
 
 from twisted.internet import defer
 
-from synapse.metrics.background_process_metrics import run_as_background_process
+from synapse.metrics.background_process_metrics import wrap_as_background_process
 from synapse.util.caches import CACHE_SIZE_FACTOR
 
 from . import background_updates
@@ -42,6 +42,8 @@ class ClientIpStore(background_updates.BackgroundUpdateStore):
 
         super(ClientIpStore, self).__init__(db_conn, hs)
 
+        self.user_ips_max_age = hs.config.user_ips_max_age
+
         self.register_background_index_update(
             "user_ips_device_index",
             index_name="user_ips_device_id",
@@ -100,6 +102,9 @@ class ClientIpStore(background_updates.BackgroundUpdateStore):
             "before", "shutdown", self._update_client_ips_batch
         )
 
+        if self.user_ips_max_age:
+            self._clock.looping_call(self._prune_old_user_ips, 5 * 1000)
+
     @defer.inlineCallbacks
     def _remove_user_ip_nonunique(self, progress, batch_size):
         def f(conn):
@@ -319,20 +324,19 @@ class ClientIpStore(background_updates.BackgroundUpdateStore):
 
         self._batch_row_update[key] = (user_agent, device_id, now)
 
+    @wrap_as_background_process("update_client_ips")
     def _update_client_ips_batch(self):
 
         # If the DB pool has already terminated, don't try updating
         if not self.hs.get_db_pool().running:
             return
 
-        def update():
-            to_update = self._batch_row_update
-            self._batch_row_update = {}
-            return self.runInteraction(
-                "_update_client_ips_batch", self._update_client_ips_batch_txn, to_update
-            )
+        to_update = self._batch_row_update
+        self._batch_row_update = {}
 
-        return run_as_background_process("update_client_ips", update)
+        return self.runInteraction(
+            "_update_client_ips_batch", self._update_client_ips_batch_txn, to_update
+        )
 
     def _update_client_ips_batch_txn(self, txn, to_update):
         if "user_ips" in self._unsafe_to_upsert_tables or (
@@ -496,3 +500,45 @@ class ClientIpStore(background_updates.BackgroundUpdateStore):
             yield self._end_background_update("devices_last_seen")
 
         return updated
+
+    @wrap_as_background_process("prune_old_user_ips")
+    async def _prune_old_user_ips(self):
+        """Removes entries in user IPs older than the configured period.
+        """
+
+        if not self.user_ips_max_age:
+            # Nothing to do
+            return
+
+        if not await self.has_completed_background_update("devices_last_seen"):
+            # Only start pruning if we have finished populating the devices
+            # last seen info.
+            return
+
+        # We do a slightly funky SQL delete to ensure we don't try and delete
+        # too much at once (as the table may be very large from before we
+        # started pruning).
+        #
+        # This works by finding the max last_seen that is less than the given
+        # time, but has no more than N rows before it, deleting all rows with
+        # a lesser last_seen time. (We COALESCE so that the sub-SELECT always
+        # returns exactly one row).
+        sql = """
+            DELETE FROM user_ips
+            WHERE last_seen <= (
+                SELECT COALESCE(MAX(last_seen), -1)
+                FROM (
+                    SELECT last_seen FROM user_ips
+                    WHERE last_seen <= ?
+                    ORDER BY last_seen ASC
+                    LIMIT 5000
+                ) AS u
+            )
+        """
+
+        timestamp = self.clock.time_msec() - self.user_ips_max_age
+
+        def _prune_old_user_ips_txn(txn):
+            txn.execute(sql, (timestamp,))
+
+        await self.runInteraction("_prune_old_user_ips", _prune_old_user_ips_txn)

From faac453f08046ddf00b39b90ba255f774b75c253 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Tue, 24 Sep 2019 15:51:42 +0100
Subject: [PATCH 101/276] Test that pruning of old user IPs works

---
 tests/storage/test_client_ips.py | 71 ++++++++++++++++++++++++++++++++
 1 file changed, 71 insertions(+)

diff --git a/tests/storage/test_client_ips.py b/tests/storage/test_client_ips.py
index 76fe65b59..afac5dec7 100644
--- a/tests/storage/test_client_ips.py
+++ b/tests/storage/test_client_ips.py
@@ -279,6 +279,77 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase):
             r,
         )
 
+    def test_old_user_ips_pruned(self):
+        # First make sure we have completed all updates.
+        while not self.get_success(self.store.has_completed_background_updates()):
+            self.get_success(self.store.do_next_background_update(100), by=0.1)
+
+        # Insert a user IP
+        user_id = "@user:id"
+        self.get_success(
+            self.store.insert_client_ip(
+                user_id, "access_token", "ip", "user_agent", "device_id"
+            )
+        )
+
+        # Force persisting to disk
+        self.reactor.advance(200)
+
+        # We should see that in the DB
+        result = self.get_success(
+            self.store._simple_select_list(
+                table="user_ips",
+                keyvalues={"user_id": user_id},
+                retcols=["access_token", "ip", "user_agent", "device_id", "last_seen"],
+                desc="get_user_ip_and_agents",
+            )
+        )
+
+        self.assertEqual(
+            result,
+            [
+                {
+                    "access_token": "access_token",
+                    "ip": "ip",
+                    "user_agent": "user_agent",
+                    "device_id": "device_id",
+                    "last_seen": 0,
+                }
+            ],
+        )
+
+        # Now advance by a couple of months
+        self.reactor.advance(60 * 24 * 60 * 60)
+
+        # We should get no results.
+        result = self.get_success(
+            self.store._simple_select_list(
+                table="user_ips",
+                keyvalues={"user_id": user_id},
+                retcols=["access_token", "ip", "user_agent", "device_id", "last_seen"],
+                desc="get_user_ip_and_agents",
+            )
+        )
+
+        self.assertEqual(result, [])
+
+        # But we should still get the correct values for the device
+        result = self.get_success(
+            self.store.get_last_client_ip_by_device(user_id, "device_id")
+        )
+
+        r = result[(user_id, "device_id")]
+        self.assertDictContainsSubset(
+            {
+                "user_id": user_id,
+                "device_id": "device_id",
+                "ip": "ip",
+                "user_agent": "user_agent",
+                "last_seen": 0,
+            },
+            r,
+        )
+
 
 class ClientIpAuthTestCase(unittest.HomeserverTestCase):
 

From f8b02c54207e5e99fcd95cb3e19a11423768e696 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Tue, 24 Sep 2019 15:59:43 +0100
Subject: [PATCH 102/276] Newsfile

---
 changelog.d/6098.feature | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/6098.feature

diff --git a/changelog.d/6098.feature b/changelog.d/6098.feature
new file mode 100644
index 000000000..f3c693c06
--- /dev/null
+++ b/changelog.d/6098.feature
@@ -0,0 +1 @@
+Add support for pruning old rows in `user_ips` table.

From 566ac40939404649d58c053e97dba75810f95339 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Tue, 24 Sep 2019 17:01:09 +0100
Subject: [PATCH 103/276] remove unused parameter to get_user_id_by_threepid
 (#6099)

Added in #5377, apparently in error
---
 changelog.d/6099.misc           | 1 +
 synapse/storage/registration.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/6099.misc

diff --git a/changelog.d/6099.misc b/changelog.d/6099.misc
new file mode 100644
index 000000000..8415c6759
--- /dev/null
+++ b/changelog.d/6099.misc
@@ -0,0 +1 @@
+Remove unused parameter to get_user_id_by_threepid.
diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py
index 805411a6b..5cf2c893a 100644
--- a/synapse/storage/registration.py
+++ b/synapse/storage/registration.py
@@ -495,7 +495,7 @@ class RegistrationWorkerStore(SQLBaseStore):
         )
 
     @defer.inlineCallbacks
-    def get_user_id_by_threepid(self, medium, address, require_verified=False):
+    def get_user_id_by_threepid(self, medium, address):
         """Returns user id from threepid
 
         Args:

From 8004d6ca2faf0f2f843fcdcaf225d7bcab847503 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Wed, 25 Sep 2019 11:32:05 +0100
Subject: [PATCH 104/276] Refactor code for calculating registration flows
 (#6106)

because, frankly, it looked like it was written by an axe-murderer.

This should be a non-functional change, except that where `m.login.dummy` was
previously advertised *before* `m.login.terms`, it will now be advertised
afterwards. AFAICT that should have no effect, and will be more consistent with
the flows that involve passing a 3pid.
---
 changelog.d/6106.misc                       |   1 +
 synapse/rest/client/v2_alpha/register.py    | 124 ++++++++++----------
 tests/rest/client/v2_alpha/test_register.py |  79 +++++++++++--
 tests/test_terms_auth.py                    |  24 ++--
 4 files changed, 145 insertions(+), 83 deletions(-)
 create mode 100644 changelog.d/6106.misc

diff --git a/changelog.d/6106.misc b/changelog.d/6106.misc
new file mode 100644
index 000000000..d73209177
--- /dev/null
+++ b/changelog.d/6106.misc
@@ -0,0 +1 @@
+Refactor code for calculating registration flows.
diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py
index 135a70808..e3f3d9126 100644
--- a/synapse/rest/client/v2_alpha/register.py
+++ b/synapse/rest/client/v2_alpha/register.py
@@ -16,6 +16,7 @@
 
 import hmac
 import logging
+from typing import List, Union
 
 from six import string_types
 
@@ -31,8 +32,11 @@ from synapse.api.errors import (
     ThreepidValidationError,
     UnrecognizedRequestError,
 )
+from synapse.config.captcha import CaptchaConfig
+from synapse.config.consent_config import ConsentConfig
 from synapse.config.emailconfig import ThreepidBehaviour
 from synapse.config.ratelimiting import FederationRateLimitConfig
+from synapse.config.registration import RegistrationConfig
 from synapse.config.server import is_threepid_reserved
 from synapse.http.server import finish_request
 from synapse.http.servlet import (
@@ -371,6 +375,8 @@ class RegisterRestServlet(RestServlet):
         self.ratelimiter = hs.get_registration_ratelimiter()
         self.clock = hs.get_clock()
 
+        self._registration_flows = _calculate_registration_flows(hs.config)
+
     @interactive_auth_handler
     @defer.inlineCallbacks
     def on_POST(self, request):
@@ -491,69 +497,8 @@ class RegisterRestServlet(RestServlet):
                 assigned_user_id=registered_user_id,
             )
 
-        # FIXME: need a better error than "no auth flow found" for scenarios
-        # where we required 3PID for registration but the user didn't give one
-        require_email = "email" in self.hs.config.registrations_require_3pid
-        require_msisdn = "msisdn" in self.hs.config.registrations_require_3pid
-
-        show_msisdn = True
-        if self.hs.config.disable_msisdn_registration:
-            show_msisdn = False
-            require_msisdn = False
-
-        flows = []
-        if self.hs.config.enable_registration_captcha:
-            # only support 3PIDless registration if no 3PIDs are required
-            if not require_email and not require_msisdn:
-                # Also add a dummy flow here, otherwise if a client completes
-                # recaptcha first we'll assume they were going for this flow
-                # and complete the request, when they could have been trying to
-                # complete one of the flows with email/msisdn auth.
-                flows.extend([[LoginType.RECAPTCHA, LoginType.DUMMY]])
-            # only support the email-only flow if we don't require MSISDN 3PIDs
-            if not require_msisdn:
-                flows.extend([[LoginType.RECAPTCHA, LoginType.EMAIL_IDENTITY]])
-
-            if show_msisdn:
-                # only support the MSISDN-only flow if we don't require email 3PIDs
-                if not require_email:
-                    flows.extend([[LoginType.RECAPTCHA, LoginType.MSISDN]])
-                # always let users provide both MSISDN & email
-                flows.extend(
-                    [[LoginType.RECAPTCHA, LoginType.MSISDN, LoginType.EMAIL_IDENTITY]]
-                )
-        else:
-            # only support 3PIDless registration if no 3PIDs are required
-            if not require_email and not require_msisdn:
-                flows.extend([[LoginType.DUMMY]])
-            # only support the email-only flow if we don't require MSISDN 3PIDs
-            if not require_msisdn:
-                flows.extend([[LoginType.EMAIL_IDENTITY]])
-
-            if show_msisdn:
-                # only support the MSISDN-only flow if we don't require email 3PIDs
-                if not require_email or require_msisdn:
-                    flows.extend([[LoginType.MSISDN]])
-                # always let users provide both MSISDN & email
-                flows.extend([[LoginType.MSISDN, LoginType.EMAIL_IDENTITY]])
-
-        # Append m.login.terms to all flows if we're requiring consent
-        if self.hs.config.user_consent_at_registration:
-            new_flows = []
-            for flow in flows:
-                inserted = False
-                # m.login.terms should go near the end but before msisdn or email auth
-                for i, stage in enumerate(flow):
-                    if stage == LoginType.EMAIL_IDENTITY or stage == LoginType.MSISDN:
-                        flow.insert(i, LoginType.TERMS)
-                        inserted = True
-                        break
-                if not inserted:
-                    flow.append(LoginType.TERMS)
-            flows.extend(new_flows)
-
         auth_result, params, session_id = yield self.auth_handler.check_auth(
-            flows, body, self.hs.get_ip_from_request(request)
+            self._registration_flows, body, self.hs.get_ip_from_request(request)
         )
 
         # Check that we're not trying to register a denied 3pid.
@@ -716,6 +661,61 @@ class RegisterRestServlet(RestServlet):
         )
 
 
+def _calculate_registration_flows(
+    # technically `config` has to provide *all* of these interfaces, not just one
+    config: Union[RegistrationConfig, ConsentConfig, CaptchaConfig],
+) -> List[List[str]]:
+    """Get a suitable flows list for registration
+
+    Args:
+        config: server configuration
+
+    Returns: a list of supported flows
+    """
+    # FIXME: need a better error than "no auth flow found" for scenarios
+    # where we required 3PID for registration but the user didn't give one
+    require_email = "email" in config.registrations_require_3pid
+    require_msisdn = "msisdn" in config.registrations_require_3pid
+
+    show_msisdn = True
+    if config.disable_msisdn_registration:
+        show_msisdn = False
+        require_msisdn = False
+
+    flows = []
+
+    # only support 3PIDless registration if no 3PIDs are required
+    if not require_email and not require_msisdn:
+        # Add a dummy step here, otherwise if a client completes
+        # recaptcha first we'll assume they were going for this flow
+        # and complete the request, when they could have been trying to
+        # complete one of the flows with email/msisdn auth.
+        flows.append([LoginType.DUMMY])
+
+    # only support the email-only flow if we don't require MSISDN 3PIDs
+    if not require_msisdn:
+        flows.append([LoginType.EMAIL_IDENTITY])
+
+    # only support the MSISDN-only flow if we don't require email 3PIDs
+    if show_msisdn and not require_email:
+        flows.append([LoginType.MSISDN])
+
+    if show_msisdn:
+        flows.append([LoginType.MSISDN, LoginType.EMAIL_IDENTITY])
+
+    # Prepend m.login.terms to all flows if we're requiring consent
+    if config.user_consent_at_registration:
+        for flow in flows:
+            flow.insert(0, LoginType.TERMS)
+
+    # Prepend recaptcha to all flows if we're requiring captcha
+    if config.enable_registration_captcha:
+        for flow in flows:
+            flow.insert(0, LoginType.RECAPTCHA)
+
+    return flows
+
+
 def register_servlets(hs, http_server):
     EmailRegisterRequestTokenRestServlet(hs).register(http_server)
     MsisdnRegisterRequestTokenRestServlet(hs).register(http_server)
diff --git a/tests/rest/client/v2_alpha/test_register.py b/tests/rest/client/v2_alpha/test_register.py
index ab4d7d70d..bc2dc4797 100644
--- a/tests/rest/client/v2_alpha/test_register.py
+++ b/tests/rest/client/v2_alpha/test_register.py
@@ -34,19 +34,12 @@ from tests import unittest
 class RegisterRestServletTestCase(unittest.HomeserverTestCase):
 
     servlets = [register.register_servlets]
+    url = b"/_matrix/client/r0/register"
 
-    def make_homeserver(self, reactor, clock):
-
-        self.url = b"/_matrix/client/r0/register"
-
-        self.hs = self.setup_test_homeserver()
-        self.hs.config.enable_registration = True
-        self.hs.config.registrations_require_3pid = []
-        self.hs.config.auto_join_rooms = []
-        self.hs.config.enable_registration_captcha = False
-        self.hs.config.allow_guest_access = True
-
-        return self.hs
+    def default_config(self, name="test"):
+        config = super().default_config(name)
+        config["allow_guest_access"] = True
+        return config
 
     def test_POST_appservice_registration_valid(self):
         user_id = "@as_user_kermit:test"
@@ -199,6 +192,68 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase):
 
         self.assertEquals(channel.result["code"], b"200", channel.result)
 
+    def test_advertised_flows(self):
+        request, channel = self.make_request(b"POST", self.url, b"{}")
+        self.render(request)
+        self.assertEquals(channel.result["code"], b"401", channel.result)
+        flows = channel.json_body["flows"]
+
+        # with the stock config, we expect all four combinations of 3pid
+        self.assertCountEqual(
+            [
+                ["m.login.dummy"],
+                ["m.login.email.identity"],
+                ["m.login.msisdn"],
+                ["m.login.msisdn", "m.login.email.identity"],
+            ],
+            (f["stages"] for f in flows),
+        )
+
+    @unittest.override_config(
+        {
+            "enable_registration_captcha": True,
+            "user_consent": {
+                "version": "1",
+                "template_dir": "/",
+                "require_at_registration": True,
+            },
+        }
+    )
+    def test_advertised_flows_captcha_and_terms(self):
+        request, channel = self.make_request(b"POST", self.url, b"{}")
+        self.render(request)
+        self.assertEquals(channel.result["code"], b"401", channel.result)
+        flows = channel.json_body["flows"]
+
+        self.assertCountEqual(
+            [
+                ["m.login.recaptcha", "m.login.terms", "m.login.dummy"],
+                ["m.login.recaptcha", "m.login.terms", "m.login.email.identity"],
+                ["m.login.recaptcha", "m.login.terms", "m.login.msisdn"],
+                [
+                    "m.login.recaptcha",
+                    "m.login.terms",
+                    "m.login.msisdn",
+                    "m.login.email.identity",
+                ],
+            ],
+            (f["stages"] for f in flows),
+        )
+
+    @unittest.override_config(
+        {"registrations_require_3pid": ["email"], "disable_msisdn_registration": True}
+    )
+    def test_advertised_flows_no_msisdn_email_required(self):
+        request, channel = self.make_request(b"POST", self.url, b"{}")
+        self.render(request)
+        self.assertEquals(channel.result["code"], b"401", channel.result)
+        flows = channel.json_body["flows"]
+
+        # with email required and msisdn disabled, only the email flow is offered
+        self.assertCountEqual(
+            [["m.login.email.identity"]], (f["stages"] for f in flows)
+        )
+
 
 class AccountValidityTestCase(unittest.HomeserverTestCase):
 
diff --git a/tests/test_terms_auth.py b/tests/test_terms_auth.py
index 52739fbab..5ec5d2b35 100644
--- a/tests/test_terms_auth.py
+++ b/tests/test_terms_auth.py
@@ -28,6 +28,21 @@ from tests import unittest
 class TermsTestCase(unittest.HomeserverTestCase):
     servlets = [register_servlets]
 
+    def default_config(self, name="test"):
+        config = super().default_config(name)
+        config.update(
+            {
+                "public_baseurl": "https://example.org/",
+                "user_consent": {
+                    "version": "1.0",
+                    "policy_name": "My Cool Privacy Policy",
+                    "template_dir": "/",
+                    "require_at_registration": True,
+                },
+            }
+        )
+        return config
+
     def prepare(self, reactor, clock, hs):
         self.clock = MemoryReactorClock()
         self.hs_clock = Clock(self.clock)
@@ -35,17 +50,8 @@ class TermsTestCase(unittest.HomeserverTestCase):
         self.registration_handler = Mock()
         self.auth_handler = Mock()
         self.device_handler = Mock()
-        hs.config.enable_registration = True
-        hs.config.registrations_require_3pid = []
-        hs.config.auto_join_rooms = []
-        hs.config.enable_registration_captcha = False
 
     def test_ui_auth(self):
-        self.hs.config.user_consent_at_registration = True
-        self.hs.config.user_consent_policy_name = "My Cool Privacy Policy"
-        self.hs.config.public_baseurl = "https://example.org/"
-        self.hs.config.user_consent_version = "1.0"
-
         # Do a UI auth request
         request, channel = self.make_request(b"POST", self.url, b"{}")
         self.render(request)

From fde4ce22135b06d05b646141f90cdf3038ed4fe2 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 25 Sep 2019 11:32:41 +0100
Subject: [PATCH 105/276] Don't create new span for get_user_by_req

We don't actually care about what happens in `get_user_by_req` and
having it as a separate span means that the entity tag isn't added to
the servlet spans, making it harder to search.
---
 synapse/api/auth.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/synapse/api/auth.py b/synapse/api/auth.py
index 9e445cd80..59852bdbd 100644
--- a/synapse/api/auth.py
+++ b/synapse/api/auth.py
@@ -179,7 +179,6 @@ class Auth(object):
     def get_public_keys(self, invite_event):
         return event_auth.get_public_keys(invite_event)
 
-    @opentracing.trace
     @defer.inlineCallbacks
     def get_user_by_req(
         self, request, allow_guest=False, rights="access", allow_expired=False

From 2cd98812ba338eefe83fee4ae2390d00f5499de9 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Wed, 25 Sep 2019 11:33:03 +0100
Subject: [PATCH 106/276] Refactor the user-interactive auth handling (#6105)

Pull the checkers out to their own classes, rather than having them lost in a
massive 1000-line class which does everything.

This is also preparation for some more intelligent advertising of flows, as per #6100
---
 changelog.d/6105.misc                   |   1 +
 synapse/handlers/auth.py                | 141 ++--------------
 synapse/handlers/ui_auth/__init__.py    |  22 +++
 synapse/handlers/ui_auth/checkers.py    | 216 ++++++++++++++++++++++++
 tests/rest/client/v2_alpha/test_auth.py |  26 +--
 5 files changed, 265 insertions(+), 141 deletions(-)
 create mode 100644 changelog.d/6105.misc
 create mode 100644 synapse/handlers/ui_auth/__init__.py
 create mode 100644 synapse/handlers/ui_auth/checkers.py

diff --git a/changelog.d/6105.misc b/changelog.d/6105.misc
new file mode 100644
index 000000000..2e838a35c
--- /dev/null
+++ b/changelog.d/6105.misc
@@ -0,0 +1 @@
+Refactor the user-interactive auth handling.
diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py
index 374372b69..f920c2f6c 100644
--- a/synapse/handlers/auth.py
+++ b/synapse/handlers/auth.py
@@ -21,10 +21,8 @@ import unicodedata
 import attr
 import bcrypt
 import pymacaroons
-from canonicaljson import json
 
 from twisted.internet import defer
-from twisted.web.client import PartialDownloadError
 
 import synapse.util.stringutils as stringutils
 from synapse.api.constants import LoginType
@@ -38,7 +36,8 @@ from synapse.api.errors import (
     UserDeactivatedError,
 )
 from synapse.api.ratelimiting import Ratelimiter
-from synapse.config.emailconfig import ThreepidBehaviour
+from synapse.handlers.ui_auth import INTERACTIVE_AUTH_CHECKERS
+from synapse.handlers.ui_auth.checkers import UserInteractiveAuthChecker
 from synapse.logging.context import defer_to_thread
 from synapse.module_api import ModuleApi
 from synapse.types import UserID
@@ -58,13 +57,12 @@ class AuthHandler(BaseHandler):
             hs (synapse.server.HomeServer):
         """
         super(AuthHandler, self).__init__(hs)
-        self.checkers = {
-            LoginType.RECAPTCHA: self._check_recaptcha,
-            LoginType.EMAIL_IDENTITY: self._check_email_identity,
-            LoginType.MSISDN: self._check_msisdn,
-            LoginType.DUMMY: self._check_dummy_auth,
-            LoginType.TERMS: self._check_terms_auth,
-        }
+
+        self.checkers = {}  # type: dict[str, UserInteractiveAuthChecker]
+        for auth_checker_class in INTERACTIVE_AUTH_CHECKERS:
+            inst = auth_checker_class(hs)
+            self.checkers[inst.AUTH_TYPE] = inst
+
         self.bcrypt_rounds = hs.config.bcrypt_rounds
 
         # This is not a cache per se, but a store of all current sessions that
@@ -292,7 +290,7 @@ class AuthHandler(BaseHandler):
             sess["creds"] = {}
         creds = sess["creds"]
 
-        result = yield self.checkers[stagetype](authdict, clientip)
+        result = yield self.checkers[stagetype].check_auth(authdict, clientip)
         if result:
             creds[stagetype] = result
             self._save_session(sess)
@@ -363,7 +361,7 @@ class AuthHandler(BaseHandler):
         login_type = authdict["type"]
         checker = self.checkers.get(login_type)
         if checker is not None:
-            res = yield checker(authdict, clientip=clientip)
+            res = yield checker.check_auth(authdict, clientip=clientip)
             return res
 
         # build a v1-login-style dict out of the authdict and fall back to the
@@ -376,125 +374,6 @@ class AuthHandler(BaseHandler):
         (canonical_id, callback) = yield self.validate_login(user_id, authdict)
         return canonical_id
 
-    @defer.inlineCallbacks
-    def _check_recaptcha(self, authdict, clientip, **kwargs):
-        try:
-            user_response = authdict["response"]
-        except KeyError:
-            # Client tried to provide captcha but didn't give the parameter:
-            # bad request.
-            raise LoginError(
-                400, "Captcha response is required", errcode=Codes.CAPTCHA_NEEDED
-            )
-
-        logger.info(
-            "Submitting recaptcha response %s with remoteip %s", user_response, clientip
-        )
-
-        # TODO: get this from the homeserver rather than creating a new one for
-        # each request
-        try:
-            client = self.hs.get_simple_http_client()
-            resp_body = yield client.post_urlencoded_get_json(
-                self.hs.config.recaptcha_siteverify_api,
-                args={
-                    "secret": self.hs.config.recaptcha_private_key,
-                    "response": user_response,
-                    "remoteip": clientip,
-                },
-            )
-        except PartialDownloadError as pde:
-            # Twisted is silly
-            data = pde.response
-            resp_body = json.loads(data)
-
-        if "success" in resp_body:
-            # Note that we do NOT check the hostname here: we explicitly
-            # intend the CAPTCHA to be presented by whatever client the
-            # user is using, we just care that they have completed a CAPTCHA.
-            logger.info(
-                "%s reCAPTCHA from hostname %s",
-                "Successful" if resp_body["success"] else "Failed",
-                resp_body.get("hostname"),
-            )
-            if resp_body["success"]:
-                return True
-        raise LoginError(401, "", errcode=Codes.UNAUTHORIZED)
-
-    def _check_email_identity(self, authdict, **kwargs):
-        return self._check_threepid("email", authdict, **kwargs)
-
-    def _check_msisdn(self, authdict, **kwargs):
-        return self._check_threepid("msisdn", authdict)
-
-    def _check_dummy_auth(self, authdict, **kwargs):
-        return defer.succeed(True)
-
-    def _check_terms_auth(self, authdict, **kwargs):
-        return defer.succeed(True)
-
-    @defer.inlineCallbacks
-    def _check_threepid(self, medium, authdict, **kwargs):
-        if "threepid_creds" not in authdict:
-            raise LoginError(400, "Missing threepid_creds", Codes.MISSING_PARAM)
-
-        threepid_creds = authdict["threepid_creds"]
-
-        identity_handler = self.hs.get_handlers().identity_handler
-
-        logger.info("Getting validated threepid. threepidcreds: %r", (threepid_creds,))
-        if self.hs.config.threepid_behaviour_email == ThreepidBehaviour.REMOTE:
-            if medium == "email":
-                threepid = yield identity_handler.threepid_from_creds(
-                    self.hs.config.account_threepid_delegate_email, threepid_creds
-                )
-            elif medium == "msisdn":
-                threepid = yield identity_handler.threepid_from_creds(
-                    self.hs.config.account_threepid_delegate_msisdn, threepid_creds
-                )
-            else:
-                raise SynapseError(400, "Unrecognized threepid medium: %s" % (medium,))
-        elif self.hs.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL:
-            row = yield self.store.get_threepid_validation_session(
-                medium,
-                threepid_creds["client_secret"],
-                sid=threepid_creds["sid"],
-                validated=True,
-            )
-
-            threepid = (
-                {
-                    "medium": row["medium"],
-                    "address": row["address"],
-                    "validated_at": row["validated_at"],
-                }
-                if row
-                else None
-            )
-
-            if row:
-                # Valid threepid returned, delete from the db
-                yield self.store.delete_threepid_session(threepid_creds["sid"])
-        else:
-            raise SynapseError(
-                400, "Password resets are not enabled on this homeserver"
-            )
-
-        if not threepid:
-            raise LoginError(401, "", errcode=Codes.UNAUTHORIZED)
-
-        if threepid["medium"] != medium:
-            raise LoginError(
-                401,
-                "Expecting threepid of type '%s', got '%s'"
-                % (medium, threepid["medium"]),
-                errcode=Codes.UNAUTHORIZED,
-            )
-
-        threepid["threepid_creds"] = authdict["threepid_creds"]
-
-        return threepid
-
     def _get_params_recaptcha(self):
         return {"public_key": self.hs.config.recaptcha_public_key}
 
diff --git a/synapse/handlers/ui_auth/__init__.py b/synapse/handlers/ui_auth/__init__.py
new file mode 100644
index 000000000..824f37f8f
--- /dev/null
+++ b/synapse/handlers/ui_auth/__init__.py
@@ -0,0 +1,22 @@
+# -*- coding: utf-8 -*-
+# Copyright 2019 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""This module implements user-interactive auth verification.
+
+TODO: move more stuff out of AuthHandler in here.
+
+"""
+
+from synapse.handlers.ui_auth.checkers import INTERACTIVE_AUTH_CHECKERS  # noqa: F401
diff --git a/synapse/handlers/ui_auth/checkers.py b/synapse/handlers/ui_auth/checkers.py
new file mode 100644
index 000000000..fd633b7b0
--- /dev/null
+++ b/synapse/handlers/ui_auth/checkers.py
@@ -0,0 +1,216 @@
+# -*- coding: utf-8 -*-
+# Copyright 2019 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+
+from canonicaljson import json
+
+from twisted.internet import defer
+from twisted.web.client import PartialDownloadError
+
+from synapse.api.constants import LoginType
+from synapse.api.errors import Codes, LoginError, SynapseError
+from synapse.config.emailconfig import ThreepidBehaviour
+
+logger = logging.getLogger(__name__)
+
+
+class UserInteractiveAuthChecker:
+    """Abstract base class for an interactive auth checker"""
+
+    def __init__(self, hs):
+        pass
+
+    def check_auth(self, authdict, clientip):
+        """Given the authentication dict from the client, attempt to check this step
+
+        Args:
+            authdict (dict): authentication dictionary from the client
+            clientip (str): The IP address of the client.
+
+        Raises:
+            SynapseError if authentication failed
+
+        Returns:
+            Deferred: the result of authentication (to pass back to the client?)
+        """
+        raise NotImplementedError()
+
+
+class DummyAuthChecker(UserInteractiveAuthChecker):
+    AUTH_TYPE = LoginType.DUMMY
+
+    def check_auth(self, authdict, clientip):
+        return defer.succeed(True)
+
+
+class TermsAuthChecker(UserInteractiveAuthChecker):
+    AUTH_TYPE = LoginType.TERMS
+
+    def check_auth(self, authdict, clientip):
+        return defer.succeed(True)
+
+
+class RecaptchaAuthChecker(UserInteractiveAuthChecker):
+    AUTH_TYPE = LoginType.RECAPTCHA
+
+    def __init__(self, hs):
+        super().__init__(hs)
+        self._http_client = hs.get_simple_http_client()
+        self._url = hs.config.recaptcha_siteverify_api
+        self._secret = hs.config.recaptcha_private_key
+
+    @defer.inlineCallbacks
+    def check_auth(self, authdict, clientip):
+        try:
+            user_response = authdict["response"]
+        except KeyError:
+            # Client tried to provide captcha but didn't give the parameter:
+            # bad request.
+            raise LoginError(
+                400, "Captcha response is required", errcode=Codes.CAPTCHA_NEEDED
+            )
+
+        logger.info(
+            "Submitting recaptcha response %s with remoteip %s", user_response, clientip
+        )
+
+        # The HTTP client is fetched from the homeserver once, in __init__, and
+        # reused for every request.
+        try:
+            resp_body = yield self._http_client.post_urlencoded_get_json(
+                self._url,
+                args={
+                    "secret": self._secret,
+                    "response": user_response,
+                    "remoteip": clientip,
+                },
+            )
+        except PartialDownloadError as pde:
+            # Twisted is silly
+            data = pde.response
+            resp_body = json.loads(data)
+
+        if "success" in resp_body:
+            # Note that we do NOT check the hostname here: we explicitly
+            # intend the CAPTCHA to be presented by whatever client the
+            # user is using, we just care that they have completed a CAPTCHA.
+            logger.info(
+                "%s reCAPTCHA from hostname %s",
+                "Successful" if resp_body["success"] else "Failed",
+                resp_body.get("hostname"),
+            )
+            if resp_body["success"]:
+                return True
+        raise LoginError(401, "", errcode=Codes.UNAUTHORIZED)
+
+
+class _BaseThreepidAuthChecker:
+    def __init__(self, hs):
+        self.hs = hs
+        self.store = hs.get_datastore()
+
+    @defer.inlineCallbacks
+    def _check_threepid(self, medium, authdict):
+        if "threepid_creds" not in authdict:
+            raise LoginError(400, "Missing threepid_creds", Codes.MISSING_PARAM)
+
+        threepid_creds = authdict["threepid_creds"]
+
+        identity_handler = self.hs.get_handlers().identity_handler
+
+        logger.info("Getting validated threepid. threepidcreds: %r", (threepid_creds,))
+        if self.hs.config.threepid_behaviour_email == ThreepidBehaviour.REMOTE:
+            if medium == "email":
+                threepid = yield identity_handler.threepid_from_creds(
+                    self.hs.config.account_threepid_delegate_email, threepid_creds
+                )
+            elif medium == "msisdn":
+                threepid = yield identity_handler.threepid_from_creds(
+                    self.hs.config.account_threepid_delegate_msisdn, threepid_creds
+                )
+            else:
+                raise SynapseError(400, "Unrecognized threepid medium: %s" % (medium,))
+        elif self.hs.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL:
+            row = yield self.store.get_threepid_validation_session(
+                medium,
+                threepid_creds["client_secret"],
+                sid=threepid_creds["sid"],
+                validated=True,
+            )
+
+            threepid = (
+                {
+                    "medium": row["medium"],
+                    "address": row["address"],
+                    "validated_at": row["validated_at"],
+                }
+                if row
+                else None
+            )
+
+            if row:
+                # Valid threepid returned, delete from the db
+                yield self.store.delete_threepid_session(threepid_creds["sid"])
+        else:
+            raise SynapseError(
+                400, "Password resets are not enabled on this homeserver"
+            )
+
+        if not threepid:
+            raise LoginError(401, "", errcode=Codes.UNAUTHORIZED)
+
+        if threepid["medium"] != medium:
+            raise LoginError(
+                401,
+                "Expecting threepid of type '%s', got '%s'"
+                % (medium, threepid["medium"]),
+                errcode=Codes.UNAUTHORIZED,
+            )
+
+        threepid["threepid_creds"] = authdict["threepid_creds"]
+
+        return threepid
+
+
+class EmailIdentityAuthChecker(UserInteractiveAuthChecker, _BaseThreepidAuthChecker):
+    AUTH_TYPE = LoginType.EMAIL_IDENTITY
+
+    def __init__(self, hs):
+        UserInteractiveAuthChecker.__init__(self, hs)
+        _BaseThreepidAuthChecker.__init__(self, hs)
+
+    def check_auth(self, authdict, clientip):
+        return self._check_threepid("email", authdict)
+
+
+class MsisdnAuthChecker(UserInteractiveAuthChecker, _BaseThreepidAuthChecker):
+    AUTH_TYPE = LoginType.MSISDN
+
+    def __init__(self, hs):
+        UserInteractiveAuthChecker.__init__(self, hs)
+        _BaseThreepidAuthChecker.__init__(self, hs)
+
+    def check_auth(self, authdict, clientip):
+        return self._check_threepid("msisdn", authdict)
+
+
+INTERACTIVE_AUTH_CHECKERS = [
+    DummyAuthChecker,
+    TermsAuthChecker,
+    RecaptchaAuthChecker,
+    EmailIdentityAuthChecker,
+    MsisdnAuthChecker,
+]
+"""A list of UserInteractiveAuthChecker classes"""
diff --git a/tests/rest/client/v2_alpha/test_auth.py b/tests/rest/client/v2_alpha/test_auth.py
index b9ef46e8f..b6df1396a 100644
--- a/tests/rest/client/v2_alpha/test_auth.py
+++ b/tests/rest/client/v2_alpha/test_auth.py
@@ -18,11 +18,22 @@ from twisted.internet.defer import succeed
 
 import synapse.rest.admin
 from synapse.api.constants import LoginType
+from synapse.handlers.ui_auth.checkers import UserInteractiveAuthChecker
 from synapse.rest.client.v2_alpha import auth, register
 
 from tests import unittest
 
 
+class DummyRecaptchaChecker(UserInteractiveAuthChecker):
+    def __init__(self, hs):
+        super().__init__(hs)
+        self.recaptcha_attempts = []
+
+    def check_auth(self, authdict, clientip):
+        self.recaptcha_attempts.append((authdict, clientip))
+        return succeed(True)
+
+
 class FallbackAuthTests(unittest.HomeserverTestCase):
 
     servlets = [
@@ -44,15 +55,9 @@ class FallbackAuthTests(unittest.HomeserverTestCase):
         return hs
 
     def prepare(self, reactor, clock, hs):
+        self.recaptcha_checker = DummyRecaptchaChecker(hs)
         auth_handler = hs.get_auth_handler()
-
-        self.recaptcha_attempts = []
-
-        def _recaptcha(authdict, clientip):
-            self.recaptcha_attempts.append((authdict, clientip))
-            return succeed(True)
-
-        auth_handler.checkers[LoginType.RECAPTCHA] = _recaptcha
+        auth_handler.checkers[LoginType.RECAPTCHA] = self.recaptcha_checker
 
     @unittest.INFO
     def test_fallback_captcha(self):
@@ -89,8 +94,9 @@ class FallbackAuthTests(unittest.HomeserverTestCase):
         self.assertEqual(request.code, 200)
 
         # The recaptcha handler is called with the response given
-        self.assertEqual(len(self.recaptcha_attempts), 1)
-        self.assertEqual(self.recaptcha_attempts[0][0]["response"], "a")
+        attempts = self.recaptcha_checker.recaptcha_attempts
+        self.assertEqual(len(attempts), 1)
+        self.assertEqual(attempts[0][0]["response"], "a")
 
         # also complete the dummy auth
         request, channel = self.make_request(

From 5d99713854ee0672ba95d72ef13ce1cbcbc781c5 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 25 Sep 2019 11:39:15 +0100
Subject: [PATCH 107/276] Add tags for event_id and txn_id in event sending

This will make it easier to search traces for event-sending requests.
---
 synapse/rest/client/v1/room.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py
index a6a7b3b57..19f150af9 100644
--- a/synapse/rest/client/v1/room.py
+++ b/synapse/rest/client/v1/room.py
@@ -44,6 +44,7 @@ from synapse.rest.client.v2_alpha._base import client_patterns
 from synapse.storage.state import StateFilter
 from synapse.streams.config import PaginationConfig
 from synapse.types import RoomAlias, RoomID, StreamToken, ThirdPartyInstanceID, UserID
+from synapse.logging.opentracing import set_tag
 
 logger = logging.getLogger(__name__)
 
@@ -81,6 +82,7 @@ class RoomCreateRestServlet(TransactionRestServlet):
         )
 
     def on_PUT(self, request, txn_id):
+        set_tag("txn_id", txn_id)
         return self.txns.fetch_or_execute_request(request, self.on_POST, request)
 
     @defer.inlineCallbacks
@@ -181,6 +183,9 @@ class RoomStateEventRestServlet(TransactionRestServlet):
     def on_PUT(self, request, room_id, event_type, state_key, txn_id=None):
         requester = yield self.auth.get_user_by_req(request)
 
+        if txn_id:
+            set_tag("txn_id", txn_id)
+
         content = parse_json_object_from_request(request)
 
         event_dict = {
@@ -209,6 +214,7 @@ class RoomStateEventRestServlet(TransactionRestServlet):
 
         ret = {}
         if event:
+            set_tag("event_id", event.event_id)
             ret = {"event_id": event.event_id}
         return 200, ret
 
@@ -244,12 +250,15 @@ class RoomSendEventRestServlet(TransactionRestServlet):
             requester, event_dict, txn_id=txn_id
         )
 
+        set_tag("event_id", event.event_id)
         return 200, {"event_id": event.event_id}
 
     def on_GET(self, request, room_id, event_type, txn_id):
         return 200, "Not implemented"
 
     def on_PUT(self, request, room_id, event_type, txn_id):
+        set_tag("txn_id", txn_id)
+
         return self.txns.fetch_or_execute_request(
             request, self.on_POST, request, room_id, event_type, txn_id
         )
@@ -310,6 +319,8 @@ class JoinRoomAliasServlet(TransactionRestServlet):
         return 200, {"room_id": room_id}
 
     def on_PUT(self, request, room_identifier, txn_id):
+        set_tag("txn_id", txn_id)
+
         return self.txns.fetch_or_execute_request(
             request, self.on_POST, request, room_identifier, txn_id
         )
@@ -655,6 +666,8 @@ class RoomForgetRestServlet(TransactionRestServlet):
         return 200, {}
 
     def on_PUT(self, request, room_id, txn_id):
+        set_tag("txn_id", txn_id)
+
         return self.txns.fetch_or_execute_request(
             request, self.on_POST, request, room_id, txn_id
         )
@@ -738,6 +751,8 @@ class RoomMembershipRestServlet(TransactionRestServlet):
         return True
 
     def on_PUT(self, request, room_id, membership_action, txn_id):
+        set_tag("txn_id", txn_id)
+
         return self.txns.fetch_or_execute_request(
             request, self.on_POST, request, room_id, membership_action, txn_id
         )
@@ -771,9 +786,12 @@ class RoomRedactEventRestServlet(TransactionRestServlet):
             txn_id=txn_id,
         )
 
+        set_tag("event_id", event.event_id)
         return 200, {"event_id": event.event_id}
 
     def on_PUT(self, request, room_id, event_id, txn_id):
+        set_tag("txn_id", txn_id)
+
         return self.txns.fetch_or_execute_request(
             request, self.on_POST, request, room_id, event_id, txn_id
         )

From 5c1af6d1b8ea8bad770fe8a70d9badb28dcfb9b9 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 25 Sep 2019 11:42:00 +0100
Subject: [PATCH 108/276] Newsfile

---
 changelog.d/6108.misc | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/6108.misc

diff --git a/changelog.d/6108.misc b/changelog.d/6108.misc
new file mode 100644
index 000000000..6c3f9460e
--- /dev/null
+++ b/changelog.d/6108.misc
@@ -0,0 +1 @@
+Remove `get_user_by_req` opentracing span and add some tags.

From dc01cad690e3c6cb1ccb57995554dd93ab1636f2 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 25 Sep 2019 11:59:00 +0100
Subject: [PATCH 109/276] Add device and appservice tags

---
 synapse/api/auth.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/synapse/api/auth.py b/synapse/api/auth.py
index 59852bdbd..cb50579fd 100644
--- a/synapse/api/auth.py
+++ b/synapse/api/auth.py
@@ -211,6 +211,7 @@ class Auth(object):
             if user_id:
                 request.authenticated_entity = user_id
                 opentracing.set_tag("authenticated_entity", user_id)
+                opentracing.set_tag("appservice_id", app_service.id)
 
                 if ip_addr and self.hs.config.track_appservice_user_ips:
                     yield self.store.insert_client_ip(
@@ -262,6 +263,8 @@ class Auth(object):
 
             request.authenticated_entity = user.to_string()
             opentracing.set_tag("authenticated_entity", user.to_string())
+            if device_id:
+                opentracing.set_tag("device_id", device_id)
 
             return synapse.types.create_requester(
                 user, token_id, is_guest, device_id, app_service=app_service

From dc2c97e1a36cc3c2f584223a0d8a3faa810471c0 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 25 Sep 2019 11:59:05 +0100
Subject: [PATCH 110/276] isort

---
 synapse/rest/client/v1/room.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py
index 19f150af9..6bf924ded 100644
--- a/synapse/rest/client/v1/room.py
+++ b/synapse/rest/client/v1/room.py
@@ -39,12 +39,12 @@ from synapse.http.servlet import (
     parse_json_object_from_request,
     parse_string,
 )
+from synapse.logging.opentracing import set_tag
 from synapse.rest.client.transactions import HttpTransactionCache
 from synapse.rest.client.v2_alpha._base import client_patterns
 from synapse.storage.state import StateFilter
 from synapse.streams.config import PaginationConfig
 from synapse.types import RoomAlias, RoomID, StreamToken, ThirdPartyInstanceID, UserID
-from synapse.logging.opentracing import set_tag
 
 logger = logging.getLogger(__name__)
 

From 990928abde4f3ccd7d43e6214abd7d36434953a9 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Wed, 25 Sep 2019 12:10:26 +0100
Subject: [PATCH 111/276] Stop advertising unsupported flows for registration
 (#6107)

If email or msisdn verification aren't supported, let's stop advertising them
for registration.

Fixes #6100.
---
 changelog.d/6107.bugfix                     |  1 +
 synapse/handlers/auth.py                    | 11 ++++++-
 synapse/handlers/ui_auth/checkers.py        | 26 +++++++++++++++++
 synapse/rest/client/v2_alpha/register.py    | 32 +++++++++++++++++++--
 tests/rest/client/v2_alpha/test_register.py | 29 +++++++++++--------
 5 files changed, 83 insertions(+), 16 deletions(-)
 create mode 100644 changelog.d/6107.bugfix

diff --git a/changelog.d/6107.bugfix b/changelog.d/6107.bugfix
new file mode 100644
index 000000000..d4b9516ac
--- /dev/null
+++ b/changelog.d/6107.bugfix
@@ -0,0 +1 @@
+Ensure that servers which are not configured to support email address verification do not offer it in the registration flows.
\ No newline at end of file
diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py
index f920c2f6c..333eb3062 100644
--- a/synapse/handlers/auth.py
+++ b/synapse/handlers/auth.py
@@ -61,7 +61,8 @@ class AuthHandler(BaseHandler):
         self.checkers = {}  # type: dict[str, UserInteractiveAuthChecker]
         for auth_checker_class in INTERACTIVE_AUTH_CHECKERS:
             inst = auth_checker_class(hs)
-            self.checkers[inst.AUTH_TYPE] = inst
+            if inst.is_enabled():
+                self.checkers[inst.AUTH_TYPE] = inst
 
         self.bcrypt_rounds = hs.config.bcrypt_rounds
 
@@ -156,6 +157,14 @@ class AuthHandler(BaseHandler):
 
         return params
 
+    def get_enabled_auth_types(self):
+        """Return the enabled user-interactive authentication types
+
+        Returns the UI-Auth types which are supported by the homeserver's current
+        config.
+        """
+        return self.checkers.keys()
+
     @defer.inlineCallbacks
     def check_auth(self, flows, clientdict, clientip):
         """
diff --git a/synapse/handlers/ui_auth/checkers.py b/synapse/handlers/ui_auth/checkers.py
index fd633b7b0..ee6922324 100644
--- a/synapse/handlers/ui_auth/checkers.py
+++ b/synapse/handlers/ui_auth/checkers.py
@@ -32,6 +32,13 @@ class UserInteractiveAuthChecker:
     def __init__(self, hs):
         pass
 
+    def is_enabled(self):
+        """Check if the configuration of the homeserver allows this checker to work
+
+        Returns:
+            bool: True if this login type is enabled.
+        """
+
     def check_auth(self, authdict, clientip):
         """Given the authentication dict from the client, attempt to check this step
 
@@ -51,6 +58,9 @@ class UserInteractiveAuthChecker:
 class DummyAuthChecker(UserInteractiveAuthChecker):
     AUTH_TYPE = LoginType.DUMMY
 
+    def is_enabled(self):
+        return True
+
     def check_auth(self, authdict, clientip):
         return defer.succeed(True)
 
@@ -58,6 +68,9 @@ class DummyAuthChecker(UserInteractiveAuthChecker):
 class TermsAuthChecker(UserInteractiveAuthChecker):
     AUTH_TYPE = LoginType.TERMS
 
+    def is_enabled(self):
+        return True
+
     def check_auth(self, authdict, clientip):
         return defer.succeed(True)
 
@@ -67,10 +80,14 @@ class RecaptchaAuthChecker(UserInteractiveAuthChecker):
 
     def __init__(self, hs):
         super().__init__(hs)
+        self._enabled = bool(hs.config.recaptcha_private_key)
         self._http_client = hs.get_simple_http_client()
         self._url = hs.config.recaptcha_siteverify_api
         self._secret = hs.config.recaptcha_private_key
 
+    def is_enabled(self):
+        return self._enabled
+
     @defer.inlineCallbacks
     def check_auth(self, authdict, clientip):
         try:
@@ -191,6 +208,12 @@ class EmailIdentityAuthChecker(UserInteractiveAuthChecker, _BaseThreepidAuthChec
         UserInteractiveAuthChecker.__init__(self, hs)
         _BaseThreepidAuthChecker.__init__(self, hs)
 
+    def is_enabled(self):
+        return self.hs.config.threepid_behaviour_email in (
+            ThreepidBehaviour.REMOTE,
+            ThreepidBehaviour.LOCAL,
+        )
+
     def check_auth(self, authdict, clientip):
         return self._check_threepid("email", authdict)
 
@@ -202,6 +225,9 @@ class MsisdnAuthChecker(UserInteractiveAuthChecker, _BaseThreepidAuthChecker):
         UserInteractiveAuthChecker.__init__(self, hs)
         _BaseThreepidAuthChecker.__init__(self, hs)
 
+    def is_enabled(self):
+        return bool(self.hs.config.account_threepid_delegate_msisdn)
+
     def check_auth(self, authdict, clientip):
         return self._check_threepid("msisdn", authdict)
 
diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py
index e3f3d9126..4f24a124a 100644
--- a/synapse/rest/client/v2_alpha/register.py
+++ b/synapse/rest/client/v2_alpha/register.py
@@ -32,12 +32,14 @@ from synapse.api.errors import (
     ThreepidValidationError,
     UnrecognizedRequestError,
 )
+from synapse.config import ConfigError
 from synapse.config.captcha import CaptchaConfig
 from synapse.config.consent_config import ConsentConfig
 from synapse.config.emailconfig import ThreepidBehaviour
 from synapse.config.ratelimiting import FederationRateLimitConfig
 from synapse.config.registration import RegistrationConfig
 from synapse.config.server import is_threepid_reserved
+from synapse.handlers.auth import AuthHandler
 from synapse.http.server import finish_request
 from synapse.http.servlet import (
     RestServlet,
@@ -375,7 +377,9 @@ class RegisterRestServlet(RestServlet):
         self.ratelimiter = hs.get_registration_ratelimiter()
         self.clock = hs.get_clock()
 
-        self._registration_flows = _calculate_registration_flows(hs.config)
+        self._registration_flows = _calculate_registration_flows(
+            hs.config, self.auth_handler
+        )
 
     @interactive_auth_handler
     @defer.inlineCallbacks
@@ -664,11 +668,13 @@ class RegisterRestServlet(RestServlet):
 def _calculate_registration_flows(
     # technically `config` has to provide *all* of these interfaces, not just one
     config: Union[RegistrationConfig, ConsentConfig, CaptchaConfig],
+    auth_handler: AuthHandler,
 ) -> List[List[str]]:
     """Get a suitable flows list for registration
 
     Args:
         config: server configuration
+        auth_handler: authorization handler
 
     Returns: a list of supported flows
     """
@@ -678,10 +684,29 @@ def _calculate_registration_flows(
     require_msisdn = "msisdn" in config.registrations_require_3pid
 
     show_msisdn = True
+    show_email = True
+
     if config.disable_msisdn_registration:
         show_msisdn = False
         require_msisdn = False
 
+    enabled_auth_types = auth_handler.get_enabled_auth_types()
+    if LoginType.EMAIL_IDENTITY not in enabled_auth_types:
+        show_email = False
+        if require_email:
+            raise ConfigError(
+                "Configuration requires email address at registration, but email "
+                "validation is not configured"
+            )
+
+    if LoginType.MSISDN not in enabled_auth_types:
+        show_msisdn = False
+        if require_msisdn:
+            raise ConfigError(
+                "Configuration requires msisdn at registration, but msisdn "
+                "validation is not configured"
+            )
+
     flows = []
 
     # only support 3PIDless registration if no 3PIDs are required
@@ -693,14 +718,15 @@ def _calculate_registration_flows(
         flows.append([LoginType.DUMMY])
 
     # only support the email-only flow if we don't require MSISDN 3PIDs
-    if not require_msisdn:
+    if show_email and not require_msisdn:
         flows.append([LoginType.EMAIL_IDENTITY])
 
     # only support the MSISDN-only flow if we don't require email 3PIDs
     if show_msisdn and not require_email:
         flows.append([LoginType.MSISDN])
 
-    if show_msisdn:
+    if show_email and show_msisdn:
+        # always let users provide both MSISDN & email
         flows.append([LoginType.MSISDN, LoginType.EMAIL_IDENTITY])
 
     # Prepend m.login.terms to all flows if we're requiring consent
diff --git a/tests/rest/client/v2_alpha/test_register.py b/tests/rest/client/v2_alpha/test_register.py
index bc2dc4797..dab87e5ed 100644
--- a/tests/rest/client/v2_alpha/test_register.py
+++ b/tests/rest/client/v2_alpha/test_register.py
@@ -198,16 +198,8 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase):
         self.assertEquals(channel.result["code"], b"401", channel.result)
         flows = channel.json_body["flows"]
 
-        # with the stock config, we expect all four combinations of 3pid
-        self.assertCountEqual(
-            [
-                ["m.login.dummy"],
-                ["m.login.email.identity"],
-                ["m.login.msisdn"],
-                ["m.login.msisdn", "m.login.email.identity"],
-            ],
-            (f["stages"] for f in flows),
-        )
+        # with the stock config, we only expect the dummy flow
+        self.assertCountEqual([["m.login.dummy"]], (f["stages"] for f in flows))
 
     @unittest.override_config(
         {
@@ -217,9 +209,13 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase):
                 "template_dir": "/",
                 "require_at_registration": True,
             },
+            "account_threepid_delegates": {
+                "email": "https://id_server",
+                "msisdn": "https://id_server",
+            },
         }
     )
-    def test_advertised_flows_captcha_and_terms(self):
+    def test_advertised_flows_captcha_and_terms_and_3pids(self):
         request, channel = self.make_request(b"POST", self.url, b"{}")
         self.render(request)
         self.assertEquals(channel.result["code"], b"401", channel.result)
@@ -241,7 +237,16 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase):
         )
 
     @unittest.override_config(
-        {"registrations_require_3pid": ["email"], "disable_msisdn_registration": True}
+        {
+            "public_baseurl": "https://test_server",
+            "registrations_require_3pid": ["email"],
+            "disable_msisdn_registration": True,
+            "email": {
+                "smtp_host": "mail_server",
+                "smtp_port": 2525,
+                "notif_from": "sender@host",
+            },
+        }
     )
     def test_advertised_flows_no_msisdn_email_required(self):
         request, channel = self.make_request(b"POST", self.url, b"{}")

From 77dc7093a738ec4e172c92b7a53d58aa41bfec0a Mon Sep 17 00:00:00 2001
From: Neil Johnson <neil@matrix.org>
Date: Wed, 25 Sep 2019 12:29:35 +0100
Subject: [PATCH 112/276] Threepid validity checks on msisdns should not be
 dependent on 'threepid_behaviour_email'. (#6104)

Fixes #6103
---
 changelog.d/6104.bugfix              |  1 +
 synapse/handlers/ui_auth/checkers.py | 65 +++++++++++++++-------------
 2 files changed, 36 insertions(+), 30 deletions(-)
 create mode 100644 changelog.d/6104.bugfix

diff --git a/changelog.d/6104.bugfix b/changelog.d/6104.bugfix
new file mode 100644
index 000000000..41114a66e
--- /dev/null
+++ b/changelog.d/6104.bugfix
@@ -0,0 +1 @@
+Threepid validity checks on msisdns should not be dependent on 'threepid_behaviour_email'.
diff --git a/synapse/handlers/ui_auth/checkers.py b/synapse/handlers/ui_auth/checkers.py
index ee6922324..29aa1e5aa 100644
--- a/synapse/handlers/ui_auth/checkers.py
+++ b/synapse/handlers/ui_auth/checkers.py
@@ -148,42 +148,47 @@ class _BaseThreepidAuthChecker:
         identity_handler = self.hs.get_handlers().identity_handler
 
         logger.info("Getting validated threepid. threepidcreds: %r", (threepid_creds,))
-        if self.hs.config.threepid_behaviour_email == ThreepidBehaviour.REMOTE:
-            if medium == "email":
+
+        # msisdns are currently always ThreepidBehaviour.REMOTE
+        if medium == "msisdn":
+            if not self.hs.config.account_threepid_delegate_msisdn:
+                raise SynapseError(
+                    400, "Phone number verification is not enabled on this homeserver"
+                )
+            threepid = yield identity_handler.threepid_from_creds(
+                self.hs.config.account_threepid_delegate_msisdn, threepid_creds
+            )
+        elif medium == "email":
+            if self.hs.config.threepid_behaviour_email == ThreepidBehaviour.REMOTE:
+                assert self.hs.config.account_threepid_delegate_email
                 threepid = yield identity_handler.threepid_from_creds(
                     self.hs.config.account_threepid_delegate_email, threepid_creds
                 )
-            elif medium == "msisdn":
-                threepid = yield identity_handler.threepid_from_creds(
-                    self.hs.config.account_threepid_delegate_msisdn, threepid_creds
+            elif self.hs.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL:
+                threepid = None
+                row = yield self.store.get_threepid_validation_session(
+                    medium,
+                    threepid_creds["client_secret"],
+                    sid=threepid_creds["sid"],
+                    validated=True,
                 )
+
+                if row:
+                    threepid = {
+                        "medium": row["medium"],
+                        "address": row["address"],
+                        "validated_at": row["validated_at"],
+                    }
+
+                    # Valid threepid returned, delete from the db
+                    yield self.store.delete_threepid_session(threepid_creds["sid"])
             else:
-                raise SynapseError(400, "Unrecognized threepid medium: %s" % (medium,))
-        elif self.hs.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL:
-            row = yield self.store.get_threepid_validation_session(
-                medium,
-                threepid_creds["client_secret"],
-                sid=threepid_creds["sid"],
-                validated=True,
-            )
-
-            threepid = (
-                {
-                    "medium": row["medium"],
-                    "address": row["address"],
-                    "validated_at": row["validated_at"],
-                }
-                if row
-                else None
-            )
-
-            if row:
-                # Valid threepid returned, delete from the db
-                yield self.store.delete_threepid_session(threepid_creds["sid"])
+                raise SynapseError(
+                    400, "Email address verification is not enabled on this homeserver"
+                )
         else:
-            raise SynapseError(
-                400, "Password resets are not enabled on this homeserver"
-            )
+            # this can't happen!
+            raise AssertionError("Unrecognized threepid medium: %s" % (medium,))
 
         if not threepid:
             raise LoginError(401, "", errcode=Codes.UNAUTHORIZED)

From 50572db837f3e6a0869e9ec573e02d4af72548ea Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 25 Sep 2019 17:00:23 +0100
Subject: [PATCH 113/276] Use if `is not None`

Co-Authored-By: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
---
 synapse/storage/client_ips.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py
index a4e6d9dbe..899668974 100644
--- a/synapse/storage/client_ips.py
+++ b/synapse/storage/client_ips.py
@@ -393,7 +393,7 @@ class ClientIpStore(background_updates.BackgroundUpdateStore):
         """
 
         keyvalues = {"user_id": user_id}
-        if device_id:
+        if device_id is not None:
             keyvalues["device_id"] = device_id
 
         res = yield self._simple_select_list(

From 39b50ad42a8cf784e627959e9652589338121ccd Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 25 Sep 2019 17:22:33 +0100
Subject: [PATCH 114/276] Review comments

---
 docs/sample_config.yaml               | 2 +-
 synapse/config/server.py              | 2 +-
 synapse/storage/background_updates.py | 5 +----
 synapse/storage/client_ips.py         | 2 +-
 4 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml
index cc6035c83..7902d9ed6 100644
--- a/docs/sample_config.yaml
+++ b/docs/sample_config.yaml
@@ -315,7 +315,7 @@ redaction_retention_period: 7d
 
 # How long to track users' last seen time and IPs in the database.
 #
-# Defaults to `28d`. Set to `null` to disable.
+# Defaults to `28d`. Set to `null` to disable clearing out of old rows.
 #
 #user_ips_max_age: 14d
 
diff --git a/synapse/config/server.py b/synapse/config/server.py
index 655e7487a..f8b7b4bef 100644
--- a/synapse/config/server.py
+++ b/synapse/config/server.py
@@ -745,7 +745,7 @@ class ServerConfig(Config):
 
         # How long to track users' last seen time and IPs in the database.
         #
-        # Defaults to `28d`. Set to `null` to disable.
+        # Defaults to `28d`. Set to `null` to disable clearing out of old rows.
         #
         #user_ips_max_age: 14d
         """
diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py
index 3fc25cd82..30788137a 100644
--- a/synapse/storage/background_updates.py
+++ b/synapse/storage/background_updates.py
@@ -148,11 +148,8 @@ class BackgroundUpdateStore(SQLBaseStore):
 
         return False
 
-    async def has_completed_background_update(self, update_name):
+    async def has_completed_background_update(self, update_name) -> bool:
         """Check if the given background update has finished running.
-
-        Returns:
-            Deferred[bool]
         """
 
         if self._all_done:
diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py
index 176c812b1..a4d40dfa1 100644
--- a/synapse/storage/client_ips.py
+++ b/synapse/storage/client_ips.py
@@ -506,7 +506,7 @@ class ClientIpStore(background_updates.BackgroundUpdateStore):
         """Removes entries in user IPs older than the configured period.
         """
 
-        if not self.user_ips_max_age:
+        if self.user_ips_max_age is None:
             # Nothing to do
             return
 

From a4f3ca48b5250a1c2c4de8a363f69bbeb0adeefd Mon Sep 17 00:00:00 2001
From: Neil Johnson <neil@matrix.org>
Date: Wed, 25 Sep 2019 17:27:35 +0100
Subject: [PATCH 115/276] Enable cleaning up extremities with dummy events by
 default to prevent undue build up of forward extremities. (#5884)

---
 changelog.d/5884.feature | 1 +
 synapse/config/server.py | 4 +---
 2 files changed, 2 insertions(+), 3 deletions(-)
 create mode 100644 changelog.d/5884.feature

diff --git a/changelog.d/5884.feature b/changelog.d/5884.feature
new file mode 100644
index 000000000..bfd048939
--- /dev/null
+++ b/changelog.d/5884.feature
@@ -0,0 +1 @@
+Enable cleaning up extremities with dummy events by default to prevent undue build up of forward extremities.
diff --git a/synapse/config/server.py b/synapse/config/server.py
index 419787a89..3a7a49bc9 100644
--- a/synapse/config/server.py
+++ b/synapse/config/server.py
@@ -355,10 +355,8 @@ class ServerConfig(Config):
 
         _check_resource_config(self.listeners)
 
-        # An experimental option to try and periodically clean up extremities
-        # by sending dummy events.
         self.cleanup_extremities_with_dummy_events = config.get(
-            "cleanup_extremities_with_dummy_events", False
+            "cleanup_extremities_with_dummy_events", True
         )
 
     def has_tls_listener(self):

From a96318127dc17ee102bcf90821d90b7e6079a85d Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Wed, 25 Sep 2019 18:17:39 +0100
Subject: [PATCH 116/276] Update comments and docstring

---
 synapse/metrics/background_process_metrics.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/synapse/metrics/background_process_metrics.py b/synapse/metrics/background_process_metrics.py
index b24e2fab4..c53d2a0d4 100644
--- a/synapse/metrics/background_process_metrics.py
+++ b/synapse/metrics/background_process_metrics.py
@@ -175,7 +175,7 @@ def run_as_background_process(desc, func, *args, **kwargs):
 
     Args:
         desc (str): a description for this background process type
-        func: a function, which may return a Deferred
+        func: a function, which may return a Deferred or a coroutine
         args: positional args for func
         kwargs: keyword args for func
 
@@ -199,11 +199,13 @@ def run_as_background_process(desc, func, *args, **kwargs):
                 _background_processes.setdefault(desc, set()).add(proc)
 
             try:
-                # We ensureDeferred here to handle coroutines
                 result = func(*args, **kwargs)
 
-                # We need this check because ensureDeferred doesn't like when
-                # func doesn't return a Deferred or coroutine.
+                # We probably don't have an ensureDeferred in our call stack to handle
+                # coroutine results, so we need to ensureDeferred here.
+                #
+                # But we need this check because ensureDeferred doesn't like being
+                # called on immediate values (as opposed to Deferreds or coroutines).
                 if iscoroutine(result):
                     result = defer.ensureDeferred(result)
 

From 034db2ba2115d935ce62b641b4051e477a454eac Mon Sep 17 00:00:00 2001
From: Neil Johnson <neil@matrix.org>
Date: Thu, 26 Sep 2019 11:47:53 +0100
Subject: [PATCH 117/276] Fix dummy event insertion consent bug (#6053)

Fixes #5905
---
 changelog.d/6053.bugfix                |   1 +
 synapse/handlers/message.py            |  97 +++++++++++----
 synapse/storage/event_federation.py    |  18 ++-
 tests/storage/test_cleanup_extrems.py  | 157 ++++++++++++++++++++++---
 tests/storage/test_event_federation.py |  40 +++++++
 5 files changed, 270 insertions(+), 43 deletions(-)
 create mode 100644 changelog.d/6053.bugfix

diff --git a/changelog.d/6053.bugfix b/changelog.d/6053.bugfix
new file mode 100644
index 000000000..6311157bf
--- /dev/null
+++ b/changelog.d/6053.bugfix
@@ -0,0 +1 @@
+Prevent exceptions being logged when extremity-cleanup events fail due to lack of user consent to the terms of service.
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index 1f8272784..0f8cce8ff 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -222,6 +222,13 @@ class MessageHandler(object):
         }
 
 
+# The duration (in ms) after which rooms should be removed from
+# `_rooms_to_exclude_from_dummy_event_insertion` (with the effect that we will try
+# to generate a dummy event for them once more)
+#
+_DUMMY_EVENT_ROOM_EXCLUSION_EXPIRY = 7 * 24 * 60 * 60 * 1000
+
+
 class EventCreationHandler(object):
     def __init__(self, hs):
         self.hs = hs
@@ -258,6 +265,13 @@ class EventCreationHandler(object):
             self.config.block_events_without_consent_error
         )
 
+        # Rooms which should be excluded from dummy insertion. (For instance,
+        # those without local users who can send events into the room).
+        #
+        # map from room id to time-of-last-attempt.
+        #
+        self._rooms_to_exclude_from_dummy_event_insertion = {}  # type: dict[str, int]
+
         # we need to construct a ConsentURIBuilder here, as it checks that the necessary
         # config options, but *only* if we have a configuration for which we are
         # going to need it.
@@ -888,9 +902,11 @@ class EventCreationHandler(object):
         """Background task to send dummy events into rooms that have a large
         number of extremities
         """
-
+        self._expire_rooms_to_exclude_from_dummy_event_insertion()
         room_ids = yield self.store.get_rooms_with_many_extremities(
-            min_count=10, limit=5
+            min_count=10,
+            limit=5,
+            room_id_filter=self._rooms_to_exclude_from_dummy_event_insertion.keys(),
         )
 
         for room_id in room_ids:
@@ -904,32 +920,61 @@ class EventCreationHandler(object):
             members = yield self.state.get_current_users_in_room(
                 room_id, latest_event_ids=latest_event_ids
             )
+            dummy_event_sent = False
+            for user_id in members:
+                if not self.hs.is_mine_id(user_id):
+                    continue
+                requester = create_requester(user_id)
+                try:
+                    event, context = yield self.create_event(
+                        requester,
+                        {
+                            "type": "org.matrix.dummy_event",
+                            "content": {},
+                            "room_id": room_id,
+                            "sender": user_id,
+                        },
+                        prev_events_and_hashes=prev_events_and_hashes,
+                    )
 
-            user_id = None
-            for member in members:
-                if self.hs.is_mine_id(member):
-                    user_id = member
+                    event.internal_metadata.proactively_send = False
+
+                    yield self.send_nonmember_event(
+                        requester, event, context, ratelimit=False
+                    )
+                    dummy_event_sent = True
                     break
+                except ConsentNotGivenError:
+                    logger.info(
+                        "Failed to send dummy event into room %s for user %s due to "
+                        "lack of consent. Will try another user" % (room_id, user_id)
+                    )
+                except AuthError:
+                    logger.info(
+                        "Failed to send dummy event into room %s for user %s due to "
+                        "lack of power. Will try another user" % (room_id, user_id)
+                    )
 
-            if not user_id:
-                # We don't have a joined user.
-                # TODO: We should do something here to stop the room from
-                # appearing next time.
-                continue
+            if not dummy_event_sent:
+                # Did not find a valid user in the room, so remove from future attempts
+                # Exclusion is time limited, so the room will be rechecked in the future
+                # dependent on _DUMMY_EVENT_ROOM_EXCLUSION_EXPIRY
+                logger.info(
+                    "Failed to send dummy event into room %s. Will exclude it from "
+                    "future attempts until cache expires" % (room_id,)
+                )
+                now = self.clock.time_msec()
+                self._rooms_to_exclude_from_dummy_event_insertion[room_id] = now
 
-            requester = create_requester(user_id)
-
-            event, context = yield self.create_event(
-                requester,
-                {
-                    "type": "org.matrix.dummy_event",
-                    "content": {},
-                    "room_id": room_id,
-                    "sender": user_id,
-                },
-                prev_events_and_hashes=prev_events_and_hashes,
+    def _expire_rooms_to_exclude_from_dummy_event_insertion(self):
+        expire_before = self.clock.time_msec() - _DUMMY_EVENT_ROOM_EXCLUSION_EXPIRY
+        to_expire = set()
+        for room_id, time in self._rooms_to_exclude_from_dummy_event_insertion.items():
+            if time < expire_before:
+                to_expire.add(room_id)
+        for room_id in to_expire:
+            logger.debug(
+                "Expiring room id %s from dummy event insertion exclusion cache",
+                room_id,
             )
-
-            event.internal_metadata.proactively_send = False
-
-            yield self.send_nonmember_event(requester, event, context, ratelimit=False)
+            del self._rooms_to_exclude_from_dummy_event_insertion[room_id]
diff --git a/synapse/storage/event_federation.py b/synapse/storage/event_federation.py
index 4f500d893..f5e8c3926 100644
--- a/synapse/storage/event_federation.py
+++ b/synapse/storage/event_federation.py
@@ -12,6 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import itertools
 import logging
 import random
 
@@ -190,12 +191,13 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas
             room_id,
         )
 
-    def get_rooms_with_many_extremities(self, min_count, limit):
+    def get_rooms_with_many_extremities(self, min_count, limit, room_id_filter):
         """Get the top rooms with at least N extremities.
 
         Args:
             min_count (int): The minimum number of extremities
             limit (int): The maximum number of rooms to return.
+            room_id_filter (iterable[str]): room_ids to exclude from the results
 
         Returns:
             Deferred[list]: At most `limit` room IDs that have at least
@@ -203,15 +205,25 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas
         """
 
         def _get_rooms_with_many_extremities_txn(txn):
+            where_clause = "1=1"
+            if room_id_filter:
+                where_clause = "room_id NOT IN (%s)" % (
+                    ",".join("?" for _ in room_id_filter),
+                )
+
             sql = """
                 SELECT room_id FROM event_forward_extremities
+                WHERE %s
                 GROUP BY room_id
                 HAVING count(*) > ?
                 ORDER BY count(*) DESC
                 LIMIT ?
-            """
+            """ % (
+                where_clause,
+            )
 
-            txn.execute(sql, (min_count, limit))
+            query_args = list(itertools.chain(room_id_filter, [min_count, limit]))
+            txn.execute(sql, query_args)
             return [room_id for room_id, in txn]
 
         return self.runInteraction(
diff --git a/tests/storage/test_cleanup_extrems.py b/tests/storage/test_cleanup_extrems.py
index e9e2d5337..34f9c7270 100644
--- a/tests/storage/test_cleanup_extrems.py
+++ b/tests/storage/test_cleanup_extrems.py
@@ -14,7 +14,13 @@
 # limitations under the License.
 
 import os.path
+from unittest.mock import patch
 
+from mock import Mock
+
+import synapse.rest.admin
+from synapse.api.constants import EventTypes
+from synapse.rest.client.v1 import login, room
 from synapse.storage import prepare_database
 from synapse.types import Requester, UserID
 
@@ -225,6 +231,14 @@ class CleanupExtremBackgroundUpdateStoreTestCase(HomeserverTestCase):
 
 
 class CleanupExtremDummyEventsTestCase(HomeserverTestCase):
+    CONSENT_VERSION = "1"
+    EXTREMITIES_COUNT = 50
+    servlets = [
+        synapse.rest.admin.register_servlets_for_client_rest_resource,
+        login.register_servlets,
+        room.register_servlets,
+    ]
+
     def make_homeserver(self, reactor, clock):
         config = self.default_config()
         config["cleanup_extremities_with_dummy_events"] = True
@@ -233,27 +247,19 @@ class CleanupExtremDummyEventsTestCase(HomeserverTestCase):
     def prepare(self, reactor, clock, homeserver):
         self.store = homeserver.get_datastore()
         self.room_creator = homeserver.get_room_creation_handler()
+        self.event_creator_handler = homeserver.get_event_creation_handler()
 
         # Create a test user and room
-        self.user = UserID("alice", "test")
+        self.user = UserID.from_string(self.register_user("user1", "password"))
+        self.token1 = self.login("user1", "password")
         self.requester = Requester(self.user, None, False, None, None)
         info = self.get_success(self.room_creator.create_room(self.requester, {}))
         self.room_id = info["room_id"]
+        self.event_creator = homeserver.get_event_creation_handler()
+        homeserver.config.user_consent_version = self.CONSENT_VERSION
 
     def test_send_dummy_event(self):
-        # Create a bushy graph with 50 extremities.
-
-        event_id_start = self.create_and_send_event(self.room_id, self.user)
-
-        for _ in range(50):
-            self.create_and_send_event(
-                self.room_id, self.user, prev_event_ids=[event_id_start]
-            )
-
-        latest_event_ids = self.get_success(
-            self.store.get_latest_event_ids_in_room(self.room_id)
-        )
-        self.assertEqual(len(latest_event_ids), 50)
+        self._create_extremity_rich_graph()
 
         # Pump the reactor repeatedly so that the background updates have a
         # chance to run.
@@ -263,3 +269,126 @@ class CleanupExtremDummyEventsTestCase(HomeserverTestCase):
             self.store.get_latest_event_ids_in_room(self.room_id)
         )
         self.assertTrue(len(latest_event_ids) < 10, len(latest_event_ids))
+
+    @patch("synapse.handlers.message._DUMMY_EVENT_ROOM_EXCLUSION_EXPIRY", new=0)
+    def test_send_dummy_events_when_insufficient_power(self):
+        self._create_extremity_rich_graph()
+        # Cripple power levels
+        self.helper.send_state(
+            self.room_id,
+            EventTypes.PowerLevels,
+            body={"users": {str(self.user): -1}},
+            tok=self.token1,
+        )
+        # Pump the reactor repeatedly so that the background updates have a
+        # chance to run.
+        self.pump(10 * 60)
+
+        latest_event_ids = self.get_success(
+            self.store.get_latest_event_ids_in_room(self.room_id)
+        )
+        # Check that the room has not been pruned
+        self.assertTrue(len(latest_event_ids) > 10)
+
+        # New user with regular levels
+        user2 = self.register_user("user2", "password")
+        token2 = self.login("user2", "password")
+        self.helper.join(self.room_id, user2, tok=token2)
+        self.pump(10 * 60)
+
+        latest_event_ids = self.get_success(
+            self.store.get_latest_event_ids_in_room(self.room_id)
+        )
+        self.assertTrue(len(latest_event_ids) < 10, len(latest_event_ids))
+
+    @patch("synapse.handlers.message._DUMMY_EVENT_ROOM_EXCLUSION_EXPIRY", new=0)
+    def test_send_dummy_event_without_consent(self):
+        self._create_extremity_rich_graph()
+        self._enable_consent_checking()
+
+        # Pump the reactor repeatedly so that the background updates have a
+        # chance to run. Attempt to add dummy event with user that has not consented
+        # Check that dummy event send fails.
+        self.pump(10 * 60)
+        latest_event_ids = self.get_success(
+            self.store.get_latest_event_ids_in_room(self.room_id)
+        )
+        self.assertTrue(len(latest_event_ids) == self.EXTREMITIES_COUNT)
+
+        # Create new user, and add consent
+        user2 = self.register_user("user2", "password")
+        token2 = self.login("user2", "password")
+        self.get_success(
+            self.store.user_set_consent_version(user2, self.CONSENT_VERSION)
+        )
+        self.helper.join(self.room_id, user2, tok=token2)
+
+        # Background updates should now cause a dummy event to be added to the graph
+        self.pump(10 * 60)
+
+        latest_event_ids = self.get_success(
+            self.store.get_latest_event_ids_in_room(self.room_id)
+        )
+        self.assertTrue(len(latest_event_ids) < 10, len(latest_event_ids))
+
+    @patch("synapse.handlers.message._DUMMY_EVENT_ROOM_EXCLUSION_EXPIRY", new=250)
+    def test_expiry_logic(self):
+        """Simple test to ensure that _expire_rooms_to_exclude_from_dummy_event_insertion()
+        expires old entries correctly.
+        """
+        self.event_creator_handler._rooms_to_exclude_from_dummy_event_insertion[
+            "1"
+        ] = 100000
+        self.event_creator_handler._rooms_to_exclude_from_dummy_event_insertion[
+            "2"
+        ] = 200000
+        self.event_creator_handler._rooms_to_exclude_from_dummy_event_insertion[
+            "3"
+        ] = 300000
+        self.event_creator_handler._expire_rooms_to_exclude_from_dummy_event_insertion()
+        # All entries within time frame
+        self.assertEqual(
+            len(
+                self.event_creator_handler._rooms_to_exclude_from_dummy_event_insertion
+            ),
+            3,
+        )
+        # Oldest room to expire
+        self.pump(1)
+        self.event_creator_handler._expire_rooms_to_exclude_from_dummy_event_insertion()
+        self.assertEqual(
+            len(
+                self.event_creator_handler._rooms_to_exclude_from_dummy_event_insertion
+            ),
+            2,
+        )
+        # All rooms to expire
+        self.pump(2)
+        self.assertEqual(
+            len(
+                self.event_creator_handler._rooms_to_exclude_from_dummy_event_insertion
+            ),
+            0,
+        )
+
+    def _create_extremity_rich_graph(self):
+        """Helper method to create bushy graph on demand"""
+
+        event_id_start = self.create_and_send_event(self.room_id, self.user)
+
+        for _ in range(self.EXTREMITIES_COUNT):
+            self.create_and_send_event(
+                self.room_id, self.user, prev_event_ids=[event_id_start]
+            )
+
+        latest_event_ids = self.get_success(
+            self.store.get_latest_event_ids_in_room(self.room_id)
+        )
+        self.assertEqual(len(latest_event_ids), 50)
+
+    def _enable_consent_checking(self):
+        """Helper method to enable consent checking"""
+        self.event_creator._block_events_without_consent_error = "No consent from user"
+        consent_uri_builder = Mock()
+        consent_uri_builder.build_user_consent_uri.return_value = "http://example.com"
+        self.event_creator._consent_uri_builder = consent_uri_builder
diff --git a/tests/storage/test_event_federation.py b/tests/storage/test_event_federation.py
index 86c7ac350..b58386994 100644
--- a/tests/storage/test_event_federation.py
+++ b/tests/storage/test_event_federation.py
@@ -75,3 +75,43 @@ class EventFederationWorkerStoreTestCase(tests.unittest.TestCase):
             el = r[i]
             depth = el[2]
             self.assertLessEqual(5, depth)
+
+    @defer.inlineCallbacks
+    def test_get_rooms_with_many_extremities(self):
+        room1 = "#room1"
+        room2 = "#room2"
+        room3 = "#room3"
+
+        def insert_event(txn, i, room_id):
+            event_id = "$event_%i:local" % i
+            txn.execute(
+                (
+                    "INSERT INTO event_forward_extremities (room_id, event_id) "
+                    "VALUES (?, ?)"
+                ),
+                (room_id, event_id),
+            )
+
+        for i in range(0, 20):
+            yield self.store.runInteraction("insert", insert_event, i, room1)
+            yield self.store.runInteraction("insert", insert_event, i, room2)
+            yield self.store.runInteraction("insert", insert_event, i, room3)
+
+        # Test simple case
+        r = yield self.store.get_rooms_with_many_extremities(5, 5, [])
+        self.assertEqual(len(r), 3)
+
+        # Does filter work?
+
+        r = yield self.store.get_rooms_with_many_extremities(5, 5, [room1])
+        self.assertTrue(room2 in r)
+        self.assertTrue(room3 in r)
+        self.assertEqual(len(r), 2)
+
+        r = yield self.store.get_rooms_with_many_extremities(5, 5, [room1, room2])
+        self.assertEqual(r, [room3])
+
+        # Does filter and limit work?
+
+        r = yield self.store.get_rooms_with_many_extremities(5, 1, [room1])
+        self.assertTrue(r == [room2] or r == [room3])

From 2927c6bc4c4e0c975a875d7eb5aa736b6abd66cd Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Thu, 26 Sep 2019 12:29:59 +0100
Subject: [PATCH 118/276] bump version

---
 synapse/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/__init__.py b/synapse/__init__.py
index 6766ef445..ddfe9ec54 100644
--- a/synapse/__init__.py
+++ b/synapse/__init__.py
@@ -35,4 +35,4 @@ try:
 except ImportError:
     pass
 
-__version__ = "1.3.1"
+__version__ = "1.4.0rc1"

From 1b23f991abb99c50908aca7c4ccfdea0c789c900 Mon Sep 17 00:00:00 2001
From: Neil Johnson <neil@matrix.org>
Date: Thu, 26 Sep 2019 12:30:10 +0100
Subject: [PATCH 119/276] Clarify upgrade notes ahead of 1.4.0 release

---
 UPGRADE.rst          | 195 +++++++++++++++++++++++++++++++++----------
 changelog.d/6027.doc |   1 +
 2 files changed, 153 insertions(+), 43 deletions(-)
 create mode 100644 changelog.d/6027.doc

diff --git a/UPGRADE.rst b/UPGRADE.rst
index 4ede973a0..9562114d5 100644
--- a/UPGRADE.rst
+++ b/UPGRADE.rst
@@ -78,53 +78,161 @@ for example:
 Upgrading to v1.4.0
 ===================
 
-Config options
---------------
+New custom templates
+--------------------
 
-**Note: Registration by email address or phone number will not work in this release unless
-some config options are changed from their defaults.**
+If you have configured a custom template directory with the
+``email.template_dir`` option, be aware that there are new templates regarding
+registration and threepid management (see below) that must be included.
 
-This is due to Synapse v1.4.0 now defaulting to sending registration and password reset tokens
-itself. This is for security reasons as well as putting less reliance on identity servers.
-However, currently Synapse only supports sending emails, and does not have support for
-phone-based password reset or account registration. If Synapse is configured to handle these on
-its own, phone-based password resets and registration will be disabled. For Synapse to send
-emails, the ``email`` block of the config must be filled out. If not, then password resets and
-registration via email will be disabled entirely.
+* ``registration.html`` and ``registration.txt``
+* ``registration_success.html`` and ``registration_failure.html``
+* ``add_threepid.html`` and  ``add_threepid.txt``
+* ``add_threepid_failure.html`` and ``add_threepid_success.html``
 
-This release also deprecates the ``email.trust_identity_server_for_password_resets`` option and
-replaces it with the ``account_threepid_delegates`` dictionary. This option defines whether the
-homeserver should delegate an external server (typically an `identity server
-<https://matrix.org/docs/spec/identity_service/r0.2.1>`_) to handle sending password reset or
-registration messages via email and SMS.
-
-If ``email.trust_identity_server_for_password_resets`` is set to ``true``, and
-``account_threepid_delegates.email`` is not set, then the first entry in
-``trusted_third_party_id_servers`` will be used as the account threepid delegate for email.
-This is to ensure compatibility with existing Synapse installs that set up external server
-handling for these tasks before v1.4.0. If ``email.trust_identity_server_for_password_resets``
-is ``true`` and no trusted identity server domains are configured, Synapse will throw an error.
-
-If ``email.trust_identity_server_for_password_resets`` is ``false`` or absent and a threepid
-type in ``account_threepid_delegates`` is not set to a domain, then Synapse will attempt to
-send password reset and registration messages for that type.
-
-Email templates
----------------
-
-If you have configured a custom template directory with the ``email.template_dir`` option, be
-aware that there are new templates regarding registration. ``registration.html`` and
-``registration.txt`` have been added and contain the content that is sent to a client upon
-registering via an email address.
-
-``registration_success.html`` and ``registration_failure.html`` are also new HTML templates
-that will be shown to the user when they click the link in their registration emai , either
-showing them a success or failure page (assuming a redirect URL is not configured).
-
-Synapse will expect these files to exist inside the configured template directory. To view the
-default templates, see `synapse/res/templates
+Synapse will expect these files to exist inside the configured template
+directory, and **will fail to start** if they are absent.
+To view the default templates, see `synapse/res/templates
 <https://github.com/matrix-org/synapse/tree/master/synapse/res/templates>`_.
 
+3pid verification changes
+-------------------------
+
+**Note: As of this release, users will be unable to add phone numbers or email
+addresses to their accounts, without changes to the Synapse configuration. This
+includes adding an email address during registration.**
+
+It is possible for a user to associate an email address or phone number
+with their account, for a number of reasons:
+
+* for use when logging in, as an alternative to the user id.
+* in the case of email, as an alternative contact to help with account recovery.
+* in the case of email, to receive notifications of missed messages.
+
+Before an email address or phone number can be added to a user's account,
+or before such an address is used to carry out a password-reset, Synapse must
+confirm the operation with the owner of the email address or phone number.
+It does this by sending an email or text giving the user a link or token to confirm
+receipt. This process is known as '3pid verification'. ('3pid', or 'threepid',
+stands for third-party identifier, and we use it to refer to external
+identifiers such as email addresses and phone numbers.)
+
+Previous versions of Synapse delegated the task of 3pid verification to an
+identity server by default. In most cases this server is ``vector.im`` or
+``matrix.org``.
+
+In Synapse 1.4.0, for security and privacy reasons, the homeserver will no
+longer delegate this task to an identity server by default. Instead,
+the server administrator will need to explicitly decide how they would like the
+verification messages to be sent.
+
+In the medium term, the ``vector.im`` and ``matrix.org`` identity servers will
+disable support for delegated 3pid verification entirely. However, in order to
+ease the transition, they will retain the capability for a limited
+period. Delegated email verification will be disabled on Monday 2nd December
+2019 (giving roughly 2 months notice). Disabling delegated SMS verification
+will follow some time after that once SMS verification support lands in
+Synapse.
+
+Once delegated 3pid verification support has been disabled in the ``vector.im`` and
+``matrix.org`` identity servers, all Synapse versions that depend on those
+instances will be unable to verify email and phone numbers through them. There
+are no imminent plans to remove delegated 3pid verification from Sydent
+generally. (Sydent is the identity server project that backs the ``vector.im`` and
+``matrix.org`` instances).
+
+Email
+~~~~~
+Following upgrade, to continue verifying email (e.g. as part of the
+registration process), admins can either:-
+
+* Configure Synapse to use an email server.
+* Run or choose an identity server which allows delegated email verification
+  and delegate to it.
+
+Configure SMTP in Synapse
++++++++++++++++++++++++++
+
+To configure an SMTP server for Synapse, modify the configuration section
+headed ``email``, and be sure to have at least the ``smtp_host``, ``smtp_port``
+and ``notif_from`` fields filled out.
+
+You may also need to set ``smtp_user``, ``smtp_pass``, and
+``require_transport_security``.
+
+See the `sample configuration file <docs/sample_config.yaml>`_ for more details
+on these settings.
+
+Delegate email to an identity server
+++++++++++++++++++++++++++++++++++++
+
+Some admins will wish to continue using email verification as part of the
+registration process, but will not immediately have an appropriate SMTP server
+at hand.
+
+To this end, we will continue to support email verification delegation via the
+``vector.im`` and ``matrix.org`` identity servers for two months. Support for
+delegated email verification will be disabled on Monday 2nd December.
+
+The ``account_threepid_delegates`` dictionary defines whether the homeserver
+should delegate an external server (typically an `identity server
+<https://matrix.org/docs/spec/identity_service/r0.2.1>`_) to handle sending
+confirmation messages via email and SMS.
+
+So to delegate email verification, in ``homeserver.yaml``, set
+``account_threepid_delegates.email`` to the base URL of an identity server. For
+example:
+
+.. code:: yaml
+
+   account_threepid_delegates:
+       email: https://example.com     # Delegate email sending to example.com
+
+Note that ``account_threepid_delegates.email`` replaces the deprecated
+``email.trust_identity_server_for_password_resets``: if
+``email.trust_identity_server_for_password_resets`` is set to ``true``, and
+``account_threepid_delegates.email`` is not set, then the first entry in
+``trusted_third_party_id_servers`` will be used as the
+``account_threepid_delegate`` for email. This is to ensure compatibility with
+existing Synapse installs that set up external server handling for these tasks
+before v1.4.0. If ``email.trust_identity_server_for_password_resets`` is
+``true`` and no trusted identity server domains are configured, Synapse will
+report an error and refuse to start.
+
+If ``email.trust_identity_server_for_password_resets`` is ``false`` or absent
+and no ``email`` delegate is configured in ``account_threepid_delegates``,
+then Synapse will send email verification messages itself, using the
+SMTP server configured in the ``email`` section of ``homeserver.yaml``
+(see "Configure SMTP in Synapse" above).
+
+Phone numbers
+~~~~~~~~~~~~~
+
+Synapse does not support phone-number verification itself, so the only way to
+maintain the ability for users to add phone numbers to their accounts will be
+by continuing to delegate phone number verification to the ``matrix.org`` and
+``vector.im`` identity servers (or another identity server that supports SMS
+sending).
+
+The ``account_threepid_delegates`` dictionary defines whether the homeserver
+should delegate an external server (typically an `identity server
+<https://matrix.org/docs/spec/identity_service/r0.2.1>`_) to handle sending
+confirmation messages via email and SMS.
+
+So to delegate phone number verification, in ``homeserver.yaml``, set
+``account_threepid_delegates.msisdn`` to the base URL of an identity
+server. For example:
+
+.. code:: yaml
+
+   account_threepid_delegates:
+       msisdn: https://example.com     # Delegate sms sending to example.com
+
+The ``matrix.org`` and ``vector.im`` identity servers will continue to support
+delegated phone number verification via SMS until such time as it is possible
+for admins to configure their servers to perform phone number verification
+directly. More details will follow in a future release.
+
 Rolling back to v1.3.1
 ----------------------
 
@@ -140,7 +248,8 @@ v1.3.1, subject to the following:
   The room statistics are essentially unused in v1.3.1 (in future versions of
   Synapse, they will be used to populate the room directory), so there should
   be no loss of functionality. However, the statistics engine will write errors
-  to the logs, which can be avoided by setting the following in `homeserver.yaml`:
+  to the logs, which can be avoided by setting the following in
+  `homeserver.yaml`:
 
   .. code:: yaml
 
diff --git a/changelog.d/6027.doc b/changelog.d/6027.doc
new file mode 100644
index 000000000..f0af68f3b
--- /dev/null
+++ b/changelog.d/6027.doc
@@ -0,0 +1 @@
+Clarify Synapse 1.4.0 upgrade notes.

From 8b8f8c7b3c6136ea777265fff8052afed2b7031e Mon Sep 17 00:00:00 2001
From: Neil Johnson <neil@matrix.org>
Date: Thu, 26 Sep 2019 12:57:01 +0100
Subject: [PATCH 120/276] Explicitly log when a homeserver does not have a
 trusted key server configured  (#6090)

---
 changelog.d/6090.feature |  1 +
 docs/sample_config.yaml  | 14 ++++++++----
 synapse/config/key.py    | 48 ++++++++++++++++++++++++++++++++++++----
 synapse/config/server.py | 16 +++++++-------
 4 files changed, 63 insertions(+), 16 deletions(-)
 create mode 100644 changelog.d/6090.feature

diff --git a/changelog.d/6090.feature b/changelog.d/6090.feature
new file mode 100644
index 000000000..a6da448a1
--- /dev/null
+++ b/changelog.d/6090.feature
@@ -0,0 +1 @@
+Explicitly log when a homeserver does not have the 'trusted_key_servers' config field configured.
diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml
index 8f801daf3..254e1b17b 100644
--- a/docs/sample_config.yaml
+++ b/docs/sample_config.yaml
@@ -1072,6 +1072,10 @@ signing_key_path: "CONFDIR/SERVERNAME.signing.key"
 # This setting supercedes an older setting named `perspectives`. The old format
 # is still supported for backwards-compatibility, but it is deprecated.
 #
+# 'trusted_key_servers' defaults to matrix.org, but using it will generate a
+# warning on start-up. To suppress this warning, set
+# 'suppress_key_server_warning' to true.
+#
 # Options for each entry in the list include:
 #
 #    server_name: the name of the server. required.
@@ -1096,11 +1100,13 @@ signing_key_path: "CONFDIR/SERVERNAME.signing.key"
 #      "ed25519:auto": "abcdefghijklmnopqrstuvwxyzabcdefghijklmopqr"
 #  - server_name: "my_other_trusted_server.example.com"
 #
-# The default configuration is:
-#
-#trusted_key_servers:
-#  - server_name: "matrix.org"
+trusted_key_servers:
+  - server_name: "matrix.org"
+
+# Uncomment the following to disable the warning that is emitted when the
+# trusted_key_servers include 'matrix.org'. See above.
 #
+#suppress_key_server_warning: true
 
 # The signing keys to use when acting as a trusted key server. If not specified
 # defaults to the server signing key.
diff --git a/synapse/config/key.py b/synapse/config/key.py
index ba2199bce..f039f96e9 100644
--- a/synapse/config/key.py
+++ b/synapse/config/key.py
@@ -50,6 +50,33 @@ and you should enable 'federation_verify_certificates' in your configuration.
 If you are *sure* you want to do this, set 'accept_keys_insecurely' on the
 trusted_key_server configuration."""
 
+TRUSTED_KEY_SERVER_NOT_CONFIGURED_WARN = """\
+Synapse requires that a list of trusted key servers are specified in order to
+provide signing keys for other servers in the federation.
+
+This homeserver does not have a trusted key server configured in
+homeserver.yaml and will fall back to the default of 'matrix.org'.
+
+Trusted key servers should be long-lived and stable which makes matrix.org a
+good choice for many admins, but some admins may wish to choose another. To
+suppress this warning, the admin should set 'trusted_key_servers' in
+homeserver.yaml to their desired key server and 'suppress_key_server_warning'
+to 'true'.
+
+In a future release the software-defined default will be removed entirely and
+the trusted key server will be defined exclusively by the value of
+'trusted_key_servers'.
+--------------------------------------------------------------------------------"""
+
+TRUSTED_KEY_SERVER_CONFIGURED_AS_M_ORG_WARN = """\
+This server is configured to use 'matrix.org' as its trusted key server via the
+'trusted_key_servers' config option. 'matrix.org' is a good choice for a key
+server since it is long-lived, stable and trusted. However, some admins may
+wish to use another server for this purpose.
+
+To suppress this warning and continue using 'matrix.org', admins should set
+'suppress_key_server_warning' to 'true' in homeserver.yaml.
+--------------------------------------------------------------------------------"""
 
 logger = logging.getLogger(__name__)
 
@@ -85,6 +112,7 @@ class KeyConfig(Config):
             config.get("key_refresh_interval", "1d")
         )
 
+        suppress_key_server_warning = config.get("suppress_key_server_warning", False)
         key_server_signing_keys_path = config.get("key_server_signing_keys_path")
         if key_server_signing_keys_path:
             self.key_server_signing_keys = self.read_signing_keys(
@@ -95,6 +123,7 @@ class KeyConfig(Config):
 
         # if neither trusted_key_servers nor perspectives are given, use the default.
         if "perspectives" not in config and "trusted_key_servers" not in config:
+            logger.warn(TRUSTED_KEY_SERVER_NOT_CONFIGURED_WARN)
             key_servers = [{"server_name": "matrix.org"}]
         else:
             key_servers = config.get("trusted_key_servers", [])
@@ -108,6 +137,11 @@ class KeyConfig(Config):
             # merge the 'perspectives' config into the 'trusted_key_servers' config.
             key_servers.extend(_perspectives_to_key_servers(config))
 
+            if not suppress_key_server_warning and "matrix.org" in (
+                s["server_name"] for s in key_servers
+            ):
+                logger.warning(TRUSTED_KEY_SERVER_CONFIGURED_AS_M_ORG_WARN)
+
         # list of TrustedKeyServer objects
         self.key_servers = list(
             _parse_key_servers(key_servers, self.federation_verify_certificates)
@@ -190,6 +224,10 @@ class KeyConfig(Config):
         # This setting supercedes an older setting named `perspectives`. The old format
         # is still supported for backwards-compatibility, but it is deprecated.
         #
+        # 'trusted_key_servers' defaults to matrix.org, but using it will generate a
+        # warning on start-up. To suppress this warning, set
+        # 'suppress_key_server_warning' to true.
+        #
         # Options for each entry in the list include:
         #
         #    server_name: the name of the server. required.
@@ -214,11 +252,13 @@ class KeyConfig(Config):
         #      "ed25519:auto": "abcdefghijklmnopqrstuvwxyzabcdefghijklmopqr"
         #  - server_name: "my_other_trusted_server.example.com"
         #
-        # The default configuration is:
-        #
-        #trusted_key_servers:
-        #  - server_name: "matrix.org"
+        trusted_key_servers:
+          - server_name: "matrix.org"
+
+        # Uncomment the following to disable the warning that is emitted when the
+        # trusted_key_servers include 'matrix.org'. See above.
         #
+        #suppress_key_server_warning: true
 
         # The signing keys to use when acting as a trusted key server. If not specified
         # defaults to the server signing key.
diff --git a/synapse/config/server.py b/synapse/config/server.py
index 9d3f1b5bf..5ad7ee911 100644
--- a/synapse/config/server.py
+++ b/synapse/config/server.py
@@ -48,6 +48,13 @@ ROOM_COMPLEXITY_TOO_GREAT = (
     "to join this room."
 )
 
+METRICS_PORT_WARNING = """\
+The metrics_port configuration option is deprecated in Synapse 0.31 in favour of
+a listener. Please see
+https://github.com/matrix-org/synapse/blob/master/docs/metrics-howto.md
+on how to configure the new listener.
+--------------------------------------------------------------------------------"""
+
 
 class ServerConfig(Config):
     def read_config(self, config, **kwargs):
@@ -341,14 +348,7 @@ class ServerConfig(Config):
 
         metrics_port = config.get("metrics_port")
         if metrics_port:
-            logger.warn(
-                (
-                    "The metrics_port configuration option is deprecated in Synapse 0.31 "
-                    "in favour of a listener. Please see "
-                    "http://github.com/matrix-org/synapse/blob/master/docs/metrics-howto.md"
-                    " on how to configure the new listener."
-                )
-            )
+            logger.warning(METRICS_PORT_WARNING)
 
             self.listeners.append(
                 {

From 3fbca80a8da753e07dcf6c9539978c45c06cd1e1 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Thu, 26 Sep 2019 12:34:35 +0100
Subject: [PATCH 121/276] changelog

---
 CHANGES.md               | 156 +++++++++++++++++++++++++++++++++++++++
 changelog.d/5633.bugfix  |   1 -
 changelog.d/5680.misc    |   1 -
 changelog.d/5771.feature |   1 -
 changelog.d/5776.misc    |   1 -
 changelog.d/5835.feature |   1 -
 changelog.d/5844.misc    |   1 -
 changelog.d/5845.feature |   1 -
 changelog.d/5849.doc     |   1 -
 changelog.d/5850.feature |   1 -
 changelog.d/5852.feature |   1 -
 changelog.d/5853.feature |   1 -
 changelog.d/5855.misc    |   1 -
 changelog.d/5856.feature |   1 -
 changelog.d/5857.bugfix  |   1 -
 changelog.d/5859.feature |   1 -
 changelog.d/5860.misc    |   1 -
 changelog.d/5863.bugfix  |   1 -
 changelog.d/5864.feature |   1 -
 changelog.d/5868.feature |   1 -
 changelog.d/5875.misc    |   1 -
 changelog.d/5876.feature |   1 -
 changelog.d/5877.removal |   1 -
 changelog.d/5878.feature |   1 -
 changelog.d/5884.feature |   1 -
 changelog.d/5885.bugfix  |   1 -
 changelog.d/5886.misc    |   1 -
 changelog.d/5892.misc    |   1 -
 changelog.d/5893.misc    |   1 -
 changelog.d/5894.misc    |   1 -
 changelog.d/5895.feature |   1 -
 changelog.d/5896.misc    |   1 -
 changelog.d/5897.feature |   1 -
 changelog.d/5900.feature |   1 -
 changelog.d/5902.feature |   1 -
 changelog.d/5904.feature |   1 -
 changelog.d/5906.feature |   1 -
 changelog.d/5909.misc    |   1 -
 changelog.d/5911.misc    |   1 -
 changelog.d/5914.feature |   1 -
 changelog.d/5915.bugfix  |   1 -
 changelog.d/5920.bugfix  |   1 -
 changelog.d/5922.misc    |   1 -
 changelog.d/5926.misc    |   1 -
 changelog.d/5931.misc    |   1 -
 changelog.d/5934.feature |   1 -
 changelog.d/5938.misc    |   1 -
 changelog.d/5940.feature |   1 -
 changelog.d/5943.misc    |   1 -
 changelog.d/5953.misc    |   1 -
 changelog.d/5962.misc    |   1 -
 changelog.d/5963.misc    |   1 -
 changelog.d/5964.feature |   1 -
 changelog.d/5966.bugfix  |   1 -
 changelog.d/5967.bugfix  |   1 -
 changelog.d/5969.feature |   1 -
 changelog.d/5970.docker  |   1 -
 changelog.d/5971.bugfix  |   1 -
 changelog.d/5972.misc    |   1 -
 changelog.d/5974.feature |   1 -
 changelog.d/5975.misc    |   1 -
 changelog.d/5979.feature |   1 -
 changelog.d/5980.feature |   1 -
 changelog.d/5981.feature |   1 -
 changelog.d/5982.bugfix  |   1 -
 changelog.d/5983.feature |   1 -
 changelog.d/5984.bugfix  |   1 -
 changelog.d/5985.feature |   1 -
 changelog.d/5986.feature |   1 -
 changelog.d/5988.bugfix  |   1 -
 changelog.d/5989.misc    |   1 -
 changelog.d/5991.bugfix  |   1 -
 changelog.d/5992.feature |   1 -
 changelog.d/5993.feature |   1 -
 changelog.d/5994.feature |   1 -
 changelog.d/5995.bugfix  |   1 -
 changelog.d/5996.bugfix  |   1 -
 changelog.d/5998.bugfix  |   1 -
 changelog.d/6000.feature |   1 -
 changelog.d/6003.misc    |   1 -
 changelog.d/6004.bugfix  |   1 -
 changelog.d/6005.feature |   1 -
 changelog.d/6009.misc    |   1 -
 changelog.d/6010.misc    |   1 -
 changelog.d/6011.feature |   1 -
 changelog.d/6012.feature |   1 -
 changelog.d/6013.misc    |   1 -
 changelog.d/6015.feature |   1 -
 changelog.d/6016.misc    |   1 -
 changelog.d/6017.misc    |   1 -
 changelog.d/6020.bugfix  |   1 -
 changelog.d/6023.misc    |   1 -
 changelog.d/6024.bugfix  |   1 -
 changelog.d/6025.bugfix  |   1 -
 changelog.d/6026.feature |   1 -
 changelog.d/6027.doc     |   1 -
 changelog.d/6028.feature |   1 -
 changelog.d/6029.bugfix  |   1 -
 changelog.d/6032.misc    |   1 -
 changelog.d/6037.feature |   1 -
 changelog.d/6042.feature |   1 -
 changelog.d/6043.feature |   1 -
 changelog.d/6044.feature |   1 -
 changelog.d/6047.misc    |   2 -
 changelog.d/6049.doc     |   1 -
 changelog.d/6050.doc     |   1 -
 changelog.d/6053.bugfix  |   1 -
 changelog.d/6056.bugfix  |   1 -
 changelog.d/6058.docker  |   1 -
 changelog.d/6059.bugfix  |   1 -
 changelog.d/6062.bugfix  |   1 -
 changelog.d/6063.bugfix  |   1 -
 changelog.d/6064.misc    |   1 -
 changelog.d/6067.feature |   1 -
 changelog.d/6069.bugfix  |   1 -
 changelog.d/6072.misc    |   1 -
 changelog.d/6073.feature |   1 -
 changelog.d/6074.feature |   1 -
 changelog.d/6075.misc    |   1 -
 changelog.d/6078.feature |   1 -
 changelog.d/6079.feature |   1 -
 changelog.d/6082.feature |   1 -
 changelog.d/6089.misc    |   1 -
 changelog.d/6090.feature |   1 -
 changelog.d/6092.bugfix  |   1 -
 changelog.d/6097.bugfix  |   1 -
 changelog.d/6098.feature |   1 -
 changelog.d/6099.misc    |   1 -
 changelog.d/6104.bugfix  |   1 -
 changelog.d/6105.misc    |   1 -
 changelog.d/6106.misc    |   1 -
 changelog.d/6107.bugfix  |   1 -
 132 files changed, 156 insertions(+), 132 deletions(-)
 delete mode 100644 changelog.d/5633.bugfix
 delete mode 100644 changelog.d/5680.misc
 delete mode 100644 changelog.d/5771.feature
 delete mode 100644 changelog.d/5776.misc
 delete mode 100644 changelog.d/5835.feature
 delete mode 100644 changelog.d/5844.misc
 delete mode 100644 changelog.d/5845.feature
 delete mode 100644 changelog.d/5849.doc
 delete mode 100644 changelog.d/5850.feature
 delete mode 100644 changelog.d/5852.feature
 delete mode 100644 changelog.d/5853.feature
 delete mode 100644 changelog.d/5855.misc
 delete mode 100644 changelog.d/5856.feature
 delete mode 100644 changelog.d/5857.bugfix
 delete mode 100644 changelog.d/5859.feature
 delete mode 100644 changelog.d/5860.misc
 delete mode 100644 changelog.d/5863.bugfix
 delete mode 100644 changelog.d/5864.feature
 delete mode 100644 changelog.d/5868.feature
 delete mode 100644 changelog.d/5875.misc
 delete mode 100644 changelog.d/5876.feature
 delete mode 100644 changelog.d/5877.removal
 delete mode 100644 changelog.d/5878.feature
 delete mode 100644 changelog.d/5884.feature
 delete mode 100644 changelog.d/5885.bugfix
 delete mode 100644 changelog.d/5886.misc
 delete mode 100644 changelog.d/5892.misc
 delete mode 100644 changelog.d/5893.misc
 delete mode 100644 changelog.d/5894.misc
 delete mode 100644 changelog.d/5895.feature
 delete mode 100644 changelog.d/5896.misc
 delete mode 100644 changelog.d/5897.feature
 delete mode 100644 changelog.d/5900.feature
 delete mode 100644 changelog.d/5902.feature
 delete mode 100644 changelog.d/5904.feature
 delete mode 100644 changelog.d/5906.feature
 delete mode 100644 changelog.d/5909.misc
 delete mode 100644 changelog.d/5911.misc
 delete mode 100644 changelog.d/5914.feature
 delete mode 100644 changelog.d/5915.bugfix
 delete mode 100644 changelog.d/5920.bugfix
 delete mode 100644 changelog.d/5922.misc
 delete mode 100644 changelog.d/5926.misc
 delete mode 100644 changelog.d/5931.misc
 delete mode 100644 changelog.d/5934.feature
 delete mode 100644 changelog.d/5938.misc
 delete mode 100644 changelog.d/5940.feature
 delete mode 100644 changelog.d/5943.misc
 delete mode 100644 changelog.d/5953.misc
 delete mode 100644 changelog.d/5962.misc
 delete mode 100644 changelog.d/5963.misc
 delete mode 100644 changelog.d/5964.feature
 delete mode 100644 changelog.d/5966.bugfix
 delete mode 100644 changelog.d/5967.bugfix
 delete mode 100644 changelog.d/5969.feature
 delete mode 100644 changelog.d/5970.docker
 delete mode 100644 changelog.d/5971.bugfix
 delete mode 100644 changelog.d/5972.misc
 delete mode 100644 changelog.d/5974.feature
 delete mode 100644 changelog.d/5975.misc
 delete mode 100644 changelog.d/5979.feature
 delete mode 100644 changelog.d/5980.feature
 delete mode 100644 changelog.d/5981.feature
 delete mode 100644 changelog.d/5982.bugfix
 delete mode 100644 changelog.d/5983.feature
 delete mode 100644 changelog.d/5984.bugfix
 delete mode 100644 changelog.d/5985.feature
 delete mode 100644 changelog.d/5986.feature
 delete mode 100644 changelog.d/5988.bugfix
 delete mode 100644 changelog.d/5989.misc
 delete mode 100644 changelog.d/5991.bugfix
 delete mode 100644 changelog.d/5992.feature
 delete mode 100644 changelog.d/5993.feature
 delete mode 100644 changelog.d/5994.feature
 delete mode 100644 changelog.d/5995.bugfix
 delete mode 100644 changelog.d/5996.bugfix
 delete mode 100644 changelog.d/5998.bugfix
 delete mode 100644 changelog.d/6000.feature
 delete mode 100644 changelog.d/6003.misc
 delete mode 100644 changelog.d/6004.bugfix
 delete mode 100644 changelog.d/6005.feature
 delete mode 100644 changelog.d/6009.misc
 delete mode 100644 changelog.d/6010.misc
 delete mode 100644 changelog.d/6011.feature
 delete mode 100644 changelog.d/6012.feature
 delete mode 100644 changelog.d/6013.misc
 delete mode 100644 changelog.d/6015.feature
 delete mode 100644 changelog.d/6016.misc
 delete mode 100644 changelog.d/6017.misc
 delete mode 100644 changelog.d/6020.bugfix
 delete mode 100644 changelog.d/6023.misc
 delete mode 100644 changelog.d/6024.bugfix
 delete mode 100644 changelog.d/6025.bugfix
 delete mode 100644 changelog.d/6026.feature
 delete mode 100644 changelog.d/6027.doc
 delete mode 100644 changelog.d/6028.feature
 delete mode 100644 changelog.d/6029.bugfix
 delete mode 100644 changelog.d/6032.misc
 delete mode 100644 changelog.d/6037.feature
 delete mode 100644 changelog.d/6042.feature
 delete mode 100644 changelog.d/6043.feature
 delete mode 100644 changelog.d/6044.feature
 delete mode 100644 changelog.d/6047.misc
 delete mode 100644 changelog.d/6049.doc
 delete mode 100644 changelog.d/6050.doc
 delete mode 100644 changelog.d/6053.bugfix
 delete mode 100644 changelog.d/6056.bugfix
 delete mode 100644 changelog.d/6058.docker
 delete mode 100644 changelog.d/6059.bugfix
 delete mode 100644 changelog.d/6062.bugfix
 delete mode 100644 changelog.d/6063.bugfix
 delete mode 100644 changelog.d/6064.misc
 delete mode 100644 changelog.d/6067.feature
 delete mode 100644 changelog.d/6069.bugfix
 delete mode 100644 changelog.d/6072.misc
 delete mode 100644 changelog.d/6073.feature
 delete mode 100644 changelog.d/6074.feature
 delete mode 100644 changelog.d/6075.misc
 delete mode 100644 changelog.d/6078.feature
 delete mode 100644 changelog.d/6079.feature
 delete mode 100644 changelog.d/6082.feature
 delete mode 100644 changelog.d/6089.misc
 delete mode 100644 changelog.d/6090.feature
 delete mode 100644 changelog.d/6092.bugfix
 delete mode 100644 changelog.d/6097.bugfix
 delete mode 100644 changelog.d/6098.feature
 delete mode 100644 changelog.d/6099.misc
 delete mode 100644 changelog.d/6104.bugfix
 delete mode 100644 changelog.d/6105.misc
 delete mode 100644 changelog.d/6106.misc
 delete mode 100644 changelog.d/6107.bugfix

diff --git a/CHANGES.md b/CHANGES.md
index f25c7d0c1..9f610e4c1 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,3 +1,159 @@
+Synapse 1.4.0rc1 (2019-09-26)
+=============================
+
+Note that this release includes significant changes around 3pid
+verification. Administrators are reminded to review the [upgrade notes](UPGRADE.rst#upgrading-to-v140).
+
+Features
+--------
+
+- Changes to 3pid verification:
+  - Add the ability to send registration emails from the homeserver rather than delegating to an identity server. ([\#5835](https://github.com/matrix-org/synapse/issues/5835), [\#5940](https://github.com/matrix-org/synapse/issues/5940), [\#5993](https://github.com/matrix-org/synapse/issues/5993), [\#5994](https://github.com/matrix-org/synapse/issues/5994), [\#5868](https://github.com/matrix-org/synapse/issues/5868))
+  - Replace `trust_identity_server_for_password_resets` config option with `account_threepid_delegates`. ([\#5876](https://github.com/matrix-org/synapse/issues/5876), [\#5969](https://github.com/matrix-org/synapse/issues/5969), [\#6028](https://github.com/matrix-org/synapse/issues/6028))
+  - Switch to using the v2 Identity Service `/lookup` API where available, with fallback to v1. (Implements [MSC2134](https://github.com/matrix-org/matrix-doc/pull/2134) plus id_access_token authentication for v2 Identity Service APIs from [MSC2140](https://github.com/matrix-org/matrix-doc/pull/2140)). ([\#5897](https://github.com/matrix-org/synapse/issues/5897))
+  - Remove `bind_email` and `bind_msisdn` parameters from `/register` as per [MSC2140](https://github.com/matrix-org/matrix-doc/pull/2140). ([\#5964](https://github.com/matrix-org/synapse/issues/5964))
+  - Add `m.id_access_token` to `unstable_features` in `/versions` as per [MSC2264](https://github.com/matrix-org/matrix-doc/pull/2264). ([\#5974](https://github.com/matrix-org/synapse/issues/5974))
+  - Use the v2 Identity Service API for 3PID invites. ([\#5979](https://github.com/matrix-org/synapse/issues/5979))
+  - Add `POST /_matrix/client/unstable/account/3pid/unbind` endpoint from [MSC2140](https://github.com/matrix-org/matrix-doc/pull/2140) for unbinding a 3PID from an identity server without removing it from the homeserver user account.
+    ([\#5980](https://github.com/matrix-org/synapse/issues/5980), [\#6062](https://github.com/matrix-org/synapse/issues/6062))
+  - Use `account_threepid_delegate.email` and `account_threepid_delegate.msisdn` for validating threepid sessions. ([\#6011](https://github.com/matrix-org/synapse/issues/6011))
+  - Allow homeserver to handle or delegate email validation when adding an email to a user's account. ([\#6042](https://github.com/matrix-org/synapse/issues/6042))
+  - Implement new Client Server API endpoints `/account/3pid/add` and `/account/3pid/bind` as per [MSC2290](https://github.com/matrix-org/matrix-doc/pull/2290). ([\#6043](https://github.com/matrix-org/synapse/issues/6043))
+  - Add an unstable feature flag for separate add/bind 3pid APIs. ([\#6044](https://github.com/matrix-org/synapse/issues/6044))
+  - Remove `bind` parameter from Client Server POST `/account` endpoint as per [MSC2290](https://github.com/matrix-org/matrix-doc/pull/2290/). ([\#6067](https://github.com/matrix-org/synapse/issues/6067))
+  - Add `POST /add_threepid/msisdn/submit_token` endpoint for proxying submitToken on an account_threepid_handler. ([\#6078](https://github.com/matrix-org/synapse/issues/6078))
+  - Add `submit_url` response parameter to `*/msisdn/requestToken` endpoints. ([\#6079](https://github.com/matrix-org/synapse/issues/6079))
+  - Add `m.require_identity_server` flag to `unstable_features` in `/versions`. ([\#5972](https://github.com/matrix-org/synapse/issues/5972))
+- Enhancements to OpenTracing support:
+  - Make OpenTracing work in worker mode. ([\#5771](https://github.com/matrix-org/synapse/issues/5771))
+  - Pass OpenTracing contexts between servers when transmitting EDUs. ([\#5852](https://github.com/matrix-org/synapse/issues/5852))
+  - OpenTracing for device list updates. ([\#5853](https://github.com/matrix-org/synapse/issues/5853))
+  - Add a tag recording a request's authenticated entity and corresponding servlet in OpenTracing. ([\#5856](https://github.com/matrix-org/synapse/issues/5856))
+  - Add minimum OpenTracing for client servlets. ([\#5983](https://github.com/matrix-org/synapse/issues/5983))
+  - Check at setup that OpenTracing is installed if it's enabled in the config. ([\#5985](https://github.com/matrix-org/synapse/issues/5985))
+  - Trace replication send times. ([\#5986](https://github.com/matrix-org/synapse/issues/5986))
+  - Include missing OpenTracing contexts in outbound replication requests. ([\#5982](https://github.com/matrix-org/synapse/issues/5982))
+  - Fix sending of EDUs when OpenTracing is enabled with an empty whitelist. ([\#5984](https://github.com/matrix-org/synapse/issues/5984))
+  - Fix invalid references to None while using OpenTracing if the log context slips. ([\#5988](https://github.com/matrix-org/synapse/issues/5988), [\#5991](https://github.com/matrix-org/synapse/issues/5991))
+  - OpenTracing for room and e2e keys. ([\#5855](https://github.com/matrix-org/synapse/issues/5855))
+  - Add OpenTracing span over HTTP push processing. ([\#6003](https://github.com/matrix-org/synapse/issues/6003))
+- Add an admin API to purge old rooms from the database. ([\#5845](https://github.com/matrix-org/synapse/issues/5845))
+- Retry well-known lookups if we have recently seen a valid well-known record for the server. ([\#5850](https://github.com/matrix-org/synapse/issues/5850))
+- Add support for filtered room-directory search requests over federation ([MSC2197](https://github.com/matrix-org/matrix-doc/pull/2197)), in order to allow upcoming room directory query performance improvements. ([\#5859](https://github.com/matrix-org/synapse/issues/5859))
+- Correctly retry all hosts returned from SRV when we fail to connect. ([\#5864](https://github.com/matrix-org/synapse/issues/5864))
+- Add admin API endpoint for setting whether or not a user is a server administrator. ([\#5878](https://github.com/matrix-org/synapse/issues/5878))
+- Enable cleaning up extremities with dummy events by default to prevent undue build up of forward extremities. ([\#5884](https://github.com/matrix-org/synapse/issues/5884))
+- Add config option to sign remote key query responses with a separate key. ([\#5895](https://github.com/matrix-org/synapse/issues/5895))
+- Add support for config templating. ([\#5900](https://github.com/matrix-org/synapse/issues/5900))
+- Users with the type of "support" or "bot" are no longer required to consent. ([\#5902](https://github.com/matrix-org/synapse/issues/5902))
+- Let synctl accept a directory of config files. ([\#5904](https://github.com/matrix-org/synapse/issues/5904))
+- Increase max display name size to 256. ([\#5906](https://github.com/matrix-org/synapse/issues/5906))
+- Add admin API endpoint for getting whether or not a user is a server administrator. ([\#5914](https://github.com/matrix-org/synapse/issues/5914))
+- Redact events in the database that have been redacted for a month. ([\#5934](https://github.com/matrix-org/synapse/issues/5934))
+- New prometheus metrics:
+  - `synapse_federation_known_servers`: represents the total number of servers your server knows about (i.e. is in rooms with), including itself. Enable by setting `metrics_flags.known_servers` to True in the configuration. ([\#5981](https://github.com/matrix-org/synapse/issues/5981))
+  - `synapse_build_info`: exposes the Python version, OS version, and Synapse version of the running server. ([\#6005](https://github.com/matrix-org/synapse/issues/6005))
+- Give appropriate exit codes when synctl fails. ([\#5992](https://github.com/matrix-org/synapse/issues/5992))
+- Apply the federation blacklist to requests to identity servers. ([\#6000](https://github.com/matrix-org/synapse/issues/6000))
+- Add `report_stats_endpoint` option to configure where stats are reported to, if enabled. Contributed by @Sorunome. ([\#6012](https://github.com/matrix-org/synapse/issues/6012))
+- Add config option to increase ratelimits for room admins redacting messages. ([\#6015](https://github.com/matrix-org/synapse/issues/6015))
+- Stop sending federation transactions to servers which have been down for a long time. ([\#6026](https://github.com/matrix-org/synapse/issues/6026))
+- Make the process for mapping SAML2 users to matrix IDs more flexible. ([\#6037](https://github.com/matrix-org/synapse/issues/6037))
+- Return a clearer error message when a timeout occurs when attempting to contact an identity server. ([\#6073](https://github.com/matrix-org/synapse/issues/6073))
+- Prevent password reset's submit_token endpoint from accepting trailing slashes. ([\#6074](https://github.com/matrix-org/synapse/issues/6074))
+- Return 403 on `/register/available` if registration has been disabled. ([\#6082](https://github.com/matrix-org/synapse/issues/6082))
+- Explicitly log when a homeserver does not have the `trusted_key_servers` config field configured. ([\#6090](https://github.com/matrix-org/synapse/issues/6090))
+- Add support for pruning old rows in `user_ips` table. ([\#6098](https://github.com/matrix-org/synapse/issues/6098))
+
+Bugfixes
+--------
+
+- Don't create broken room when `power_level_content_override.users` does not contain `creator_id`. ([\#5633](https://github.com/matrix-org/synapse/issues/5633))
+- Fix database index so that different backup versions can have the same sessions. ([\#5857](https://github.com/matrix-org/synapse/issues/5857))
+- Fix Synapse looking for config options `password_reset_failure_template` and `password_reset_success_template`, when they are actually `password_reset_template_failure_html`, `password_reset_template_success_html`. ([\#5863](https://github.com/matrix-org/synapse/issues/5863))
+- Fix stack overflow when recovering an appservice which had an outage. ([\#5885](https://github.com/matrix-org/synapse/issues/5885))
+- Fix error message which referred to `public_base_url` instead of `public_baseurl`. Thanks to @aaronraimist for the fix! ([\#5909](https://github.com/matrix-org/synapse/issues/5909))
+- Fix 404 for thumbnail download when `dynamic_thumbnails` is `false` and the thumbnail was dynamically generated. Fix reported by rkfg. ([\#5915](https://github.com/matrix-org/synapse/issues/5915))
+- Fix a cache-invalidation bug for worker-based deployments. ([\#5920](https://github.com/matrix-org/synapse/issues/5920))
+- Fix admin API for listing media in a room not being available with an external media repo. ([\#5966](https://github.com/matrix-org/synapse/issues/5966))
+- Fix list media admin API always returning an error. ([\#5967](https://github.com/matrix-org/synapse/issues/5967))
+- Fix room and user stats tracking. ([\#5971](https://github.com/matrix-org/synapse/issues/5971), [\#5998](https://github.com/matrix-org/synapse/issues/5998), [\#6029](https://github.com/matrix-org/synapse/issues/6029))
+- Return a `M_MISSING_PARAM` if `sid` is not provided to `/account/3pid`. ([\#5995](https://github.com/matrix-org/synapse/issues/5995))
+- `federation_certificate_verification_whitelist` now will not cause `TypeErrors` to be raised (a regression in 1.3). Additionally, it now supports internationalised domain names in their non-canonical representation. ([\#5996](https://github.com/matrix-org/synapse/issues/5996))
+- Only count real users when checking for auto-creation of auto-join room. ([\#6004](https://github.com/matrix-org/synapse/issues/6004))
+- Ensure support users can be registered even if MAU limit is reached. ([\#6020](https://github.com/matrix-org/synapse/issues/6020))
+- Fix bug where login error was shown incorrectly on SSO fallback login. ([\#6024](https://github.com/matrix-org/synapse/issues/6024))
+- Fix bug in calculating the federation retry backoff period. ([\#6025](https://github.com/matrix-org/synapse/issues/6025))
+- Prevent exceptions being logged when extremity-cleanup events fail due to lack of user consent to the terms of service. ([\#6053](https://github.com/matrix-org/synapse/issues/6053))
+- Remove POST method from password-reset `submit_token` endpoint until we implement `submit_url` functionality. ([\#6056](https://github.com/matrix-org/synapse/issues/6056))
+- Fix logcontext spam on non-Linux platforms. ([\#6059](https://github.com/matrix-org/synapse/issues/6059))
+- Ensure query parameters in email validation links are URL-encoded. ([\#6063](https://github.com/matrix-org/synapse/issues/6063))
+- Fix a bug which caused SAML attribute maps to be overridden by defaults. ([\#6069](https://github.com/matrix-org/synapse/issues/6069))
+- Fix the logged number of updated items for the users_set_deactivated_flag background update. ([\#6092](https://github.com/matrix-org/synapse/issues/6092))
+- Add sid to `next_link` for email validation. ([\#6097](https://github.com/matrix-org/synapse/issues/6097))
+- Threepid validity checks on msisdns should not be dependent on `threepid_behaviour_email`. ([\#6104](https://github.com/matrix-org/synapse/issues/6104))
+- Ensure that servers which are not configured to support email address verification do not offer it in the registration flows. ([\#6107](https://github.com/matrix-org/synapse/issues/6107))
+
+
+Updates to the Docker image
+---------------------------
+
+- Avoid changing UID/GID if they are already correct. ([\#5970](https://github.com/matrix-org/synapse/issues/5970))
+- Provide SYNAPSE_WORKER envvar to specify python module. ([\#6058](https://github.com/matrix-org/synapse/issues/6058))
+
+
+Improved Documentation
+----------------------
+
+- Convert documentation to markdown (from rst). ([\#5849](https://github.com/matrix-org/synapse/issues/5849))
+- Update `INSTALL.md` to say that Python 2 is no longer supported. ([\#5953](https://github.com/matrix-org/synapse/issues/5953))
+- Add developer documentation for using SAML2. ([\#6032](https://github.com/matrix-org/synapse/issues/6032))
+- Add some notes on rolling back to v1.3.1. ([\#6049](https://github.com/matrix-org/synapse/issues/6049))
+- Update the upgrade notes. ([\#6050](https://github.com/matrix-org/synapse/issues/6050))
+
+
+Deprecations and Removals
+-------------------------
+
+- Remove shared-secret registration from `/_matrix/client/r0/register` endpoint. Contributed by Awesome Technologies Innovationslabor GmbH. ([\#5877](https://github.com/matrix-org/synapse/issues/5877))
+- Deprecate the `trusted_third_party_id_servers` option. ([\#5875](https://github.com/matrix-org/synapse/issues/5875))
+
+
+Internal Changes
+----------------
+
+- Lay the groundwork for structured logging output. ([\#5680](https://github.com/matrix-org/synapse/issues/5680))
+- Retry well-known lookup before the cache expires, giving a grace period where the remote well-known can be down but we still use the old result. ([\#5844](https://github.com/matrix-org/synapse/issues/5844))
+- Remove log line for debugging issue #5407. ([\#5860](https://github.com/matrix-org/synapse/issues/5860))
+- Refactor the Appservice scheduler code. ([\#5886](https://github.com/matrix-org/synapse/issues/5886))
+- Compatibility with v2 Identity Service APIs other than /lookup. ([\#5892](https://github.com/matrix-org/synapse/issues/5892), [\#6013](https://github.com/matrix-org/synapse/issues/6013))
+- Stop populating some unused tables. ([\#5893](https://github.com/matrix-org/synapse/issues/5893), [\#6047](https://github.com/matrix-org/synapse/issues/6047))
+- Add missing index on users_in_public_rooms to improve the performance of directory queries. ([\#5894](https://github.com/matrix-org/synapse/issues/5894))
+- Improve the logging when we have an error when fetching signing keys. ([\#5896](https://github.com/matrix-org/synapse/issues/5896))
+- Add support for database engine-specific schema deltas, based on file extension. ([\#5911](https://github.com/matrix-org/synapse/issues/5911))
+- Update Buildkite pipeline to use plugins instead of buildkite-agent commands. ([\#5922](https://github.com/matrix-org/synapse/issues/5922))
+- Add link in sample config to the logging config schema. ([\#5926](https://github.com/matrix-org/synapse/issues/5926))
+- Remove unnecessary parentheses in return statements. ([\#5931](https://github.com/matrix-org/synapse/issues/5931))
+- Remove unused `jenkins/prepare_sytest.sh` file. ([\#5938](https://github.com/matrix-org/synapse/issues/5938))
+- Move Buildkite pipeline config to the pipelines repo. ([\#5943](https://github.com/matrix-org/synapse/issues/5943))
+- Remove unnecessary return statements in the codebase which were the result of a regex run. ([\#5962](https://github.com/matrix-org/synapse/issues/5962))
+- Remove left-over methods from v1 registration API. ([\#5963](https://github.com/matrix-org/synapse/issues/5963))
+- Cleanup event auth type initialisation. ([\#5975](https://github.com/matrix-org/synapse/issues/5975))
+- Clean up dependency checking at setup. ([\#5989](https://github.com/matrix-org/synapse/issues/5989))
+- Update OpenTracing docs to use the unified `trace` method. ([\#5776](https://github.com/matrix-org/synapse/issues/5776))
+- Small refactor of function arguments and docstrings in `RoomMemberHandler`. ([\#6009](https://github.com/matrix-org/synapse/issues/6009))
+- Remove unused `origin` argument on `FederationHandler.add_display_name_to_third_party_invite`. ([\#6010](https://github.com/matrix-org/synapse/issues/6010))
+- Add a `failure_ts` column to the `destinations` database table. ([\#6016](https://github.com/matrix-org/synapse/issues/6016), [\#6072](https://github.com/matrix-org/synapse/issues/6072))
+- Clean up some code in the retry logic. ([\#6017](https://github.com/matrix-org/synapse/issues/6017))
+- Fix the structured logging tests stomping on the global log configuration for subsequent tests. ([\#6023](https://github.com/matrix-org/synapse/issues/6023))
+- Clean up the sample config for SAML authentication. ([\#6064](https://github.com/matrix-org/synapse/issues/6064))
+- Change mailer logging to reflect Synapse doesn't just do chat notifications by email now. ([\#6075](https://github.com/matrix-org/synapse/issues/6075))
+- Move last-seen info into devices table. ([\#6089](https://github.com/matrix-org/synapse/issues/6089))
+- Remove unused parameter to `get_user_id_by_threepid`. ([\#6099](https://github.com/matrix-org/synapse/issues/6099))
+- Refactor the user-interactive auth handling. ([\#6105](https://github.com/matrix-org/synapse/issues/6105))
+- Refactor code for calculating registration flows. ([\#6106](https://github.com/matrix-org/synapse/issues/6106))
+
+
 Synapse 1.3.1 (2019-08-17)
 ==========================
 
diff --git a/changelog.d/5633.bugfix b/changelog.d/5633.bugfix
deleted file mode 100644
index b2ff803b9..000000000
--- a/changelog.d/5633.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Don't create broken room when power_level_content_override.users does not contain creator_id.
\ No newline at end of file
diff --git a/changelog.d/5680.misc b/changelog.d/5680.misc
deleted file mode 100644
index 46a403a18..000000000
--- a/changelog.d/5680.misc
+++ /dev/null
@@ -1 +0,0 @@
-Lay the groundwork for structured logging output.
diff --git a/changelog.d/5771.feature b/changelog.d/5771.feature
deleted file mode 100644
index f2f4de1fd..000000000
--- a/changelog.d/5771.feature
+++ /dev/null
@@ -1 +0,0 @@
-Make Opentracing work in worker mode.
diff --git a/changelog.d/5776.misc b/changelog.d/5776.misc
deleted file mode 100644
index 1fb1b9c15..000000000
--- a/changelog.d/5776.misc
+++ /dev/null
@@ -1 +0,0 @@
-Update opentracing docs to use the unified `trace` method.
diff --git a/changelog.d/5835.feature b/changelog.d/5835.feature
deleted file mode 100644
index 3e8bf5068..000000000
--- a/changelog.d/5835.feature
+++ /dev/null
@@ -1 +0,0 @@
-Add the ability to send registration emails from the homeserver rather than delegating to an identity server.
diff --git a/changelog.d/5844.misc b/changelog.d/5844.misc
deleted file mode 100644
index a0826af0d..000000000
--- a/changelog.d/5844.misc
+++ /dev/null
@@ -1 +0,0 @@
-Retry well-known lookup before the cache expires, giving a grace period where the remote well-known can be down but we still use the old result.
diff --git a/changelog.d/5845.feature b/changelog.d/5845.feature
deleted file mode 100644
index 7b0dc9a95..000000000
--- a/changelog.d/5845.feature
+++ /dev/null
@@ -1 +0,0 @@
-Add an admin API to purge old rooms from the database.
diff --git a/changelog.d/5849.doc b/changelog.d/5849.doc
deleted file mode 100644
index fbe62e863..000000000
--- a/changelog.d/5849.doc
+++ /dev/null
@@ -1 +0,0 @@
-Convert documentation to markdown (from rst)
diff --git a/changelog.d/5850.feature b/changelog.d/5850.feature
deleted file mode 100644
index b565929a5..000000000
--- a/changelog.d/5850.feature
+++ /dev/null
@@ -1 +0,0 @@
-Add retry to well-known lookups if we have recently seen a valid well-known record for the server.
diff --git a/changelog.d/5852.feature b/changelog.d/5852.feature
deleted file mode 100644
index 4a0fc6c54..000000000
--- a/changelog.d/5852.feature
+++ /dev/null
@@ -1 +0,0 @@
-Pass opentracing contexts between servers when transmitting EDUs.
diff --git a/changelog.d/5853.feature b/changelog.d/5853.feature
deleted file mode 100644
index 80a04ae2e..000000000
--- a/changelog.d/5853.feature
+++ /dev/null
@@ -1 +0,0 @@
-Opentracing for device list updates.
diff --git a/changelog.d/5855.misc b/changelog.d/5855.misc
deleted file mode 100644
index 32db7fbe3..000000000
--- a/changelog.d/5855.misc
+++ /dev/null
@@ -1 +0,0 @@
-Opentracing for room and e2e keys.
diff --git a/changelog.d/5856.feature b/changelog.d/5856.feature
deleted file mode 100644
index f4310b924..000000000
--- a/changelog.d/5856.feature
+++ /dev/null
@@ -1 +0,0 @@
-Add a tag recording a request's authenticated entity and corresponding servlet in opentracing.
diff --git a/changelog.d/5857.bugfix b/changelog.d/5857.bugfix
deleted file mode 100644
index 008799ccb..000000000
--- a/changelog.d/5857.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Fix database index so that different backup versions can have the same sessions.
diff --git a/changelog.d/5859.feature b/changelog.d/5859.feature
deleted file mode 100644
index 52df7fc81..000000000
--- a/changelog.d/5859.feature
+++ /dev/null
@@ -1 +0,0 @@
-Add unstable support for MSC2197 (filtered search requests over federation), in order to allow upcoming room directory query performance improvements.
diff --git a/changelog.d/5860.misc b/changelog.d/5860.misc
deleted file mode 100644
index f9960b17b..000000000
--- a/changelog.d/5860.misc
+++ /dev/null
@@ -1 +0,0 @@
-Remove log line for debugging issue #5407.
diff --git a/changelog.d/5863.bugfix b/changelog.d/5863.bugfix
deleted file mode 100644
index bceae5be6..000000000
--- a/changelog.d/5863.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Fix Synapse looking for config options `password_reset_failure_template` and `password_reset_success_template`, when they are actually `password_reset_template_failure_html`, `password_reset_template_success_html`.
diff --git a/changelog.d/5864.feature b/changelog.d/5864.feature
deleted file mode 100644
index 40ac11db6..000000000
--- a/changelog.d/5864.feature
+++ /dev/null
@@ -1 +0,0 @@
-Correctly retry all hosts returned from SRV when we fail to connect.
diff --git a/changelog.d/5868.feature b/changelog.d/5868.feature
deleted file mode 100644
index 69605c1ae..000000000
--- a/changelog.d/5868.feature
+++ /dev/null
@@ -1 +0,0 @@
-Add `m.require_identity_server` key to `/versions`'s `unstable_features` section.
\ No newline at end of file
diff --git a/changelog.d/5875.misc b/changelog.d/5875.misc
deleted file mode 100644
index e188c28d2..000000000
--- a/changelog.d/5875.misc
+++ /dev/null
@@ -1 +0,0 @@
-Deprecate the `trusted_third_party_id_servers` option.
\ No newline at end of file
diff --git a/changelog.d/5876.feature b/changelog.d/5876.feature
deleted file mode 100644
index df88193fb..000000000
--- a/changelog.d/5876.feature
+++ /dev/null
@@ -1 +0,0 @@
-Replace `trust_identity_server_for_password_resets` config option with `account_threepid_delegates`.
\ No newline at end of file
diff --git a/changelog.d/5877.removal b/changelog.d/5877.removal
deleted file mode 100644
index b6d84fb40..000000000
--- a/changelog.d/5877.removal
+++ /dev/null
@@ -1 +0,0 @@
-Remove shared secret registration from client/r0/register endpoint. Contributed by Awesome Technologies Innovationslabor GmbH.
diff --git a/changelog.d/5878.feature b/changelog.d/5878.feature
deleted file mode 100644
index d9d6df880..000000000
--- a/changelog.d/5878.feature
+++ /dev/null
@@ -1 +0,0 @@
-Add admin API endpoint for setting whether or not a user is a server administrator.
diff --git a/changelog.d/5884.feature b/changelog.d/5884.feature
deleted file mode 100644
index bfd048939..000000000
--- a/changelog.d/5884.feature
+++ /dev/null
@@ -1 +0,0 @@
-Enable cleaning up extremities with dummy events by default to prevent undue build up of forward extremities.
diff --git a/changelog.d/5885.bugfix b/changelog.d/5885.bugfix
deleted file mode 100644
index 411d925fd..000000000
--- a/changelog.d/5885.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Fix stack overflow when recovering an appservice which had an outage.
diff --git a/changelog.d/5886.misc b/changelog.d/5886.misc
deleted file mode 100644
index 22adba3d8..000000000
--- a/changelog.d/5886.misc
+++ /dev/null
@@ -1 +0,0 @@
-Refactor the Appservice scheduler code.
diff --git a/changelog.d/5892.misc b/changelog.d/5892.misc
deleted file mode 100644
index 939fe8c65..000000000
--- a/changelog.d/5892.misc
+++ /dev/null
@@ -1 +0,0 @@
-Compatibility with v2 Identity Service APIs other than /lookup.
\ No newline at end of file
diff --git a/changelog.d/5893.misc b/changelog.d/5893.misc
deleted file mode 100644
index 5ef171cb3..000000000
--- a/changelog.d/5893.misc
+++ /dev/null
@@ -1 +0,0 @@
-Stop populating some unused tables.
diff --git a/changelog.d/5894.misc b/changelog.d/5894.misc
deleted file mode 100644
index fca4485ff..000000000
--- a/changelog.d/5894.misc
+++ /dev/null
@@ -1 +0,0 @@
-Add missing index on users_in_public_rooms to improve the performance of directory queries.
diff --git a/changelog.d/5895.feature b/changelog.d/5895.feature
deleted file mode 100644
index c394a3772..000000000
--- a/changelog.d/5895.feature
+++ /dev/null
@@ -1 +0,0 @@
-Add config option to sign remote key query responses with a separate key.
diff --git a/changelog.d/5896.misc b/changelog.d/5896.misc
deleted file mode 100644
index ed47c747b..000000000
--- a/changelog.d/5896.misc
+++ /dev/null
@@ -1 +0,0 @@
-Improve the logging when we have an error when fetching signing keys.
diff --git a/changelog.d/5897.feature b/changelog.d/5897.feature
deleted file mode 100644
index 1557e559e..000000000
--- a/changelog.d/5897.feature
+++ /dev/null
@@ -1 +0,0 @@
-Switch to using the v2 Identity Service `/lookup` API where available, with fallback to v1. (Implements [MSC2134](https://github.com/matrix-org/matrix-doc/pull/2134) plus id_access_token authentication for v2 Identity Service APIs from [MSC2140](https://github.com/matrix-org/matrix-doc/pull/2140)).
diff --git a/changelog.d/5900.feature b/changelog.d/5900.feature
deleted file mode 100644
index b62d88a76..000000000
--- a/changelog.d/5900.feature
+++ /dev/null
@@ -1 +0,0 @@
-Add support for config templating.
diff --git a/changelog.d/5902.feature b/changelog.d/5902.feature
deleted file mode 100644
index 0660f65cf..000000000
--- a/changelog.d/5902.feature
+++ /dev/null
@@ -1 +0,0 @@
-Users with the type of "support" or "bot" are no longer required to consent.
\ No newline at end of file
diff --git a/changelog.d/5904.feature b/changelog.d/5904.feature
deleted file mode 100644
index 43b5304f3..000000000
--- a/changelog.d/5904.feature
+++ /dev/null
@@ -1 +0,0 @@
-Let synctl accept a directory of config files.
diff --git a/changelog.d/5906.feature b/changelog.d/5906.feature
deleted file mode 100644
index 7c789510a..000000000
--- a/changelog.d/5906.feature
+++ /dev/null
@@ -1 +0,0 @@
-Increase max display name size to 256.
diff --git a/changelog.d/5909.misc b/changelog.d/5909.misc
deleted file mode 100644
index 03d0c4367..000000000
--- a/changelog.d/5909.misc
+++ /dev/null
@@ -1 +0,0 @@
-Fix error message which referred to public_base_url instead of public_baseurl. Thanks to @aaronraimist for the fix!
diff --git a/changelog.d/5911.misc b/changelog.d/5911.misc
deleted file mode 100644
index fe5a8fd59..000000000
--- a/changelog.d/5911.misc
+++ /dev/null
@@ -1 +0,0 @@
-Add support for database engine-specific schema deltas, based on file extension.
\ No newline at end of file
diff --git a/changelog.d/5914.feature b/changelog.d/5914.feature
deleted file mode 100644
index 85c7bf596..000000000
--- a/changelog.d/5914.feature
+++ /dev/null
@@ -1 +0,0 @@
-Add admin API endpoint for getting whether or not a user is a server administrator.
diff --git a/changelog.d/5915.bugfix b/changelog.d/5915.bugfix
deleted file mode 100644
index bf5b99fed..000000000
--- a/changelog.d/5915.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Fix 404 for thumbnail download when `dynamic_thumbnails` is `false` and the thumbnail was dynamically generated. Fix reported by rkfg.
diff --git a/changelog.d/5920.bugfix b/changelog.d/5920.bugfix
deleted file mode 100644
index e45eb0ffe..000000000
--- a/changelog.d/5920.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Fix a cache-invalidation bug for worker-based deployments.
diff --git a/changelog.d/5922.misc b/changelog.d/5922.misc
deleted file mode 100644
index 2cc864897..000000000
--- a/changelog.d/5922.misc
+++ /dev/null
@@ -1 +0,0 @@
-Update Buildkite pipeline to use plugins instead of buildkite-agent commands.
diff --git a/changelog.d/5926.misc b/changelog.d/5926.misc
deleted file mode 100644
index 4383c302e..000000000
--- a/changelog.d/5926.misc
+++ /dev/null
@@ -1 +0,0 @@
-Add link in sample config to the logging config schema.
diff --git a/changelog.d/5931.misc b/changelog.d/5931.misc
deleted file mode 100644
index ac8e74f5b..000000000
--- a/changelog.d/5931.misc
+++ /dev/null
@@ -1 +0,0 @@
-Remove unnecessary parentheses in return statements.
\ No newline at end of file
diff --git a/changelog.d/5934.feature b/changelog.d/5934.feature
deleted file mode 100644
index eae969a52..000000000
--- a/changelog.d/5934.feature
+++ /dev/null
@@ -1 +0,0 @@
-Redact events in the database that have been redacted for a month.
diff --git a/changelog.d/5938.misc b/changelog.d/5938.misc
deleted file mode 100644
index b5a3b6ee3..000000000
--- a/changelog.d/5938.misc
+++ /dev/null
@@ -1 +0,0 @@
-Remove unused jenkins/prepare_sytest.sh file.
diff --git a/changelog.d/5940.feature b/changelog.d/5940.feature
deleted file mode 100644
index 5b69b97fe..000000000
--- a/changelog.d/5940.feature
+++ /dev/null
@@ -1 +0,0 @@
-Add the ability to send registration emails from the homeserver rather than delegating to an identity server.
\ No newline at end of file
diff --git a/changelog.d/5943.misc b/changelog.d/5943.misc
deleted file mode 100644
index 6545e1244..000000000
--- a/changelog.d/5943.misc
+++ /dev/null
@@ -1 +0,0 @@
-Move Buildkite pipeline config to the pipelines repo.
diff --git a/changelog.d/5953.misc b/changelog.d/5953.misc
deleted file mode 100644
index 38e885f42..000000000
--- a/changelog.d/5953.misc
+++ /dev/null
@@ -1 +0,0 @@
-Update INSTALL.md to say that Python 2 is no longer supported.
diff --git a/changelog.d/5962.misc b/changelog.d/5962.misc
deleted file mode 100644
index d97d376c3..000000000
--- a/changelog.d/5962.misc
+++ /dev/null
@@ -1 +0,0 @@
-Remove unnecessary return statements in the codebase which were the result of a regex run.
\ No newline at end of file
diff --git a/changelog.d/5963.misc b/changelog.d/5963.misc
deleted file mode 100644
index 0d6c3c3d6..000000000
--- a/changelog.d/5963.misc
+++ /dev/null
@@ -1 +0,0 @@
-Remove left-over methods from C/S registration API.
\ No newline at end of file
diff --git a/changelog.d/5964.feature b/changelog.d/5964.feature
deleted file mode 100644
index 273c9df02..000000000
--- a/changelog.d/5964.feature
+++ /dev/null
@@ -1 +0,0 @@
-Remove `bind_email` and `bind_msisdn` parameters from /register ala MSC2140.
\ No newline at end of file
diff --git a/changelog.d/5966.bugfix b/changelog.d/5966.bugfix
deleted file mode 100644
index b8ef5a781..000000000
--- a/changelog.d/5966.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Fix admin API for listing media in a room not being available with an external media repo.
diff --git a/changelog.d/5967.bugfix b/changelog.d/5967.bugfix
deleted file mode 100644
index 8d7bf5c2e..000000000
--- a/changelog.d/5967.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Fix list media admin API always returning an error.
diff --git a/changelog.d/5969.feature b/changelog.d/5969.feature
deleted file mode 100644
index cf603fa0c..000000000
--- a/changelog.d/5969.feature
+++ /dev/null
@@ -1 +0,0 @@
-Replace `trust_identity_server_for_password_resets` config option with `account_threepid_delegates`.
diff --git a/changelog.d/5970.docker b/changelog.d/5970.docker
deleted file mode 100644
index c9d04da9c..000000000
--- a/changelog.d/5970.docker
+++ /dev/null
@@ -1 +0,0 @@
-Avoid changing UID/GID if they are already correct.
diff --git a/changelog.d/5971.bugfix b/changelog.d/5971.bugfix
deleted file mode 100644
index 9ea095103..000000000
--- a/changelog.d/5971.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Fix room and user stats tracking.
diff --git a/changelog.d/5972.misc b/changelog.d/5972.misc
deleted file mode 100644
index 1dc217e89..000000000
--- a/changelog.d/5972.misc
+++ /dev/null
@@ -1 +0,0 @@
-Add m.require_identity_server flag to /version's unstable_features.
\ No newline at end of file
diff --git a/changelog.d/5974.feature b/changelog.d/5974.feature
deleted file mode 100644
index 387a444fc..000000000
--- a/changelog.d/5974.feature
+++ /dev/null
@@ -1 +0,0 @@
-Add m.id_access_token to unstable_features in /versions as per MSC2264.
\ No newline at end of file
diff --git a/changelog.d/5975.misc b/changelog.d/5975.misc
deleted file mode 100644
index 5fcd229b8..000000000
--- a/changelog.d/5975.misc
+++ /dev/null
@@ -1 +0,0 @@
-Cleanup event auth type initialisation.
\ No newline at end of file
diff --git a/changelog.d/5979.feature b/changelog.d/5979.feature
deleted file mode 100644
index 94888aa2d..000000000
--- a/changelog.d/5979.feature
+++ /dev/null
@@ -1 +0,0 @@
-Use the v2 Identity Service API for 3PID invites.
\ No newline at end of file
diff --git a/changelog.d/5980.feature b/changelog.d/5980.feature
deleted file mode 100644
index e20117cf1..000000000
--- a/changelog.d/5980.feature
+++ /dev/null
@@ -1 +0,0 @@
-Add POST /_matrix/client/unstable/account/3pid/unbind endpoint from MSC2140 for unbinding a 3PID from an identity server without removing it from the homeserver user account.
diff --git a/changelog.d/5981.feature b/changelog.d/5981.feature
deleted file mode 100644
index e39514273..000000000
--- a/changelog.d/5981.feature
+++ /dev/null
@@ -1 +0,0 @@
-Setting metrics_flags.known_servers to True in the configuration will publish the synapse_federation_known_servers metric over Prometheus. This represents the total number of servers your server knows about (i.e. is in rooms with), including itself.
diff --git a/changelog.d/5982.bugfix b/changelog.d/5982.bugfix
deleted file mode 100644
index 3ea281a3a..000000000
--- a/changelog.d/5982.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Include missing opentracing contexts in outbout replication requests.
diff --git a/changelog.d/5983.feature b/changelog.d/5983.feature
deleted file mode 100644
index aa23ee6dc..000000000
--- a/changelog.d/5983.feature
+++ /dev/null
@@ -1 +0,0 @@
-Add minimum opentracing for client servlets.
diff --git a/changelog.d/5984.bugfix b/changelog.d/5984.bugfix
deleted file mode 100644
index 3387bf82b..000000000
--- a/changelog.d/5984.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Fix sending of EDUs when opentracing is enabled with an empty whitelist.
diff --git a/changelog.d/5985.feature b/changelog.d/5985.feature
deleted file mode 100644
index e5e29504a..000000000
--- a/changelog.d/5985.feature
+++ /dev/null
@@ -1 +0,0 @@
-Check at setup that opentracing is installed if it's enabled in the config.
diff --git a/changelog.d/5986.feature b/changelog.d/5986.feature
deleted file mode 100644
index f56aec1b3..000000000
--- a/changelog.d/5986.feature
+++ /dev/null
@@ -1 +0,0 @@
-Trace replication send times.
diff --git a/changelog.d/5988.bugfix b/changelog.d/5988.bugfix
deleted file mode 100644
index 5c3597cb5..000000000
--- a/changelog.d/5988.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Fix invalid references to None while opentracing if the log context slips.
diff --git a/changelog.d/5989.misc b/changelog.d/5989.misc
deleted file mode 100644
index 9f2525fd3..000000000
--- a/changelog.d/5989.misc
+++ /dev/null
@@ -1 +0,0 @@
-Clean up dependency checking at setup.
diff --git a/changelog.d/5991.bugfix b/changelog.d/5991.bugfix
deleted file mode 100644
index 5c3597cb5..000000000
--- a/changelog.d/5991.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Fix invalid references to None while opentracing if the log context slips.
diff --git a/changelog.d/5992.feature b/changelog.d/5992.feature
deleted file mode 100644
index 31866c292..000000000
--- a/changelog.d/5992.feature
+++ /dev/null
@@ -1 +0,0 @@
-Give appropriate exit codes when synctl fails.
diff --git a/changelog.d/5993.feature b/changelog.d/5993.feature
deleted file mode 100644
index 3e8bf5068..000000000
--- a/changelog.d/5993.feature
+++ /dev/null
@@ -1 +0,0 @@
-Add the ability to send registration emails from the homeserver rather than delegating to an identity server.
diff --git a/changelog.d/5994.feature b/changelog.d/5994.feature
deleted file mode 100644
index 5b69b97fe..000000000
--- a/changelog.d/5994.feature
+++ /dev/null
@@ -1 +0,0 @@
-Add the ability to send registration emails from the homeserver rather than delegating to an identity server.
\ No newline at end of file
diff --git a/changelog.d/5995.bugfix b/changelog.d/5995.bugfix
deleted file mode 100644
index e03ab98bc..000000000
--- a/changelog.d/5995.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Return a M_MISSING_PARAM if `sid` is not provided to `/account/3pid`.
\ No newline at end of file
diff --git a/changelog.d/5996.bugfix b/changelog.d/5996.bugfix
deleted file mode 100644
index 05e31faaa..000000000
--- a/changelog.d/5996.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-federation_certificate_verification_whitelist now will not cause TypeErrors to be raised (a regression in 1.3). Additionally, it now supports internationalised domain names in their non-canonical representation.
diff --git a/changelog.d/5998.bugfix b/changelog.d/5998.bugfix
deleted file mode 100644
index 9ea095103..000000000
--- a/changelog.d/5998.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Fix room and user stats tracking.
diff --git a/changelog.d/6000.feature b/changelog.d/6000.feature
deleted file mode 100644
index 0a159bd10..000000000
--- a/changelog.d/6000.feature
+++ /dev/null
@@ -1 +0,0 @@
-Apply the federation blacklist to requests to identity servers.
\ No newline at end of file
diff --git a/changelog.d/6003.misc b/changelog.d/6003.misc
deleted file mode 100644
index 4152d05f8..000000000
--- a/changelog.d/6003.misc
+++ /dev/null
@@ -1 +0,0 @@
-Add opentracing span over HTTP push processing.
diff --git a/changelog.d/6004.bugfix b/changelog.d/6004.bugfix
deleted file mode 100644
index 45c179c8f..000000000
--- a/changelog.d/6004.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Only count real users when checking for auto-creation of auto-join room.
diff --git a/changelog.d/6005.feature b/changelog.d/6005.feature
deleted file mode 100644
index ed6491d3e..000000000
--- a/changelog.d/6005.feature
+++ /dev/null
@@ -1 +0,0 @@
-The new Prometheus metric `synapse_build_info` exposes the Python version, OS version, and Synapse version of the running server.
diff --git a/changelog.d/6009.misc b/changelog.d/6009.misc
deleted file mode 100644
index fea479e1d..000000000
--- a/changelog.d/6009.misc
+++ /dev/null
@@ -1 +0,0 @@
-Small refactor of function arguments and docstrings in RoomMemberHandler.
\ No newline at end of file
diff --git a/changelog.d/6010.misc b/changelog.d/6010.misc
deleted file mode 100644
index 0659f12eb..000000000
--- a/changelog.d/6010.misc
+++ /dev/null
@@ -1 +0,0 @@
-Remove unused `origin` argument on FederationHandler.add_display_name_to_third_party_invite.
\ No newline at end of file
diff --git a/changelog.d/6011.feature b/changelog.d/6011.feature
deleted file mode 100644
index ad16acb12..000000000
--- a/changelog.d/6011.feature
+++ /dev/null
@@ -1 +0,0 @@
-Use account_threepid_delegate.email and account_threepid_delegate.msisdn for validating threepid sessions.
\ No newline at end of file
diff --git a/changelog.d/6012.feature b/changelog.d/6012.feature
deleted file mode 100644
index 25425510c..000000000
--- a/changelog.d/6012.feature
+++ /dev/null
@@ -1 +0,0 @@
-Add report_stats_endpoint option to configure where stats are reported to, if enabled. Contributed by @Sorunome.
diff --git a/changelog.d/6013.misc b/changelog.d/6013.misc
deleted file mode 100644
index 939fe8c65..000000000
--- a/changelog.d/6013.misc
+++ /dev/null
@@ -1 +0,0 @@
-Compatibility with v2 Identity Service APIs other than /lookup.
\ No newline at end of file
diff --git a/changelog.d/6015.feature b/changelog.d/6015.feature
deleted file mode 100644
index 42aaffced..000000000
--- a/changelog.d/6015.feature
+++ /dev/null
@@ -1 +0,0 @@
-Add config option to increase ratelimits for room admins redacting messages.
diff --git a/changelog.d/6016.misc b/changelog.d/6016.misc
deleted file mode 100644
index 91cf16471..000000000
--- a/changelog.d/6016.misc
+++ /dev/null
@@ -1 +0,0 @@
-Add a 'failure_ts' column to the 'destinations' database table.
diff --git a/changelog.d/6017.misc b/changelog.d/6017.misc
deleted file mode 100644
index 5ccab9c6c..000000000
--- a/changelog.d/6017.misc
+++ /dev/null
@@ -1 +0,0 @@
-Clean up some code in the retry logic.
diff --git a/changelog.d/6020.bugfix b/changelog.d/6020.bugfix
deleted file mode 100644
index 58a7deba9..000000000
--- a/changelog.d/6020.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Ensure support users can be registered even if MAU limit is reached.
diff --git a/changelog.d/6023.misc b/changelog.d/6023.misc
deleted file mode 100644
index d80410c22..000000000
--- a/changelog.d/6023.misc
+++ /dev/null
@@ -1 +0,0 @@
-Fix the structured logging tests stomping on the global log configuration for subsequent tests.
diff --git a/changelog.d/6024.bugfix b/changelog.d/6024.bugfix
deleted file mode 100644
index ddad34595..000000000
--- a/changelog.d/6024.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Fix bug where login error was shown incorrectly on SSO fallback login.
diff --git a/changelog.d/6025.bugfix b/changelog.d/6025.bugfix
deleted file mode 100644
index 50d7f9aab..000000000
--- a/changelog.d/6025.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Fix bug in calculating the federation retry backoff period.
\ No newline at end of file
diff --git a/changelog.d/6026.feature b/changelog.d/6026.feature
deleted file mode 100644
index 2489ff09b..000000000
--- a/changelog.d/6026.feature
+++ /dev/null
@@ -1 +0,0 @@
-Stop sending federation transactions to servers which have been down for a long time.
diff --git a/changelog.d/6027.doc b/changelog.d/6027.doc
deleted file mode 100644
index f0af68f3b..000000000
--- a/changelog.d/6027.doc
+++ /dev/null
@@ -1 +0,0 @@
-Clarify Synapse 1.4.0 upgrade notes.
diff --git a/changelog.d/6028.feature b/changelog.d/6028.feature
deleted file mode 100644
index cf603fa0c..000000000
--- a/changelog.d/6028.feature
+++ /dev/null
@@ -1 +0,0 @@
-Replace `trust_identity_server_for_password_resets` config option with `account_threepid_delegates`.
diff --git a/changelog.d/6029.bugfix b/changelog.d/6029.bugfix
deleted file mode 100644
index 9ea095103..000000000
--- a/changelog.d/6029.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Fix room and user stats tracking.
diff --git a/changelog.d/6032.misc b/changelog.d/6032.misc
deleted file mode 100644
index ec5b5eb88..000000000
--- a/changelog.d/6032.misc
+++ /dev/null
@@ -1 +0,0 @@
-Add developer documentation for using SAML2.
diff --git a/changelog.d/6037.feature b/changelog.d/6037.feature
deleted file mode 100644
index 85553d2da..000000000
--- a/changelog.d/6037.feature
+++ /dev/null
@@ -1 +0,0 @@
-Make the process for mapping SAML2 users to matrix IDs more flexible.
diff --git a/changelog.d/6042.feature b/changelog.d/6042.feature
deleted file mode 100644
index a73776036..000000000
--- a/changelog.d/6042.feature
+++ /dev/null
@@ -1 +0,0 @@
-Allow homeserver to handle or delegate email validation when adding an email to a user's account.
diff --git a/changelog.d/6043.feature b/changelog.d/6043.feature
deleted file mode 100644
index cd27b0400..000000000
--- a/changelog.d/6043.feature
+++ /dev/null
@@ -1 +0,0 @@
-Implement new Client Server API endpoints `/account/3pid/add` and `/account/3pid/bind` as per [MSC2290](https://github.com/matrix-org/matrix-doc/pull/2290).
\ No newline at end of file
diff --git a/changelog.d/6044.feature b/changelog.d/6044.feature
deleted file mode 100644
index 7dc05d484..000000000
--- a/changelog.d/6044.feature
+++ /dev/null
@@ -1 +0,0 @@
-Add an unstable feature flag for separate add/bind 3pid APIs.
\ No newline at end of file
diff --git a/changelog.d/6047.misc b/changelog.d/6047.misc
deleted file mode 100644
index a4cdb8abb..000000000
--- a/changelog.d/6047.misc
+++ /dev/null
@@ -1,2 +0,0 @@
-Stop populating some unused tables.
-
diff --git a/changelog.d/6049.doc b/changelog.d/6049.doc
deleted file mode 100644
index e0307bf5c..000000000
--- a/changelog.d/6049.doc
+++ /dev/null
@@ -1 +0,0 @@
-Add some notes on rolling back to v1.3.1.
diff --git a/changelog.d/6050.doc b/changelog.d/6050.doc
deleted file mode 100644
index 3d19c69bc..000000000
--- a/changelog.d/6050.doc
+++ /dev/null
@@ -1 +0,0 @@
-Update the upgrade notes.
diff --git a/changelog.d/6053.bugfix b/changelog.d/6053.bugfix
deleted file mode 100644
index 6311157bf..000000000
--- a/changelog.d/6053.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Prevent exceptions being logged when extremity-cleanup events fail due to lack of user consent to the terms of service.
diff --git a/changelog.d/6056.bugfix b/changelog.d/6056.bugfix
deleted file mode 100644
index 4d9573a58..000000000
--- a/changelog.d/6056.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Remove POST method from password reset submit_token endpoint until we implement submit_url functionality.
\ No newline at end of file
diff --git a/changelog.d/6058.docker b/changelog.d/6058.docker
deleted file mode 100644
index 30be6933c..000000000
--- a/changelog.d/6058.docker
+++ /dev/null
@@ -1 +0,0 @@
-Provide SYNAPSE_WORKER envvar to specify python module.
diff --git a/changelog.d/6059.bugfix b/changelog.d/6059.bugfix
deleted file mode 100644
index 49d5bd3fa..000000000
--- a/changelog.d/6059.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Fix logcontext spam on non-Linux platforms.
diff --git a/changelog.d/6062.bugfix b/changelog.d/6062.bugfix
deleted file mode 100644
index e20117cf1..000000000
--- a/changelog.d/6062.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Add POST /_matrix/client/unstable/account/3pid/unbind endpoint from MSC2140 for unbinding a 3PID from an identity server without removing it from the homeserver user account.
diff --git a/changelog.d/6063.bugfix b/changelog.d/6063.bugfix
deleted file mode 100644
index 7485e32a2..000000000
--- a/changelog.d/6063.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Ensure query parameters in email validation links are URL-encoded.
\ No newline at end of file
diff --git a/changelog.d/6064.misc b/changelog.d/6064.misc
deleted file mode 100644
index 28dc89111..000000000
--- a/changelog.d/6064.misc
+++ /dev/null
@@ -1 +0,0 @@
-Clean up the sample config for SAML authentication.
diff --git a/changelog.d/6067.feature b/changelog.d/6067.feature
deleted file mode 100644
index 72685961c..000000000
--- a/changelog.d/6067.feature
+++ /dev/null
@@ -1 +0,0 @@
-Remove `bind` parameter from Client Server POST `/account` endpoint as per [MSC2290](https://github.com/matrix-org/matrix-doc/pull/2290/).
\ No newline at end of file
diff --git a/changelog.d/6069.bugfix b/changelog.d/6069.bugfix
deleted file mode 100644
index a437ac41a..000000000
--- a/changelog.d/6069.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Fix a bug which caused SAML attribute maps to be overridden by defaults.
diff --git a/changelog.d/6072.misc b/changelog.d/6072.misc
deleted file mode 100644
index 91cf16471..000000000
--- a/changelog.d/6072.misc
+++ /dev/null
@@ -1 +0,0 @@
-Add a 'failure_ts' column to the 'destinations' database table.
diff --git a/changelog.d/6073.feature b/changelog.d/6073.feature
deleted file mode 100644
index 15d993389..000000000
--- a/changelog.d/6073.feature
+++ /dev/null
@@ -1 +0,0 @@
-Return a clearer error message when a timeout occurs when attempting to contact an identity server.
\ No newline at end of file
diff --git a/changelog.d/6074.feature b/changelog.d/6074.feature
deleted file mode 100644
index b7aa9c99d..000000000
--- a/changelog.d/6074.feature
+++ /dev/null
@@ -1 +0,0 @@
-Prevent password reset's submit_token endpoint from accepting trailing slashes.
\ No newline at end of file
diff --git a/changelog.d/6075.misc b/changelog.d/6075.misc
deleted file mode 100644
index 914e56bcf..000000000
--- a/changelog.d/6075.misc
+++ /dev/null
@@ -1 +0,0 @@
-Change mailer logging to reflect Synapse doesn't just do chat notifications by email now.
\ No newline at end of file
diff --git a/changelog.d/6078.feature b/changelog.d/6078.feature
deleted file mode 100644
index fae1e5232..000000000
--- a/changelog.d/6078.feature
+++ /dev/null
@@ -1 +0,0 @@
-Add `POST /add_threepid/msisdn/submit_token` endpoint for proxying submitToken on an account_threepid_handler.
\ No newline at end of file
diff --git a/changelog.d/6079.feature b/changelog.d/6079.feature
deleted file mode 100644
index bcbb49ac5..000000000
--- a/changelog.d/6079.feature
+++ /dev/null
@@ -1 +0,0 @@
-Add `submit_url` response parameter to `*/msisdn/requestToken` endpoints.
diff --git a/changelog.d/6082.feature b/changelog.d/6082.feature
deleted file mode 100644
index c30662b60..000000000
--- a/changelog.d/6082.feature
+++ /dev/null
@@ -1 +0,0 @@
-Return 403 on `/register/available` if registration has been disabled.
\ No newline at end of file
diff --git a/changelog.d/6089.misc b/changelog.d/6089.misc
deleted file mode 100644
index fa3c197c5..000000000
--- a/changelog.d/6089.misc
+++ /dev/null
@@ -1 +0,0 @@
-Move last seen info into devices table.
diff --git a/changelog.d/6090.feature b/changelog.d/6090.feature
deleted file mode 100644
index a6da448a1..000000000
--- a/changelog.d/6090.feature
+++ /dev/null
@@ -1 +0,0 @@
-Explicitly log when a homeserver does not have the 'trusted_key_servers' config field configured.
diff --git a/changelog.d/6092.bugfix b/changelog.d/6092.bugfix
deleted file mode 100644
index 01a7498ec..000000000
--- a/changelog.d/6092.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Fix the logged number of updated items for the users_set_deactivated_flag background update.
diff --git a/changelog.d/6097.bugfix b/changelog.d/6097.bugfix
deleted file mode 100644
index 750a8ecf0..000000000
--- a/changelog.d/6097.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Add sid to next_link for email validation.
diff --git a/changelog.d/6098.feature b/changelog.d/6098.feature
deleted file mode 100644
index f3c693c06..000000000
--- a/changelog.d/6098.feature
+++ /dev/null
@@ -1 +0,0 @@
-Add support for pruning old rows in `user_ips` table.
diff --git a/changelog.d/6099.misc b/changelog.d/6099.misc
deleted file mode 100644
index 8415c6759..000000000
--- a/changelog.d/6099.misc
+++ /dev/null
@@ -1 +0,0 @@
-Remove unused parameter to get_user_id_by_threepid.
diff --git a/changelog.d/6104.bugfix b/changelog.d/6104.bugfix
deleted file mode 100644
index 41114a66e..000000000
--- a/changelog.d/6104.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Threepid validity checks on msisdns should not be dependent on 'threepid_behaviour_email'.
diff --git a/changelog.d/6105.misc b/changelog.d/6105.misc
deleted file mode 100644
index 2e838a35c..000000000
--- a/changelog.d/6105.misc
+++ /dev/null
@@ -1 +0,0 @@
-Refactor the user-interactive auth handling.
diff --git a/changelog.d/6106.misc b/changelog.d/6106.misc
deleted file mode 100644
index d73209177..000000000
--- a/changelog.d/6106.misc
+++ /dev/null
@@ -1 +0,0 @@
-Refactor code for calculating registration flows.
diff --git a/changelog.d/6107.bugfix b/changelog.d/6107.bugfix
deleted file mode 100644
index d4b9516ac..000000000
--- a/changelog.d/6107.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Ensure that servers which are not configured to support email address verification do not offer it in the registration flows.
\ No newline at end of file

From 5384c43626a2a54196b3bf08d49a0101141d39fb Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Thu, 26 Sep 2019 13:38:56 +0100
Subject: [PATCH 122/276] Changelog formatting

---
 CHANGES.md | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/CHANGES.md b/CHANGES.md
index 9f610e4c1..86261b27d 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -15,13 +15,12 @@ Features
   - Add `m.id_access_token` to `unstable_features` in `/versions` as per [MSC2264](https://github.com/matrix-org/matrix-doc/pull/2264). ([\#5974](https://github.com/matrix-org/synapse/issues/5974))
   - Use the v2 Identity Service API for 3PID invites. ([\#5979](https://github.com/matrix-org/synapse/issues/5979))
   - Add `POST /_matrix/client/unstable/account/3pid/unbind` endpoint from [MSC2140](https://github.com/matrix-org/matrix-doc/pull/2140) for unbinding a 3PID from an identity server without removing it from the homeserver user account. ([\#5980](https://github.com/matrix-org/synapse/issues/5980), [\#6062](https://github.com/matrix-org/synapse/issues/6062))
-)
   - Use `account_threepid_delegate.email` and `account_threepid_delegate.msisdn` for validating threepid sessions. ([\#6011](https://github.com/matrix-org/synapse/issues/6011))
   - Allow homeserver to handle or delegate email validation when adding an email to a user's account. ([\#6042](https://github.com/matrix-org/synapse/issues/6042))
   - Implement new Client Server API endpoints `/account/3pid/add` and `/account/3pid/bind` as per [MSC2290](https://github.com/matrix-org/matrix-doc/pull/2290). ([\#6043](https://github.com/matrix-org/synapse/issues/6043))
   - Add an unstable feature flag for separate add/bind 3pid APIs. ([\#6044](https://github.com/matrix-org/synapse/issues/6044))
   - Remove `bind` parameter from Client Server POST `/account` endpoint as per [MSC2290](https://github.com/matrix-org/matrix-doc/pull/2290/). ([\#6067](https://github.com/matrix-org/synapse/issues/6067))
-  - Add `POST /add_threepid/msisdn/submit_token` endpoint for proxying submitToken on an account_threepid_handler. ([\#6078](https://github.com/matrix-org/synapse/issues/6078))
+  - Add `POST /add_threepid/msisdn/submit_token` endpoint for proxying submitToken on an `account_threepid_handler`. ([\#6078](https://github.com/matrix-org/synapse/issues/6078))
   - Add `submit_url` response parameter to `*/msisdn/requestToken` endpoints. ([\#6079](https://github.com/matrix-org/synapse/issues/6079))
   - Add `m.require_identity_server` flag to /version's unstable_features. ([\#5972](https://github.com/matrix-org/synapse/issues/5972))
 - Enhancements to OpenTracing support:

From e04c235907968f19788eba1be59169cf08e1df08 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Thu, 26 Sep 2019 14:59:01 +0100
Subject: [PATCH 123/276] more changelog updates

---
 CHANGES.md | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/CHANGES.md b/CHANGES.md
index 86261b27d..addc4c4b5 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -9,8 +9,8 @@ Features
 
 - Changes to 3pid verification:
   - Add the ability to send registration emails from the homeserver rather than delegating to an identity server. ([\#5835](https://github.com/matrix-org/synapse/issues/5835), [\#5940](https://github.com/matrix-org/synapse/issues/5940), [\#5993](https://github.com/matrix-org/synapse/issues/5993), [\#5994](https://github.com/matrix-org/synapse/issues/5994), [\#5868](https://github.com/matrix-org/synapse/issues/5868))
-  - Replace `trust_identity_server_for_password_resets` config option with `account_threepid_delegates`. ([\#5876](https://github.com/matrix-org/synapse/issues/5876), [\#5969](https://github.com/matrix-org/synapse/issues/5969), [\#6028](https://github.com/matrix-org/synapse/issues/6028))
-  - Switch to using the v2 Identity Service `/lookup` API where available, with fallback to v1. (Implements [MSC2134](https://github.com/matrix-org/matrix-doc/pull/2134) plus id_access_token authentication for v2 Identity Service APIs from [MSC2140](https://github.com/matrix-org/matrix-doc/pull/2140)). ([\#5897](https://github.com/matrix-org/synapse/issues/5897))
+  - Replace `trust_identity_server_for_password_resets` config option with `account_threepid_delegates`, and make the `id_server` parameteter optional on `*/requestToken` endpoints, as per [MSC2263](https://github.com/matrix-org/matrix-doc/pull/2263). ([\#5876](https://github.com/matrix-org/synapse/issues/5876), [\#5969](https://github.com/matrix-org/synapse/issues/5969), [\#6028](https://github.com/matrix-org/synapse/issues/6028))
+  - Switch to using the v2 Identity Service `/lookup` API where available, with fallback to v1. (Implements [MSC2134](https://github.com/matrix-org/matrix-doc/pull/2134) plus `id_access_token authentication` for v2 Identity Service APIs from [MSC2140](https://github.com/matrix-org/matrix-doc/pull/2140)). ([\#5897](https://github.com/matrix-org/synapse/issues/5897))
   - Remove `bind_email` and `bind_msisdn` parameters from `/register` ala [MSC2140](https://github.com/matrix-org/matrix-doc/pull/2140). ([\#5964](https://github.com/matrix-org/synapse/issues/5964))
   - Add `m.id_access_token` to `unstable_features` in `/versions` as per [MSC2264](https://github.com/matrix-org/matrix-doc/pull/2264). ([\#5974](https://github.com/matrix-org/synapse/issues/5974))
   - Use the v2 Identity Service API for 3PID invites. ([\#5979](https://github.com/matrix-org/synapse/issues/5979))
@@ -88,8 +88,8 @@ Bugfixes
 - Fix logcontext spam on non-Linux platforms. ([\#6059](https://github.com/matrix-org/synapse/issues/6059))
 - Ensure query parameters in email validation links are URL-encoded. ([\#6063](https://github.com/matrix-org/synapse/issues/6063))
 - Fix a bug which caused SAML attribute maps to be overridden by defaults. ([\#6069](https://github.com/matrix-org/synapse/issues/6069))
-- Fix the logged number of updated items for the users_set_deactivated_flag background update. ([\#6092](https://github.com/matrix-org/synapse/issues/6092))
-- Add sid to `next_link` for email validation. ([\#6097](https://github.com/matrix-org/synapse/issues/6097))
+- Fix the logged number of updated items for the `users_set_deactivated_flag` background update. ([\#6092](https://github.com/matrix-org/synapse/issues/6092))
+- Add `sid` to `next_link` for email validation. ([\#6097](https://github.com/matrix-org/synapse/issues/6097))
 - Threepid validity checks on msisdns should not be dependent on `threepid_behaviour_email`. ([\#6104](https://github.com/matrix-org/synapse/issues/6104))
 - Ensure that servers which are not configured to support email address verification do not offer it in the registration flows. ([\#6107](https://github.com/matrix-org/synapse/issues/6107))
 
@@ -97,8 +97,8 @@ Bugfixes
 Updates to the Docker image
 ---------------------------
 
-- Avoid changing UID/GID if they are already correct. ([\#5970](https://github.com/matrix-org/synapse/issues/5970))
-- Provide SYNAPSE_WORKER envvar to specify python module. ([\#6058](https://github.com/matrix-org/synapse/issues/6058))
+- Avoid changing `UID/GID` if they are already correct. ([\#5970](https://github.com/matrix-org/synapse/issues/5970))
+- Provide `SYNAPSE_WORKER` envvar to specify python module. ([\#6058](https://github.com/matrix-org/synapse/issues/6058))
 
 
 Improved Documentation
@@ -127,7 +127,7 @@ Internal Changes
 - Refactor the Appservice scheduler code. ([\#5886](https://github.com/matrix-org/synapse/issues/5886))
 - Compatibility with v2 Identity Service APIs other than /lookup. ([\#5892](https://github.com/matrix-org/synapse/issues/5892), [\#6013](https://github.com/matrix-org/synapse/issues/6013))
 - Stop populating some unused tables. ([\#5893](https://github.com/matrix-org/synapse/issues/5893), [\#6047](https://github.com/matrix-org/synapse/issues/6047))
-- Add missing index on users_in_public_rooms to improve the performance of directory queries. ([\#5894](https://github.com/matrix-org/synapse/issues/5894))
+- Add missing index on `users_in_public_rooms` to improve the performance of directory queries. ([\#5894](https://github.com/matrix-org/synapse/issues/5894))
 - Improve the logging when we have an error when fetching signing keys. ([\#5896](https://github.com/matrix-org/synapse/issues/5896))
 - Add support for database engine-specific schema deltas, based on file extension. ([\#5911](https://github.com/matrix-org/synapse/issues/5911))
 - Update Buildkite pipeline to use plugins instead of buildkite-agent commands. ([\#5922](https://github.com/matrix-org/synapse/issues/5922))

From e75512800657973f77255ac41180682414c74ed2 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Thu, 26 Sep 2019 15:29:31 +0100
Subject: [PATCH 124/276] Update CHANGES.md

formatting tweak
---
 CHANGES.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGES.md b/CHANGES.md
index addc4c4b5..d3d4d69cd 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -2,7 +2,7 @@ Synapse 1.4.0rc1 (2019-09-26)
 =============================
 
 Note that this release includes significant changes around 3pid
-verification. Administrators are reminded to review the [upgrade notes](UPGRADE.rst##upgrading-to-v140).
+verification. Administrators are reminded to review the [upgrade notes](UPGRADE.rst#upgrading-to-v140).
 
 Features
 --------

From 54569c787b4abbc5674d9c23c012b56d8cc156ef Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Thu, 26 Sep 2019 15:38:25 +0100
Subject: [PATCH 125/276] Kill off half-implemented password-reset via sms
 (#6101)

Doing a password reset via SMS has never worked, and in any case is a silly
idea because msisdn recycling is a thing.

See also matrix-org/matrix-doc#2303.
---
 changelog.d/6101.misc                   |  1 +
 synapse/rest/client/v2_alpha/account.py | 65 +------------------------
 2 files changed, 2 insertions(+), 64 deletions(-)
 create mode 100644 changelog.d/6101.misc

diff --git a/changelog.d/6101.misc b/changelog.d/6101.misc
new file mode 100644
index 000000000..9743abb9e
--- /dev/null
+++ b/changelog.d/6101.misc
@@ -0,0 +1 @@
+Kill off half-implemented password-reset via sms.
diff --git a/synapse/rest/client/v2_alpha/account.py b/synapse/rest/client/v2_alpha/account.py
index f99676fd3..80cf7126a 100644
--- a/synapse/rest/client/v2_alpha/account.py
+++ b/synapse/rest/client/v2_alpha/account.py
@@ -129,66 +129,6 @@ class EmailPasswordRequestTokenRestServlet(RestServlet):
         return 200, ret
 
 
-class MsisdnPasswordRequestTokenRestServlet(RestServlet):
-    PATTERNS = client_patterns("/account/password/msisdn/requestToken$")
-
-    def __init__(self, hs):
-        super(MsisdnPasswordRequestTokenRestServlet, self).__init__()
-        self.hs = hs
-        self.datastore = self.hs.get_datastore()
-        self.identity_handler = hs.get_handlers().identity_handler
-
-    @defer.inlineCallbacks
-    def on_POST(self, request):
-        body = parse_json_object_from_request(request)
-
-        assert_params_in_dict(
-            body, ["client_secret", "country", "phone_number", "send_attempt"]
-        )
-        client_secret = body["client_secret"]
-        country = body["country"]
-        phone_number = body["phone_number"]
-        send_attempt = body["send_attempt"]
-        next_link = body.get("next_link")  # Optional param
-
-        msisdn = phone_number_to_msisdn(country, phone_number)
-
-        if not check_3pid_allowed(self.hs, "msisdn", msisdn):
-            raise SynapseError(
-                403,
-                "Account phone numbers are not authorized on this server",
-                Codes.THREEPID_DENIED,
-            )
-
-        existing_user_id = yield self.datastore.get_user_id_by_threepid(
-            "msisdn", msisdn
-        )
-
-        if existing_user_id is None:
-            raise SynapseError(400, "MSISDN not found", Codes.THREEPID_NOT_FOUND)
-
-        if not self.hs.config.account_threepid_delegate_msisdn:
-            logger.warn(
-                "No upstream msisdn account_threepid_delegate configured on the server to "
-                "handle this request"
-            )
-            raise SynapseError(
-                400,
-                "Password reset by phone number is not supported on this homeserver",
-            )
-
-        ret = yield self.identity_handler.requestMsisdnToken(
-            self.hs.config.account_threepid_delegate_msisdn,
-            country,
-            phone_number,
-            client_secret,
-            send_attempt,
-            next_link,
-        )
-
-        return 200, ret
-
-
 class PasswordResetSubmitTokenServlet(RestServlet):
     """Handles 3PID validation token submission"""
 
@@ -301,9 +241,7 @@ class PasswordRestServlet(RestServlet):
         else:
             requester = None
             result, params, _ = yield self.auth_handler.check_auth(
-                [[LoginType.EMAIL_IDENTITY], [LoginType.MSISDN]],
-                body,
-                self.hs.get_ip_from_request(request),
+                [[LoginType.EMAIL_IDENTITY]], body, self.hs.get_ip_from_request(request)
             )
 
             if LoginType.EMAIL_IDENTITY in result:
@@ -843,7 +781,6 @@ class WhoamiRestServlet(RestServlet):
 
 def register_servlets(hs, http_server):
     EmailPasswordRequestTokenRestServlet(hs).register(http_server)
-    MsisdnPasswordRequestTokenRestServlet(hs).register(http_server)
     PasswordResetSubmitTokenServlet(hs).register(http_server)
     PasswordRestServlet(hs).register(http_server)
     DeactivateAccountRestServlet(hs).register(http_server)

From f25c5ee1feb2b8c026f88737e3adbd31cf89a79a Mon Sep 17 00:00:00 2001
From: Neil Johnson <neil@matrix.org>
Date: Thu, 26 Sep 2019 15:57:02 +0100
Subject: [PATCH 126/276] s/month/week/ to match config

---
 CHANGES.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGES.md b/CHANGES.md
index d3d4d69cd..0a0d0b343 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -48,7 +48,7 @@ Features
 - Let synctl accept a directory of config files. ([\#5904](https://github.com/matrix-org/synapse/issues/5904))
 - Increase max display name size to 256. ([\#5906](https://github.com/matrix-org/synapse/issues/5906))
 - Add admin API endpoint for getting whether or not a user is a server administrator. ([\#5914](https://github.com/matrix-org/synapse/issues/5914))
-- Redact events in the database that have been redacted for a month. ([\#5934](https://github.com/matrix-org/synapse/issues/5934))
+- Redact events in the database that have been redacted for a week. ([\#5934](https://github.com/matrix-org/synapse/issues/5934))
 - New prometheus metrics:
   - `synapse_federation_known_servers`: represents the total number of servers your server knows about (i.e. is in rooms with), including itself. Enable by setting `metrics_flags.known_servers` to True in the configuration.([\#5981](https://github.com/matrix-org/synapse/issues/5981))
   - `synapse_build_info`: exposes the Python version, OS version, and Synapse version of the running server. ([\#6005](https://github.com/matrix-org/synapse/issues/6005))

From 3423633d50723a818975317c33545385b53b372f Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 26 Sep 2019 16:39:15 +0100
Subject: [PATCH 127/276] Fix 'redaction_retention_period' sample config to
 match guidelines

---
 docs/sample_config.yaml  | 2 +-
 synapse/config/server.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml
index 254e1b17b..43893399a 100644
--- a/docs/sample_config.yaml
+++ b/docs/sample_config.yaml
@@ -314,7 +314,7 @@ listeners:
 #
 # Defaults to `7d`. Set to `null` to disable.
 #
-redaction_retention_period: 7d
+#redaction_retention_period: 28d
 
 # How long to track users' last seen time and IPs in the database.
 #
diff --git a/synapse/config/server.py b/synapse/config/server.py
index 5ad7ee911..536ee7f29 100644
--- a/synapse/config/server.py
+++ b/synapse/config/server.py
@@ -742,7 +742,7 @@ class ServerConfig(Config):
         #
         # Defaults to `7d`. Set to `null` to disable.
         #
-        redaction_retention_period: 7d
+        #redaction_retention_period: 28d
 
         # How long to track users' last seen time and IPs in the database.
         #

From 9d99eade7c8152956d6f0198d4ede1baeadedfe7 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 26 Sep 2019 16:46:21 +0100
Subject: [PATCH 128/276] Newsfile

---
 changelog.d/6117.misc | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/6117.misc

diff --git a/changelog.d/6117.misc b/changelog.d/6117.misc
new file mode 100644
index 000000000..f8bdb58f4
--- /dev/null
+++ b/changelog.d/6117.misc
@@ -0,0 +1 @@
+Fix up sample config entry for `redaction_retention_period` option.

From 8c27bc8b60d4b78c059ea727a78e78dc8cd3df7a Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Fri, 27 Sep 2019 10:36:20 +0100
Subject: [PATCH 129/276] Move lookup-related functions from RoomMemberHandler
 to IdentityHandler (#5978)

Just to have all the methods that make calls to identity services in one place.
---
 changelog.d/5978.misc           |   1 +
 synapse/handlers/identity.py    | 353 ++++++++++++++++++++++++++++++
 synapse/handlers/room_member.py | 370 +-------------------------------
 3 files changed, 360 insertions(+), 364 deletions(-)
 create mode 100644 changelog.d/5978.misc

diff --git a/changelog.d/5978.misc b/changelog.d/5978.misc
new file mode 100644
index 000000000..6d2b69b11
--- /dev/null
+++ b/changelog.d/5978.misc
@@ -0,0 +1 @@
+Move lookup-related functions from RoomMemberHandler to IdentityHandler.
\ No newline at end of file
diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py
index 6d42a1aed..ba99ddf76 100644
--- a/synapse/handlers/identity.py
+++ b/synapse/handlers/identity.py
@@ -21,11 +21,15 @@ import logging
 import urllib
 
 from canonicaljson import json
+from signedjson.key import decode_verify_key_bytes
+from signedjson.sign import verify_signed_json
+from unpaddedbase64 import decode_base64
 
 from twisted.internet import defer
 from twisted.internet.error import TimeoutError
 
 from synapse.api.errors import (
+    AuthError,
     CodeMessageException,
     Codes,
     HttpResponseException,
@@ -33,12 +37,15 @@ from synapse.api.errors import (
 )
 from synapse.config.emailconfig import ThreepidBehaviour
 from synapse.http.client import SimpleHttpClient
+from synapse.util.hash import sha256_and_url_safe_base64
 from synapse.util.stringutils import random_string
 
 from ._base import BaseHandler
 
 logger = logging.getLogger(__name__)
 
+id_server_scheme = "https://"
+
 
 class IdentityHandler(BaseHandler):
     def __init__(self, hs):
@@ -557,6 +564,352 @@ class IdentityHandler(BaseHandler):
             logger.warning("Error contacting msisdn account_threepid_delegate: %s", e)
             raise SynapseError(400, "Error contacting the identity server")
 
+    @defer.inlineCallbacks
+    def lookup_3pid(self, id_server, medium, address, id_access_token=None):
+        """Looks up a 3pid in the passed identity server.
+
+        Args:
+            id_server (str): The server name (including port, if required)
+                of the identity server to use.
+            medium (str): The type of the third party identifier (e.g. "email").
+            address (str): The third party identifier (e.g. "foo@example.com").
+            id_access_token (str|None): The access token to authenticate to the identity
+                server with
+
+        Returns:
+            str|None: the matrix ID of the 3pid, or None if it is not recognized.
+        """
+        if id_access_token is not None:
+            try:
+                results = yield self._lookup_3pid_v2(
+                    id_server, id_access_token, medium, address
+                )
+                return results
+
+            except Exception as e:
+                # Catch HttpResponseException for a non-200 response code
+                # Check if this identity server does not know about v2 lookups
+                if isinstance(e, HttpResponseException) and e.code == 404:
+                    # This is an old identity server that does not yet support v2 lookups
+                    logger.warning(
+                        "Attempted v2 lookup on v1 identity server %s. Falling "
+                        "back to v1",
+                        id_server,
+                    )
+                else:
+                    logger.warning("Error when looking up hashing details: %s", e)
+                    return None
+
+        return (yield self._lookup_3pid_v1(id_server, medium, address))
+
+    @defer.inlineCallbacks
+    def _lookup_3pid_v1(self, id_server, medium, address):
+        """Looks up a 3pid in the passed identity server using v1 lookup.
+
+        Args:
+            id_server (str): The server name (including port, if required)
+                of the identity server to use.
+            medium (str): The type of the third party identifier (e.g. "email").
+            address (str): The third party identifier (e.g. "foo@example.com").
+
+        Returns:
+            str|None: the matrix ID of the 3pid, or None if it is not recognized.
+        """
+        try:
+            data = yield self.blacklisting_http_client.get_json(
+                "%s%s/_matrix/identity/api/v1/lookup" % (id_server_scheme, id_server),
+                {"medium": medium, "address": address},
+            )
+
+            if "mxid" in data:
+                if "signatures" not in data:
+                    raise AuthError(401, "No signatures on 3pid binding")
+                yield self._verify_any_signature(data, id_server)
+                return data["mxid"]
+        except TimeoutError:
+            raise SynapseError(500, "Timed out contacting identity server")
+        except IOError as e:
+            logger.warning("Error from v1 identity server lookup: %s", e)
+
+        return None
+
+    @defer.inlineCallbacks
+    def _lookup_3pid_v2(self, id_server, id_access_token, medium, address):
+        """Looks up a 3pid in the passed identity server using v2 lookup.
+
+        Args:
+            id_server (str): The server name (including port, if required)
+                of the identity server to use.
+            id_access_token (str): The access token to authenticate to the identity server with
+            medium (str): The type of the third party identifier (e.g. "email").
+            address (str): The third party identifier (e.g. "foo@example.com").
+
+        Returns:
+            Deferred[str|None]: the matrix ID of the 3pid, or None if it is not recognised.
+        """
+        # Check what hashing details are supported by this identity server
+        try:
+            hash_details = yield self.blacklisting_http_client.get_json(
+                "%s%s/_matrix/identity/v2/hash_details" % (id_server_scheme, id_server),
+                {"access_token": id_access_token},
+            )
+        except TimeoutError:
+            raise SynapseError(500, "Timed out contacting identity server")
+
+        if not isinstance(hash_details, dict):
+            logger.warning(
+                "Got non-dict object when checking hash details of %s%s: %s",
+                id_server_scheme,
+                id_server,
+                hash_details,
+            )
+            raise SynapseError(
+                400,
+                "Non-dict object from %s%s during v2 hash_details request: %s"
+                % (id_server_scheme, id_server, hash_details),
+            )
+
+        # Extract information from hash_details
+        supported_lookup_algorithms = hash_details.get("algorithms")
+        lookup_pepper = hash_details.get("lookup_pepper")
+        if (
+            not supported_lookup_algorithms
+            or not isinstance(supported_lookup_algorithms, list)
+            or not lookup_pepper
+            or not isinstance(lookup_pepper, str)
+        ):
+            raise SynapseError(
+                400,
+                "Invalid hash details received from identity server %s%s: %s"
+                % (id_server_scheme, id_server, hash_details),
+            )
+
+        # Check if any of the supported lookup algorithms are present
+        if LookupAlgorithm.SHA256 in supported_lookup_algorithms:
+            # Perform a hashed lookup
+            lookup_algorithm = LookupAlgorithm.SHA256
+
+            # Hash address, medium and the pepper with sha256
+            to_hash = "%s %s %s" % (address, medium, lookup_pepper)
+            lookup_value = sha256_and_url_safe_base64(to_hash)
+
+        elif LookupAlgorithm.NONE in supported_lookup_algorithms:
+            # Perform a non-hashed lookup
+            lookup_algorithm = LookupAlgorithm.NONE
+
+            # Combine together plaintext address and medium
+            lookup_value = "%s %s" % (address, medium)
+
+        else:
+            logger.warning(
+                "None of the provided lookup algorithms of %s are supported: %s",
+                id_server,
+                supported_lookup_algorithms,
+            )
+            raise SynapseError(
+                400,
+                "Provided identity server does not support any v2 lookup "
+                "algorithms that this homeserver supports.",
+            )
+
+        # Authenticate with identity server given the access token from the client
+        headers = {"Authorization": create_id_access_token_header(id_access_token)}
+
+        try:
+            lookup_results = yield self.blacklisting_http_client.post_json_get_json(
+                "%s%s/_matrix/identity/v2/lookup" % (id_server_scheme, id_server),
+                {
+                    "addresses": [lookup_value],
+                    "algorithm": lookup_algorithm,
+                    "pepper": lookup_pepper,
+                },
+                headers=headers,
+            )
+        except TimeoutError:
+            raise SynapseError(500, "Timed out contacting identity server")
+        except Exception as e:
+            logger.warning("Error when performing a v2 3pid lookup: %s", e)
+            raise SynapseError(
+                500, "Unknown error occurred during identity server lookup"
+            )
+
+        # Check for a mapping from what we looked up to an MXID
+        if "mappings" not in lookup_results or not isinstance(
+            lookup_results["mappings"], dict
+        ):
+            logger.warning("No results from 3pid lookup")
+            return None
+
+        # Return the MXID if it's available, or None otherwise
+        mxid = lookup_results["mappings"].get(lookup_value)
+        return mxid
+
+    @defer.inlineCallbacks
+    def _verify_any_signature(self, data, server_hostname):
+        if server_hostname not in data["signatures"]:
+            raise AuthError(401, "No signature from server %s" % (server_hostname,))
+        for key_name, signature in data["signatures"][server_hostname].items():
+            try:
+                key_data = yield self.blacklisting_http_client.get_json(
+                    "%s%s/_matrix/identity/api/v1/pubkey/%s"
+                    % (id_server_scheme, server_hostname, key_name)
+                )
+            except TimeoutError:
+                raise SynapseError(500, "Timed out contacting identity server")
+            if "public_key" not in key_data:
+                raise AuthError(
+                    401, "No public key named %s from %s" % (key_name, server_hostname)
+                )
+            verify_signed_json(
+                data,
+                server_hostname,
+                decode_verify_key_bytes(
+                    key_name, decode_base64(key_data["public_key"])
+                ),
+            )
+            return
+
+    @defer.inlineCallbacks
+    def ask_id_server_for_third_party_invite(
+        self,
+        requester,
+        id_server,
+        medium,
+        address,
+        room_id,
+        inviter_user_id,
+        room_alias,
+        room_avatar_url,
+        room_join_rules,
+        room_name,
+        inviter_display_name,
+        inviter_avatar_url,
+        id_access_token=None,
+    ):
+        """
+        Asks an identity server for a third party invite.
+
+        Args:
+            requester (Requester)
+            id_server (str): hostname + optional port for the identity server.
+            medium (str): The literal string "email".
+            address (str): The third party address being invited.
+            room_id (str): The ID of the room to which the user is invited.
+            inviter_user_id (str): The user ID of the inviter.
+            room_alias (str): An alias for the room, for cosmetic notifications.
+            room_avatar_url (str): The URL of the room's avatar, for cosmetic
+                notifications.
+            room_join_rules (str): The join rules of the room (e.g. "public").
+            room_name (str): The m.room.name of the room.
+            inviter_display_name (str): The current display name of the
+                inviter.
+            inviter_avatar_url (str): The URL of the inviter's avatar.
+            id_access_token (str|None): The access token to authenticate to the identity
+                server with
+
+        Returns:
+            A deferred tuple containing:
+                token (str): The token which must be signed to prove authenticity.
+                public_keys ([{"public_key": str, "key_validity_url": str}]):
+                    public_key is a base64-encoded ed25519 public key.
+                fallback_public_key: One element from public_keys.
+                display_name (str): A user-friendly name to represent the invited
+                    user.
+        """
+        invite_config = {
+            "medium": medium,
+            "address": address,
+            "room_id": room_id,
+            "room_alias": room_alias,
+            "room_avatar_url": room_avatar_url,
+            "room_join_rules": room_join_rules,
+            "room_name": room_name,
+            "sender": inviter_user_id,
+            "sender_display_name": inviter_display_name,
+            "sender_avatar_url": inviter_avatar_url,
+        }
+
+        # Use the v2 Identity Service endpoints, passing an Authorization value built
+        # from id_access_token, if id_access_token is present
+        data = None
+        base_url = "%s%s/_matrix/identity" % (id_server_scheme, id_server)
+
+        if id_access_token:
+            key_validity_url = "%s%s/_matrix/identity/v2/pubkey/isvalid" % (
+                id_server_scheme,
+                id_server,
+            )
+
+            # Attempt a v2 store-invite
+            url = base_url + "/v2/store-invite"
+            try:
+                data = yield self.blacklisting_http_client.post_json_get_json(
+                    url,
+                    invite_config,
+                    {"Authorization": create_id_access_token_header(id_access_token)},
+                )
+            except TimeoutError:
+                raise SynapseError(500, "Timed out contacting identity server")
+            except HttpResponseException as e:
+                if e.code != 404:
+                    logger.info("Failed to POST %s with JSON: %s", url, e)
+                    raise e
+
+        if data is None:
+            key_validity_url = "%s%s/_matrix/identity/api/v1/pubkey/isvalid" % (
+                id_server_scheme,
+                id_server,
+            )
+            url = base_url + "/api/v1/store-invite"
+
+            try:
+                data = yield self.blacklisting_http_client.post_json_get_json(
+                    url, invite_config
+                )
+            except TimeoutError:
+                raise SynapseError(500, "Timed out contacting identity server")
+            except HttpResponseException as e:
+                logger.warning(
+                    "Error trying to call /store-invite on %s%s: %s",
+                    id_server_scheme,
+                    id_server,
+                    e,
+                )
+
+            if data is None:
+                # Some identity servers may only support application/x-www-form-urlencoded
+                # types. This is especially true with old instances of Sydent, see
+                # https://github.com/matrix-org/sydent/pull/170
+                try:
+                    data = yield self.blacklisting_http_client.post_urlencoded_get_json(
+                        url, invite_config
+                    )
+                except HttpResponseException as e:
+                    logger.warning(
+                        "Error calling /store-invite on %s%s with fallback "
+                        "encoding: %s",
+                        id_server_scheme,
+                        id_server,
+                        e,
+                    )
+                    raise e
+
+        # TODO: Check for success
+        token = data["token"]
+        public_keys = data.get("public_keys", [])
+        if "public_key" in data:
+            fallback_public_key = {
+                "public_key": data["public_key"],
+                "key_validity_url": key_validity_url,
+            }
+        else:
+            fallback_public_key = public_keys[0]
+
+        if not public_keys:
+            public_keys.append(fallback_public_key)
+        display_name = data["display_name"]
+        return token, public_keys, fallback_public_key, display_name
+
 
 def create_id_access_token_header(id_access_token):
     """Create an Authorization header for passing to SimpleHttpClient as the header value
diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py
index 94cd0cf3e..8abdb1b6e 100644
--- a/synapse/handlers/room_member.py
+++ b/synapse/handlers/room_member.py
@@ -20,29 +20,19 @@ import logging
 
 from six.moves import http_client
 
-from signedjson.key import decode_verify_key_bytes
-from signedjson.sign import verify_signed_json
-from unpaddedbase64 import decode_base64
-
 from twisted.internet import defer
-from twisted.internet.error import TimeoutError
 
 from synapse import types
 from synapse.api.constants import EventTypes, Membership
-from synapse.api.errors import AuthError, Codes, HttpResponseException, SynapseError
-from synapse.handlers.identity import LookupAlgorithm, create_id_access_token_header
-from synapse.http.client import SimpleHttpClient
+from synapse.api.errors import AuthError, Codes, SynapseError
 from synapse.types import RoomID, UserID
 from synapse.util.async_helpers import Linearizer
 from synapse.util.distributor import user_joined_room, user_left_room
-from synapse.util.hash import sha256_and_url_safe_base64
 
 from ._base import BaseHandler
 
 logger = logging.getLogger(__name__)
 
-id_server_scheme = "https://"
-
 
 class RoomMemberHandler(object):
     # TODO(paul): This handler currently contains a messy conflation of
@@ -63,14 +53,10 @@ class RoomMemberHandler(object):
         self.auth = hs.get_auth()
         self.state_handler = hs.get_state_handler()
         self.config = hs.config
-        # We create a blacklisting instance of SimpleHttpClient for contacting identity
-        # servers specified by clients
-        self.simple_http_client = SimpleHttpClient(
-            hs, ip_blacklist=hs.config.federation_ip_range_blacklist
-        )
 
         self.federation_handler = hs.get_handlers().federation_handler
         self.directory_handler = hs.get_handlers().directory_handler
+        self.identity_handler = hs.get_handlers().identity_handler
         self.registration_handler = hs.get_registration_handler()
         self.profile_handler = hs.get_profile_handler()
         self.event_creation_handler = hs.get_event_creation_handler()
@@ -682,7 +668,9 @@ class RoomMemberHandler(object):
                 403, "Looking up third-party identifiers is denied from this server"
             )
 
-        invitee = yield self._lookup_3pid(id_server, medium, address, id_access_token)
+        invitee = yield self.identity_handler.lookup_3pid(
+            id_server, medium, address, id_access_token
+        )
 
         if invitee:
             yield self.update_membership(
@@ -700,211 +688,6 @@ class RoomMemberHandler(object):
                 id_access_token=id_access_token,
             )
 
-    @defer.inlineCallbacks
-    def _lookup_3pid(self, id_server, medium, address, id_access_token=None):
-        """Looks up a 3pid in the passed identity server.
-
-        Args:
-            id_server (str): The server name (including port, if required)
-                of the identity server to use.
-            medium (str): The type of the third party identifier (e.g. "email").
-            address (str): The third party identifier (e.g. "foo@example.com").
-            id_access_token (str|None): The access token to authenticate to the identity
-                server with
-
-        Returns:
-            str|None: the matrix ID of the 3pid, or None if it is not recognized.
-        """
-        if id_access_token is not None:
-            try:
-                results = yield self._lookup_3pid_v2(
-                    id_server, id_access_token, medium, address
-                )
-                return results
-
-            except Exception as e:
-                # Catch HttpResponseExcept for a non-200 response code
-                # Check if this identity server does not know about v2 lookups
-                if isinstance(e, HttpResponseException) and e.code == 404:
-                    # This is an old identity server that does not yet support v2 lookups
-                    logger.warning(
-                        "Attempted v2 lookup on v1 identity server %s. Falling "
-                        "back to v1",
-                        id_server,
-                    )
-                else:
-                    logger.warning("Error when looking up hashing details: %s", e)
-                    return None
-
-        return (yield self._lookup_3pid_v1(id_server, medium, address))
-
-    @defer.inlineCallbacks
-    def _lookup_3pid_v1(self, id_server, medium, address):
-        """Looks up a 3pid in the passed identity server using v1 lookup.
-
-        Args:
-            id_server (str): The server name (including port, if required)
-                of the identity server to use.
-            medium (str): The type of the third party identifier (e.g. "email").
-            address (str): The third party identifier (e.g. "foo@example.com").
-
-        Returns:
-            str: the matrix ID of the 3pid, or None if it is not recognized.
-        """
-        try:
-            data = yield self.simple_http_client.get_json(
-                "%s%s/_matrix/identity/api/v1/lookup" % (id_server_scheme, id_server),
-                {"medium": medium, "address": address},
-            )
-
-            if "mxid" in data:
-                if "signatures" not in data:
-                    raise AuthError(401, "No signatures on 3pid binding")
-                yield self._verify_any_signature(data, id_server)
-                return data["mxid"]
-        except TimeoutError:
-            raise SynapseError(500, "Timed out contacting identity server")
-        except IOError as e:
-            logger.warning("Error from v1 identity server lookup: %s" % (e,))
-
-        return None
-
-    @defer.inlineCallbacks
-    def _lookup_3pid_v2(self, id_server, id_access_token, medium, address):
-        """Looks up a 3pid in the passed identity server using v2 lookup.
-
-        Args:
-            id_server (str): The server name (including port, if required)
-                of the identity server to use.
-            id_access_token (str): The access token to authenticate to the identity server with
-            medium (str): The type of the third party identifier (e.g. "email").
-            address (str): The third party identifier (e.g. "foo@example.com").
-
-        Returns:
-            Deferred[str|None]: the matrix ID of the 3pid, or None if it is not recognised.
-        """
-        # Check what hashing details are supported by this identity server
-        try:
-            hash_details = yield self.simple_http_client.get_json(
-                "%s%s/_matrix/identity/v2/hash_details" % (id_server_scheme, id_server),
-                {"access_token": id_access_token},
-            )
-        except TimeoutError:
-            raise SynapseError(500, "Timed out contacting identity server")
-
-        if not isinstance(hash_details, dict):
-            logger.warning(
-                "Got non-dict object when checking hash details of %s%s: %s",
-                id_server_scheme,
-                id_server,
-                hash_details,
-            )
-            raise SynapseError(
-                400,
-                "Non-dict object from %s%s during v2 hash_details request: %s"
-                % (id_server_scheme, id_server, hash_details),
-            )
-
-        # Extract information from hash_details
-        supported_lookup_algorithms = hash_details.get("algorithms")
-        lookup_pepper = hash_details.get("lookup_pepper")
-        if (
-            not supported_lookup_algorithms
-            or not isinstance(supported_lookup_algorithms, list)
-            or not lookup_pepper
-            or not isinstance(lookup_pepper, str)
-        ):
-            raise SynapseError(
-                400,
-                "Invalid hash details received from identity server %s%s: %s"
-                % (id_server_scheme, id_server, hash_details),
-            )
-
-        # Check if any of the supported lookup algorithms are present
-        if LookupAlgorithm.SHA256 in supported_lookup_algorithms:
-            # Perform a hashed lookup
-            lookup_algorithm = LookupAlgorithm.SHA256
-
-            # Hash address, medium and the pepper with sha256
-            to_hash = "%s %s %s" % (address, medium, lookup_pepper)
-            lookup_value = sha256_and_url_safe_base64(to_hash)
-
-        elif LookupAlgorithm.NONE in supported_lookup_algorithms:
-            # Perform a non-hashed lookup
-            lookup_algorithm = LookupAlgorithm.NONE
-
-            # Combine together plaintext address and medium
-            lookup_value = "%s %s" % (address, medium)
-
-        else:
-            logger.warning(
-                "None of the provided lookup algorithms of %s are supported: %s",
-                id_server,
-                supported_lookup_algorithms,
-            )
-            raise SynapseError(
-                400,
-                "Provided identity server does not support any v2 lookup "
-                "algorithms that this homeserver supports.",
-            )
-
-        # Authenticate with identity server given the access token from the client
-        headers = {"Authorization": create_id_access_token_header(id_access_token)}
-
-        try:
-            lookup_results = yield self.simple_http_client.post_json_get_json(
-                "%s%s/_matrix/identity/v2/lookup" % (id_server_scheme, id_server),
-                {
-                    "addresses": [lookup_value],
-                    "algorithm": lookup_algorithm,
-                    "pepper": lookup_pepper,
-                },
-                headers=headers,
-            )
-        except TimeoutError:
-            raise SynapseError(500, "Timed out contacting identity server")
-        except Exception as e:
-            logger.warning("Error when performing a v2 3pid lookup: %s", e)
-            raise SynapseError(
-                500, "Unknown error occurred during identity server lookup"
-            )
-
-        # Check for a mapping from what we looked up to an MXID
-        if "mappings" not in lookup_results or not isinstance(
-            lookup_results["mappings"], dict
-        ):
-            logger.warning("No results from 3pid lookup")
-            return None
-
-        # Return the MXID if it's available, or None otherwise
-        mxid = lookup_results["mappings"].get(lookup_value)
-        return mxid
-
-    @defer.inlineCallbacks
-    def _verify_any_signature(self, data, server_hostname):
-        if server_hostname not in data["signatures"]:
-            raise AuthError(401, "No signature from server %s" % (server_hostname,))
-        for key_name, signature in data["signatures"][server_hostname].items():
-            try:
-                key_data = yield self.simple_http_client.get_json(
-                    "%s%s/_matrix/identity/api/v1/pubkey/%s"
-                    % (id_server_scheme, server_hostname, key_name)
-                )
-            except TimeoutError:
-                raise SynapseError(500, "Timed out contacting identity server")
-            if "public_key" not in key_data:
-                raise AuthError(
-                    401, "No public key named %s from %s" % (key_name, server_hostname)
-                )
-            verify_signed_json(
-                data,
-                server_hostname,
-                decode_verify_key_bytes(
-                    key_name, decode_base64(key_data["public_key"])
-                ),
-            )
-            return
-
     @defer.inlineCallbacks
     def _make_and_store_3pid_invite(
         self,
@@ -951,7 +734,7 @@ class RoomMemberHandler(object):
             room_avatar_url = room_avatar_event.content.get("url", "")
 
         token, public_keys, fallback_public_key, display_name = (
-            yield self._ask_id_server_for_third_party_invite(
+            yield self.identity_handler.ask_id_server_for_third_party_invite(
                 requester=requester,
                 id_server=id_server,
                 medium=medium,
@@ -987,147 +770,6 @@ class RoomMemberHandler(object):
             txn_id=txn_id,
         )
 
-    @defer.inlineCallbacks
-    def _ask_id_server_for_third_party_invite(
-        self,
-        requester,
-        id_server,
-        medium,
-        address,
-        room_id,
-        inviter_user_id,
-        room_alias,
-        room_avatar_url,
-        room_join_rules,
-        room_name,
-        inviter_display_name,
-        inviter_avatar_url,
-        id_access_token=None,
-    ):
-        """
-        Asks an identity server for a third party invite.
-
-        Args:
-            requester (Requester)
-            id_server (str): hostname + optional port for the identity server.
-            medium (str): The literal string "email".
-            address (str): The third party address being invited.
-            room_id (str): The ID of the room to which the user is invited.
-            inviter_user_id (str): The user ID of the inviter.
-            room_alias (str): An alias for the room, for cosmetic notifications.
-            room_avatar_url (str): The URL of the room's avatar, for cosmetic
-                notifications.
-            room_join_rules (str): The join rules of the email (e.g. "public").
-            room_name (str): The m.room.name of the room.
-            inviter_display_name (str): The current display name of the
-                inviter.
-            inviter_avatar_url (str): The URL of the inviter's avatar.
-            id_access_token (str|None): The access token to authenticate to the identity
-                server with
-
-        Returns:
-            A deferred tuple containing:
-                token (str): The token which must be signed to prove authenticity.
-                public_keys ([{"public_key": str, "key_validity_url": str}]):
-                    public_key is a base64-encoded ed25519 public key.
-                fallback_public_key: One element from public_keys.
-                display_name (str): A user-friendly name to represent the invited
-                    user.
-        """
-        invite_config = {
-            "medium": medium,
-            "address": address,
-            "room_id": room_id,
-            "room_alias": room_alias,
-            "room_avatar_url": room_avatar_url,
-            "room_join_rules": room_join_rules,
-            "room_name": room_name,
-            "sender": inviter_user_id,
-            "sender_display_name": inviter_display_name,
-            "sender_avatar_url": inviter_avatar_url,
-        }
-
-        # Add the identity service access token to the JSON body and use the v2
-        # Identity Service endpoints if id_access_token is present
-        data = None
-        base_url = "%s%s/_matrix/identity" % (id_server_scheme, id_server)
-
-        if id_access_token:
-            key_validity_url = "%s%s/_matrix/identity/v2/pubkey/isvalid" % (
-                id_server_scheme,
-                id_server,
-            )
-
-            # Attempt a v2 lookup
-            url = base_url + "/v2/store-invite"
-            try:
-                data = yield self.simple_http_client.post_json_get_json(
-                    url,
-                    invite_config,
-                    {"Authorization": create_id_access_token_header(id_access_token)},
-                )
-            except TimeoutError:
-                raise SynapseError(500, "Timed out contacting identity server")
-            except HttpResponseException as e:
-                if e.code != 404:
-                    logger.info("Failed to POST %s with JSON: %s", url, e)
-                    raise e
-
-        if data is None:
-            key_validity_url = "%s%s/_matrix/identity/api/v1/pubkey/isvalid" % (
-                id_server_scheme,
-                id_server,
-            )
-            url = base_url + "/api/v1/store-invite"
-
-            try:
-                data = yield self.simple_http_client.post_json_get_json(
-                    url, invite_config
-                )
-            except TimeoutError:
-                raise SynapseError(500, "Timed out contacting identity server")
-            except HttpResponseException as e:
-                logger.warning(
-                    "Error trying to call /store-invite on %s%s: %s",
-                    id_server_scheme,
-                    id_server,
-                    e,
-                )
-
-            if data is None:
-                # Some identity servers may only support application/x-www-form-urlencoded
-                # types. This is especially true with old instances of Sydent, see
-                # https://github.com/matrix-org/sydent/pull/170
-                try:
-                    data = yield self.simple_http_client.post_urlencoded_get_json(
-                        url, invite_config
-                    )
-                except HttpResponseException as e:
-                    logger.warning(
-                        "Error calling /store-invite on %s%s with fallback "
-                        "encoding: %s",
-                        id_server_scheme,
-                        id_server,
-                        e,
-                    )
-                    raise e
-
-        # TODO: Check for success
-        token = data["token"]
-        public_keys = data.get("public_keys", [])
-        if "public_key" in data:
-            fallback_public_key = {
-                "public_key": data["public_key"],
-                "key_validity_url": key_validity_url,
-            }
-        else:
-            fallback_public_key = public_keys[0]
-
-        if not public_keys:
-            public_keys.append(fallback_public_key)
-        display_name = data["display_name"]
-        return token, public_keys, fallback_public_key, display_name
-
     @defer.inlineCallbacks
     def _is_host_in_room(self, current_state_ids):
         # Have we just created the room, and is this about to be the very

From 5257a2fb1c983158bbdee8be4e61066f1a83d4a8 Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Fri, 27 Sep 2019 14:49:53 +0100
Subject: [PATCH 130/276] Reject pending invites on deactivation

---
 synapse/handlers/deactivate_account.py | 31 ++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/synapse/handlers/deactivate_account.py b/synapse/handlers/deactivate_account.py
index d83912c9a..9815365f5 100644
--- a/synapse/handlers/deactivate_account.py
+++ b/synapse/handlers/deactivate_account.py
@@ -120,6 +120,10 @@ class DeactivateAccountHandler(BaseHandler):
         # parts users from rooms (if it isn't already running)
         self._start_user_parting()
 
+        # Reject all pending invites for the user, so that it doesn't show up in the
+        # invitees list of rooms.
+        self._reject_pending_invites_for_user(user_id)
+
         # Remove all information on the user from the account_validity table.
         if self._account_validity_enabled:
             yield self.store.delete_account_validity_for_user(user_id)
@@ -129,6 +133,33 @@ class DeactivateAccountHandler(BaseHandler):
 
         return identity_server_supports_unbinding
 
+    def _reject_pending_invites_for_user(self, user_id):
+        """Reject pending invites addressed to a given user ID.
+
+        Args:
+            user_id (str): The user ID to reject pending invites for.
+        """
+        user = UserID.from_string(user_id)
+        pending_invites = yield self.store.get_invited_rooms_for_user(user_id)
+
+        for room in pending_invites:
+            try:
+                yield self._room_member_handler.update_membership(
+                    create_requester(user),
+                    user,
+                    room.room_id,
+                    "leave",
+                    ratelimit=False,
+                    require_consent=False,
+                )
+            except Exception:
+                logger.exception(
+                    "Failed to reject invite for user %r in room %r:"
+                    " ignoring and continuing",
+                    user_id,
+                    room.room_id,
+                )
+
     def _start_user_parting(self):
         """
         Start the process that goes through the table of users

From 132279a46fd76de8f767bc6977192900c450fec9 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Fri, 27 Sep 2019 15:11:14 +0100
Subject: [PATCH 131/276] Patch inlinecallbacks for log contexts

---
 synapse/__init__.py             |  6 +++
 synapse/handlers/room_member.py |  4 +-
 synapse/storage/_base.py        |  7 +--
 synapse/storage/push_rule.py    |  2 +-
 tests/patch_inline_callbacks.py | 86 +++++++++++++++++++++++++++++++--
 5 files changed, 95 insertions(+), 10 deletions(-)

diff --git a/synapse/__init__.py b/synapse/__init__.py
index ddfe9ec54..4401fd52f 100644
--- a/synapse/__init__.py
+++ b/synapse/__init__.py
@@ -17,8 +17,11 @@
 """ This is a reference implementation of a Matrix home server.
 """
 
+import os
 import sys
 
+from tests.patch_inline_callbacks import do_patch
+
 # Check that we're not running on an unsupported Python version.
 if sys.version_info < (3, 5):
     print("Synapse requires Python 3.5 or above.")
@@ -36,3 +39,6 @@ except ImportError:
     pass
 
 __version__ = "1.4.0rc1"
+
+if bool(os.environ.get("SYNAPSE_TEST_PATCH_LOG_CONTEXTS", False)):
+    do_patch()
diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py
index 8abdb1b6e..19e44b546 100644
--- a/synapse/handlers/room_member.py
+++ b/synapse/handlers/room_member.py
@@ -213,11 +213,11 @@ class RoomMemberHandler(object):
 
             if predecessor:
                 # It is an upgraded room. Copy over old tags
-                self.copy_room_tags_and_direct_to_room(
+                yield self.copy_room_tags_and_direct_to_room(
                     predecessor["room_id"], room_id, user_id
                 )
                 # Move over old push rules
-                self.store.move_push_rules_from_room_to_room_for_user(
+                yield self.store.move_push_rules_from_room_to_room_for_user(
                     predecessor["room_id"], room_id, user_id
                 )
         elif event.membership == Membership.LEAVE:
diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py
index abe16334e..06cc14fcd 100644
--- a/synapse/storage/_base.py
+++ b/synapse/storage/_base.py
@@ -30,7 +30,7 @@ from prometheus_client import Histogram
 from twisted.internet import defer
 
 from synapse.api.errors import StoreError
-from synapse.logging.context import LoggingContext, PreserveLoggingContext
+from synapse.logging.context import LoggingContext, make_deferred_yieldable
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.storage.engines import PostgresEngine, Sqlite3Engine
 from synapse.types import get_domain_from_id
@@ -550,8 +550,9 @@ class SQLBaseStore(object):
 
                 return func(conn, *args, **kwargs)
 
-        with PreserveLoggingContext():
-            result = yield self._db_pool.runWithConnection(inner_func, *args, **kwargs)
+        result = yield make_deferred_yieldable(
+            self._db_pool.runWithConnection(inner_func, *args, **kwargs)
+        )
 
         return result
 
diff --git a/synapse/storage/push_rule.py b/synapse/storage/push_rule.py
index a6517c4cf..1f878e657 100644
--- a/synapse/storage/push_rule.py
+++ b/synapse/storage/push_rule.py
@@ -235,7 +235,7 @@ class PushRulesWorkerStore(
                 (c.get("key") == "room_id" and c.get("pattern") == old_room_id)
                 for c in conditions
             ):
-                self.move_push_rule_from_room_to_room(new_room_id, user_id, rule)
+                yield self.move_push_rule_from_room_to_room(new_room_id, user_id, rule)
 
     @defer.inlineCallbacks
     def bulk_get_push_rules_for_room(self, event, context):
diff --git a/tests/patch_inline_callbacks.py b/tests/patch_inline_callbacks.py
index 220884311..5ef0aff0c 100644
--- a/tests/patch_inline_callbacks.py
+++ b/tests/patch_inline_callbacks.py
@@ -15,6 +15,7 @@
 
 from __future__ import print_function
 
+import inspect
 import functools
 import sys
 
@@ -33,17 +34,19 @@ def do_patch():
     orig_inline_callbacks = defer.inlineCallbacks
 
     def new_inline_callbacks(f):
-
-        orig = orig_inline_callbacks(f)
-
         @functools.wraps(f)
         def wrapped(*args, **kwargs):
             start_context = LoggingContext.current_context()
+            changes = []
+            orig = orig_inline_callbacks(_check_yield_points(f, changes, start_context))
 
             try:
                 res = orig(*args, **kwargs)
             except Exception:
                 if LoggingContext.current_context() != start_context:
+                    for err in changes:
+                        print(err, file=sys.stderr)
+
                     err = "%s changed context from %s to %s on exception" % (
                         f,
                         start_context,
@@ -55,7 +58,10 @@ def do_patch():
 
             if not isinstance(res, Deferred) or res.called:
                 if LoggingContext.current_context() != start_context:
-                    err = "%s changed context from %s to %s" % (
+                    for err in changes:
+                        print(err, file=sys.stderr)
+
+                    err = "Completed %s changed context from %s to %s" % (
                         f,
                         start_context,
                         LoggingContext.current_context(),
@@ -76,6 +82,8 @@ def do_patch():
 
             def check_ctx(r):
                 if LoggingContext.current_context() != start_context:
+                    for err in changes:
+                        print(err, file=sys.stderr)
                     err = "%s completion of %s changed context from %s to %s" % (
                         "Failure" if isinstance(r, Failure) else "Success",
                         f,
@@ -92,3 +100,73 @@ def do_patch():
         return wrapped
 
     defer.inlineCallbacks = new_inline_callbacks
+
+
+def _check_yield_points(f, changes, start_context):
+    from synapse.logging.context import LoggingContext
+
+    @functools.wraps(f)
+    def check_yield_points_inner(*args, **kwargs):
+        gen = f(*args, **kwargs)
+
+        last_yield_line_no = 1
+        result = None
+        while True:
+            try:
+                isFailure = isinstance(result, Failure)
+                if isFailure:
+                    d = result.throwExceptionIntoGenerator(gen)
+                else:
+                    d = gen.send(result)
+            except (StopIteration, defer._DefGen_Return) as e:
+                if LoggingContext.current_context() != start_context:
+                    # This happens when the context is lost sometime *after* the
+                    # final yield and returning. E.g. we forgot to yield on a
+                    # function that returns a deferred.
+                    err = (
+                        "%s returned and changed context from %s to %s, in %s between %d and end of func"
+                        % (
+                            f.__qualname__,
+                            start_context,
+                            LoggingContext.current_context(),
+                            f.__code__.co_filename,
+                            last_yield_line_no,
+                        )
+                    )
+                    changes.append(err)
+                    # print(err, file=sys.stderr)
+                    # raise Exception(err)
+                return getattr(e, "value", None)
+
+            try:
+                result = yield d
+            except Exception as e:
+                result = Failure(e)
+
+            frame = gen.gi_frame
+            if frame.f_code.co_name == "check_yield_points_inner":
+                frame = inspect.getgeneratorlocals(gen)["gen"].gi_frame
+
+            if LoggingContext.current_context() != start_context:
+                # This happens because the context is lost sometime *after* the
+                # previous yield and *after* the current yield. E.g. the
+                # deferred we waited on didn't follow the rules, or we forgot to
+                # yield on a function between the two yield points.
+                err = (
+                    "%s changed context from %s to %s, happened between lines %d and %d in %s"
+                    % (
+                        frame.f_code.co_name,
+                        start_context,
+                        LoggingContext.current_context(),
+                        last_yield_line_no,
+                        frame.f_lineno,
+                        frame.f_code.co_filename,
+                    )
+                )
+                changes.append(err)
+                # print(err, file=sys.stderr)
+                # raise Exception(err)
+
+            last_yield_line_no = frame.f_lineno
+
+    return check_yield_points_inner

From 72a2708ac6335985eb5171f5685f73d2ea120a2e Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Fri, 27 Sep 2019 15:13:39 +0100
Subject: [PATCH 132/276] Fixup and add some logging

---
 synapse/handlers/deactivate_account.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/synapse/handlers/deactivate_account.py b/synapse/handlers/deactivate_account.py
index 9815365f5..763fea3a2 100644
--- a/synapse/handlers/deactivate_account.py
+++ b/synapse/handlers/deactivate_account.py
@@ -122,7 +122,7 @@ class DeactivateAccountHandler(BaseHandler):
 
         # Reject all pending invites for the user, so that it doesn't show up in the
         # invitees list of rooms.
-        self._reject_pending_invites_for_user(user_id)
+        yield self._reject_pending_invites_for_user(user_id)
 
         # Remove all information on the user from the account_validity table.
         if self._account_validity_enabled:
@@ -133,6 +133,7 @@ class DeactivateAccountHandler(BaseHandler):
 
         return identity_server_supports_unbinding
 
+    @defer.inlineCallbacks
     def _reject_pending_invites_for_user(self, user_id):
         """Reject pending invites addressed to a given user ID.
 
@@ -142,6 +143,8 @@ class DeactivateAccountHandler(BaseHandler):
         user = UserID.from_string(user_id)
         pending_invites = yield self.store.get_invited_rooms_for_user(user_id)
 
+        logger.info(pending_invites)
+
         for room in pending_invites:
             try:
                 yield self._room_member_handler.update_membership(
@@ -152,6 +155,11 @@ class DeactivateAccountHandler(BaseHandler):
                     ratelimit=False,
                     require_consent=False,
                 )
+                logger.info(
+                    "Rejected invite for user %r in room %r",
+                    user_id,
+                    room.room_id,
+                )
             except Exception:
                 logger.exception(
                     "Failed to reject invite for user %r in room %r:"

From e94ff67903c3370fc5bc8b6c336433057e38ff05 Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Fri, 27 Sep 2019 15:14:02 +0100
Subject: [PATCH 133/276] Add test to validate the change

---
 tests/rest/client/v2_alpha/test_account.py | 70 ++++++++++++++++++----
 1 file changed, 57 insertions(+), 13 deletions(-)

diff --git a/tests/rest/client/v2_alpha/test_account.py b/tests/rest/client/v2_alpha/test_account.py
index 920de41de..69c33dfd8 100644
--- a/tests/rest/client/v2_alpha/test_account.py
+++ b/tests/rest/client/v2_alpha/test_account.py
@@ -23,8 +23,8 @@ from email.parser import Parser
 import pkg_resources
 
 import synapse.rest.admin
-from synapse.api.constants import LoginType
-from synapse.rest.client.v1 import login
+from synapse.api.constants import LoginType, Membership
+from synapse.rest.client.v1 import login, room
 from synapse.rest.client.v2_alpha import account, register
 
 from tests import unittest
@@ -244,16 +244,69 @@ class DeactivateTestCase(unittest.HomeserverTestCase):
         synapse.rest.admin.register_servlets_for_client_rest_resource,
         login.register_servlets,
         account.register_servlets,
+        room.register_servlets,
     ]
 
     def make_homeserver(self, reactor, clock):
-        hs = self.setup_test_homeserver()
-        return hs
+        self.hs = self.setup_test_homeserver()
+        return self.hs
 
     def test_deactivate_account(self):
         user_id = self.register_user("kermit", "test")
         tok = self.login("kermit", "test")
 
+        self.deactivate(user_id, tok)
+
+        store = self.hs.get_datastore()
+
+        # Check that the user has been marked as deactivated.
+        self.assertTrue(self.get_success(store.get_user_deactivated_status(user_id)))
+
+        # Check that this access token has been invalidated.
+        request, channel = self.make_request("GET", "account/whoami")
+        self.render(request)
+        self.assertEqual(request.code, 401)
+
+    @unittest.INFO
+    def test_pending_invites(self):
+        """Tests that deactivating a user rejects every pending invite for them."""
+        store = self.hs.get_datastore()
+
+        inviter_id = self.register_user("inviter", "test")
+        inviter_tok = self.login("inviter", "test")
+
+        invitee_id = self.register_user("invitee", "test")
+        invitee_tok = self.login("invitee", "test")
+
+        # Make @inviter:test invite @invitee:test in a new room.
+        room_id = self.helper.create_room_as(inviter_id, tok=inviter_tok)
+        self.helper.invite(
+            room=room_id,
+            src=inviter_id,
+            targ=invitee_id,
+            tok=inviter_tok,
+        )
+
+        # Make sure the invite is here.
+        pending_invites = self.get_success(store.get_invited_rooms_for_user(invitee_id))
+        self.assertEqual(len(pending_invites), 1, pending_invites)
+        self.assertEqual(pending_invites[0].room_id, room_id, pending_invites)
+
+        # Deactivate @invitee:test.
+        self.deactivate(invitee_id, invitee_tok)
+
+        # Check that the invite isn't there anymore.
+        pending_invites = self.get_success(store.get_invited_rooms_for_user(invitee_id))
+        self.assertEqual(len(pending_invites), 0, pending_invites)
+
+        # Check that the membership of @invitee:test in the room is now "leave".
+        memberships = self.get_success(
+            store.get_rooms_for_user_where_membership_is(invitee_id, [Membership.LEAVE])
+        )
+        self.assertEqual(len(memberships), 1, memberships)
+        self.assertEqual(memberships[0].room_id, room_id, memberships)
+
+    def deactivate(self, user_id, tok):
         request_data = json.dumps(
             {
                 "auth": {
@@ -270,12 +323,3 @@ class DeactivateTestCase(unittest.HomeserverTestCase):
         self.render(request)
         self.assertEqual(request.code, 200)
 
-        store = self.hs.get_datastore()
-
-        # Check that the user has been marked as deactivated.
-        self.assertTrue(self.get_success(store.get_user_deactivated_status(user_id)))
-
-        # Check that this access token has been invalidated.
-        request, channel = self.make_request("GET", "account/whoami")
-        self.render(request)
-        self.assertEqual(request.code, 401)

From 0804a27c8c7c2cc9f0adbb0329bffcd8ce10e1bd Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Fri, 27 Sep 2019 15:14:34 +0100
Subject: [PATCH 134/276] Changelog

---
 changelog.d/6125.feature | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/6125.feature

diff --git a/changelog.d/6125.feature b/changelog.d/6125.feature
new file mode 100644
index 000000000..432e255ad
--- /dev/null
+++ b/changelog.d/6125.feature
@@ -0,0 +1 @@
+Reject all pending invite for a user during deactivation.

From 873fe7883cf0d7cf5346a9a55d40967a35848e33 Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Fri, 27 Sep 2019 15:21:03 +0100
Subject: [PATCH 135/276] Lint

---
 synapse/handlers/deactivate_account.py     | 4 +---
 tests/rest/client/v2_alpha/test_account.py | 8 +-------
 2 files changed, 2 insertions(+), 10 deletions(-)

diff --git a/synapse/handlers/deactivate_account.py b/synapse/handlers/deactivate_account.py
index 763fea3a2..148d1424c 100644
--- a/synapse/handlers/deactivate_account.py
+++ b/synapse/handlers/deactivate_account.py
@@ -156,9 +156,7 @@ class DeactivateAccountHandler(BaseHandler):
                     require_consent=False,
                 )
                 logger.info(
-                    "Rejected invite for user %r in room %r",
-                    user_id,
-                    room.room_id,
+                    "Rejected invite for user %r in room %r", user_id, room.room_id
                 )
             except Exception:
                 logger.exception(
diff --git a/tests/rest/client/v2_alpha/test_account.py b/tests/rest/client/v2_alpha/test_account.py
index 69c33dfd8..434b730fa 100644
--- a/tests/rest/client/v2_alpha/test_account.py
+++ b/tests/rest/client/v2_alpha/test_account.py
@@ -280,12 +280,7 @@ class DeactivateTestCase(unittest.HomeserverTestCase):
 
         # Make @inviter:test invite @invitee:test in a new room.
         room_id = self.helper.create_room_as(inviter_id, tok=inviter_tok)
-        self.helper.invite(
-            room=room_id,
-            src=inviter_id,
-            targ=invitee_id,
-            tok=inviter_tok,
-        )
+        self.helper.invite(room=room_id, src=inviter_id, targ=invitee_id, tok=inviter_tok)
 
         # Make sure the invite is here.
         pending_invites = self.get_success(store.get_invited_rooms_for_user(invitee_id))
@@ -322,4 +317,3 @@ class DeactivateTestCase(unittest.HomeserverTestCase):
         )
         self.render(request)
         self.assertEqual(request.code, 200)
-

From fbb8ff3088abab48bd5815a1acaeb9243ada7431 Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Fri, 27 Sep 2019 15:23:07 +0100
Subject: [PATCH 136/276] ok

---
 tests/rest/client/v2_alpha/test_account.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/rest/client/v2_alpha/test_account.py b/tests/rest/client/v2_alpha/test_account.py
index 434b730fa..0f51895b8 100644
--- a/tests/rest/client/v2_alpha/test_account.py
+++ b/tests/rest/client/v2_alpha/test_account.py
@@ -280,7 +280,9 @@ class DeactivateTestCase(unittest.HomeserverTestCase):
 
         # Make @inviter:test invite @invitee:test in a new room.
         room_id = self.helper.create_room_as(inviter_id, tok=inviter_tok)
-        self.helper.invite(room=room_id, src=inviter_id, targ=invitee_id, tok=inviter_tok)
+        self.helper.invite(
+            room=room_id, src=inviter_id, targ=invitee_id, tok=inviter_tok
+        )
 
         # Make sure the invite is here.
         pending_invites = self.get_success(store.get_invited_rooms_for_user(invitee_id))

From 6374ca40c2ff3d2eaa41af5ebf4f8324522ecb84 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Fri, 27 Sep 2019 15:58:14 +0100
Subject: [PATCH 137/276] Update

---
 tests/patch_inline_callbacks.py | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/tests/patch_inline_callbacks.py b/tests/patch_inline_callbacks.py
index 5ef0aff0c..a35a1d330 100644
--- a/tests/patch_inline_callbacks.py
+++ b/tests/patch_inline_callbacks.py
@@ -15,7 +15,6 @@
 
 from __future__ import print_function
 
-import inspect
 import functools
 import sys
 
@@ -32,6 +31,8 @@ def do_patch():
     from synapse.logging.context import LoggingContext
 
     orig_inline_callbacks = defer.inlineCallbacks
+    if hasattr(orig_inline_callbacks, "patched_by_synapse"):
+        return
 
     def new_inline_callbacks(f):
         @functools.wraps(f)
@@ -100,13 +101,20 @@ def do_patch():
         return wrapped
 
     defer.inlineCallbacks = new_inline_callbacks
+    new_inline_callbacks.patched_by_synapse = True
 
 
 def _check_yield_points(f, changes, start_context):
+    """Wraps a generator that is about to passed to defer.inlineCallbacks
+    checking that after every yield the log contexts are correct.
+    """
+
     from synapse.logging.context import LoggingContext
 
     @functools.wraps(f)
     def check_yield_points_inner(*args, **kwargs):
+        expected_context = start_context
+
         gen = f(*args, **kwargs)
 
         last_yield_line_no = 1
@@ -119,12 +127,13 @@ def _check_yield_points(f, changes, start_context):
                 else:
                     d = gen.send(result)
             except (StopIteration, defer._DefGen_Return) as e:
-                if LoggingContext.current_context() != start_context:
+                if LoggingContext.current_context() != expected_context:
                     # This happens when the context is lost sometime *after* the
                     # final yield and returning. E.g. we forgot to yield on a
                     # function that returns a deferred.
                     err = (
-                        "%s returned and changed context from %s to %s, in %s between %d and end of func"
+                        "Function %r returned and changed context from %s to %s,"
+                        " in %s between %d and end of func"
                         % (
                             f.__qualname__,
                             start_context,
@@ -134,7 +143,6 @@ def _check_yield_points(f, changes, start_context):
                         )
                     )
                     changes.append(err)
-                    # print(err, file=sys.stderr)
                     # raise Exception(err)
                 return getattr(e, "value", None)
 
@@ -144,10 +152,8 @@ def _check_yield_points(f, changes, start_context):
                 result = Failure(e)
 
             frame = gen.gi_frame
-            if frame.f_code.co_name == "check_yield_points_inner":
-                frame = inspect.getgeneratorlocals(gen)["gen"].gi_frame
 
-            if LoggingContext.current_context() != start_context:
+            if LoggingContext.current_context() != expected_context:
                 # This happens because the context is lost sometime *after* the
                 # previous yield and *after* the current yield. E.g. the
                 # deferred we waited on didn't follow the rules, or we forgot to
@@ -164,9 +170,10 @@ def _check_yield_points(f, changes, start_context):
                     )
                 )
                 changes.append(err)
-                # print(err, file=sys.stderr)
                 # raise Exception(err)
 
+                expected_context = LoggingContext.current_context()
+
             last_yield_line_no = frame.f_lineno
 
     return check_yield_points_inner

From 25a0a36ad9b63aa2becabc5c311025cb612d466f Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Fri, 27 Sep 2019 16:10:24 +0100
Subject: [PATCH 138/276] Update changelog.d/6125.feature

Co-Authored-By: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
---
 changelog.d/6125.feature | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/changelog.d/6125.feature b/changelog.d/6125.feature
index 432e255ad..cbe5f8d3c 100644
--- a/changelog.d/6125.feature
+++ b/changelog.d/6125.feature
@@ -1 +1 @@
-Reject all pending invite for a user during deactivation.
+Reject all pending invites for a user during deactivation.

From bbe2a0f33916d7b01179c56b230307c46843625a Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Fri, 27 Sep 2019 16:10:36 +0100
Subject: [PATCH 139/276] Update synapse/handlers/deactivate_account.py

Co-Authored-By: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
---
 synapse/handlers/deactivate_account.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/handlers/deactivate_account.py b/synapse/handlers/deactivate_account.py
index 148d1424c..5cf01479d 100644
--- a/synapse/handlers/deactivate_account.py
+++ b/synapse/handlers/deactivate_account.py
@@ -120,7 +120,7 @@ class DeactivateAccountHandler(BaseHandler):
         # parts users from rooms (if it isn't already running)
         self._start_user_parting()
 
-        # Reject all pending invites for the user, so that it doesn't show up in the
+        # Reject all pending invites for the user, so that they do not show up in the
         # invitees list of rooms.
         yield self._reject_pending_invites_for_user(user_id)
 

From af92110c465ea7cf4d04e1193b58f16ae26a75d6 Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Fri, 27 Sep 2019 16:12:15 +0100
Subject: [PATCH 140/276] Update synapse/handlers/deactivate_account.py

Co-Authored-By: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
---
 synapse/handlers/deactivate_account.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/synapse/handlers/deactivate_account.py b/synapse/handlers/deactivate_account.py
index 5cf01479d..5f142f82c 100644
--- a/synapse/handlers/deactivate_account.py
+++ b/synapse/handlers/deactivate_account.py
@@ -156,7 +156,9 @@ class DeactivateAccountHandler(BaseHandler):
                     require_consent=False,
                 )
                 logger.info(
-                    "Rejected invite for user %r in room %r", user_id, room.room_id
+                    "Rejected invite for deactivated user %r in room %r",
+                    user_id,
+                    room.room_id,
                 )
             except Exception:
                 logger.exception(

From 3e42d47a5a06ea5d353b75a42040107bf401d8ba Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Fri, 27 Sep 2019 16:15:01 +0100
Subject: [PATCH 141/276] Incorporate review

---
 synapse/handlers/deactivate_account.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/synapse/handlers/deactivate_account.py b/synapse/handlers/deactivate_account.py
index 5f142f82c..63267a0a4 100644
--- a/synapse/handlers/deactivate_account.py
+++ b/synapse/handlers/deactivate_account.py
@@ -120,8 +120,8 @@ class DeactivateAccountHandler(BaseHandler):
         # parts users from rooms (if it isn't already running)
         self._start_user_parting()
 
-        # Reject all pending invites for the user, so that they do not show up in the
-        # invitees list of rooms.
+        # Reject all pending invites for the user, so that the user doesn't show up in the
+        # "invited" section of rooms' members list.
         yield self._reject_pending_invites_for_user(user_id)
 
         # Remove all information on the user from the account_validity table.
@@ -143,8 +143,6 @@ class DeactivateAccountHandler(BaseHandler):
         user = UserID.from_string(user_id)
         pending_invites = yield self.store.get_invited_rooms_for_user(user_id)
 
-        logger.info(pending_invites)
-
         for room in pending_invites:
             try:
                 yield self._room_member_handler.update_membership(

From f3451118a6dca1499daadf224c3eab801dad0c0c Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Fri, 27 Sep 2019 17:59:18 +0100
Subject: [PATCH 142/276] Edit SimpleHttpClient to reference that header keys
 can be passed as str or bytes (#6077)

---
 changelog.d/6077.misc  |  1 +
 synapse/http/client.py | 12 ++++++------
 2 files changed, 7 insertions(+), 6 deletions(-)
 create mode 100644 changelog.d/6077.misc

diff --git a/changelog.d/6077.misc b/changelog.d/6077.misc
new file mode 100644
index 000000000..31ac5b97a
--- /dev/null
+++ b/changelog.d/6077.misc
@@ -0,0 +1 @@
+Edit header dicts docstrings in SimpleHttpClient to note that `str` or `bytes` can be passed as header keys.
diff --git a/synapse/http/client.py b/synapse/http/client.py
index 51765ae3c..cdf828a4f 100644
--- a/synapse/http/client.py
+++ b/synapse/http/client.py
@@ -327,7 +327,7 @@ class SimpleHttpClient(object):
         Args:
             uri (str):
             args (dict[str, str|List[str]]): query params
-            headers (dict[str, List[str]]|None): If not None, a map from
+            headers (dict[str|bytes, List[str|bytes]]|None): If not None, a map from
                header name to a list of values for that header
 
         Returns:
@@ -371,7 +371,7 @@ class SimpleHttpClient(object):
         Args:
             uri (str):
             post_json (object):
-            headers (dict[str, List[str]]|None): If not None, a map from
+            headers (dict[str|bytes, List[str|bytes]]|None): If not None, a map from
                header name to a list of values for that header
 
         Returns:
@@ -414,7 +414,7 @@ class SimpleHttpClient(object):
                 None.
                 **Note**: The value of each key is assumed to be an iterable
                 and *not* a string.
-            headers (dict[str, List[str]]|None): If not None, a map from
+            headers (dict[str|bytes, List[str|bytes]]|None): If not None, a map from
                header name to a list of values for that header
         Returns:
             Deferred: Succeeds when we get *any* 2xx HTTP response, with the
@@ -438,7 +438,7 @@ class SimpleHttpClient(object):
                 None.
                 **Note**: The value of each key is assumed to be an iterable
                 and *not* a string.
-            headers (dict[str, List[str]]|None): If not None, a map from
+            headers (dict[str|bytes, List[str|bytes]]|None): If not None, a map from
                header name to a list of values for that header
         Returns:
             Deferred: Succeeds when we get *any* 2xx HTTP response, with the
@@ -482,7 +482,7 @@ class SimpleHttpClient(object):
                 None.
                 **Note**: The value of each key is assumed to be an iterable
                 and *not* a string.
-            headers (dict[str, List[str]]|None): If not None, a map from
+            headers (dict[str|bytes, List[str|bytes]]|None): If not None, a map from
                header name to a list of values for that header
         Returns:
             Deferred: Succeeds when we get *any* 2xx HTTP response, with the
@@ -516,7 +516,7 @@ class SimpleHttpClient(object):
         Args:
             url (str): The URL to GET
             output_stream (file): File to write the response body to.
-            headers (dict[str, List[str]]|None): If not None, a map from
+            headers (dict[str|bytes, List[str|bytes]]|None): If not None, a map from
                header name to a list of values for that header
         Returns:
             A (int,dict,string,int) tuple of the file length, dict of the response

From 16cb9a71b8b46604d49944f0b9c316687becca93 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Mon, 30 Sep 2019 09:38:41 +0100
Subject: [PATCH 143/276] Drop unused tables (#6115)

These tables are unused since #5893 (as amended by #6047), so we can now drop
them.

Fixes #6048.
---
 changelog.d/6115.misc                         |  1 +
 .../delta/56/drop_unused_event_tables.sql     | 20 +++++++++++++++++++
 2 files changed, 21 insertions(+)
 create mode 100644 changelog.d/6115.misc
 create mode 100644 synapse/storage/schema/delta/56/drop_unused_event_tables.sql

diff --git a/changelog.d/6115.misc b/changelog.d/6115.misc
new file mode 100644
index 000000000..b19e395a9
--- /dev/null
+++ b/changelog.d/6115.misc
@@ -0,0 +1 @@
+Drop some unused database tables.
diff --git a/synapse/storage/schema/delta/56/drop_unused_event_tables.sql b/synapse/storage/schema/delta/56/drop_unused_event_tables.sql
new file mode 100644
index 000000000..9f09922c6
--- /dev/null
+++ b/synapse/storage/schema/delta/56/drop_unused_event_tables.sql
@@ -0,0 +1,20 @@
+/* Copyright 2019 The Matrix.org Foundation C.I.C.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- these tables are never used.
+DROP TABLE IF EXISTS room_names;
+DROP TABLE IF EXISTS topics;
+DROP TABLE IF EXISTS history_visibility;
+DROP TABLE IF EXISTS guest_access;

From 9267741a5f7732d7d16f8445edc68bc68b730601 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Mon, 30 Sep 2019 11:58:36 +0100
Subject: [PATCH 144/276] Fix `devices_last_seen` background update.

Fixes #6134.
---
 synapse/storage/client_ips.py       | 46 ++++++++++++++++++++++++-----
 synapse/storage/engines/postgres.py |  7 +++++
 synapse/storage/engines/sqlite.py   |  8 +++++
 3 files changed, 54 insertions(+), 7 deletions(-)

diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py
index 539584288..bb135166c 100644
--- a/synapse/storage/client_ips.py
+++ b/synapse/storage/client_ips.py
@@ -463,14 +463,46 @@ class ClientIpStore(background_updates.BackgroundUpdateStore):
         last_device_id = progress.get("last_device_id", "")
 
         def _devices_last_seen_update_txn(txn):
+            # This consists of two queries:
+            #
+            #   1. The sub-query searches for the next N devices and joins
+            #      against user_ips to find the max last_seen associated with
+            #      that device.
+            #   2. The outer query then joins again against user_ips on
+            #      user/device/last_seen. This *should* hopefully only
+            #      return one row, but if it does return more than one then
+            #      we'll just end up updating the same device row multiple
+            #      times, which is fine.
+
+            if self.database_engine.supports_tuple_comparison:
+                where_clause = "(user_id, device_id) > (?, ?)"
+                where_args = [last_user_id, last_device_id]
+            else:
+                # We explicitly do a `user_id >= ? AND (...)` here to ensure
+                # that an index is used, as doing `user_id > ? OR (user_id = ? AND ...)`
+                # makes it hard for query optimiser to tell that it can use the
+                # index on user_id
+                where_clause = "user_id >= ? AND (user_id > ? OR device_id > ?)"
+                where_args = [last_user_id, last_user_id, last_device_id]
+
             sql = """
-                SELECT u.last_seen, u.ip, u.user_agent, user_id, device_id FROM devices
-                INNER JOIN user_ips AS u USING (user_id, device_id)
-                WHERE user_id > ? OR (user_id = ? AND device_id > ?)
-                ORDER BY user_id ASC, device_id ASC
-                LIMIT ?
-            """
-            txn.execute(sql, (last_user_id, last_user_id, last_device_id, batch_size))
+                SELECT
+                    last_seen, ip, user_agent, user_id, device_id
+                FROM (
+                    SELECT
+                        user_id, device_id, MAX(u.last_seen) AS last_seen
+                    FROM devices
+                    INNER JOIN user_ips AS u USING (user_id, device_id)
+                    WHERE %(where_clause)s
+                    GROUP BY user_id, device_id
+                    ORDER BY user_id ASC, device_id ASC
+                    LIMIT ?
+                ) c
+                INNER JOIN user_ips AS u USING (user_id, device_id, last_seen)
+            """ % {
+                "where_clause": where_clause
+            }
+            txn.execute(sql, where_args + [batch_size])
 
             rows = txn.fetchall()
             if not rows:
diff --git a/synapse/storage/engines/postgres.py b/synapse/storage/engines/postgres.py
index 289b6bc28..601617b21 100644
--- a/synapse/storage/engines/postgres.py
+++ b/synapse/storage/engines/postgres.py
@@ -72,6 +72,13 @@ class PostgresEngine(object):
         """
         return True
 
+    @property
+    def supports_tuple_comparison(self):
+        """
+        Do we support comparing tuples, i.e. `(a, b) > (c, d)`?
+        """
+        return True
+
     def is_deadlock(self, error):
         if isinstance(error, self.module.DatabaseError):
             # https://www.postgresql.org/docs/current/static/errcodes-appendix.html
diff --git a/synapse/storage/engines/sqlite.py b/synapse/storage/engines/sqlite.py
index e9b9caa49..ac9210936 100644
--- a/synapse/storage/engines/sqlite.py
+++ b/synapse/storage/engines/sqlite.py
@@ -38,6 +38,14 @@ class Sqlite3Engine(object):
         """
         return self.module.sqlite_version_info >= (3, 24, 0)
 
+    @property
+    def supports_tuple_comparison(self):
+        """
+        Do we support comparing tuples, i.e. `(a, b) > (c, d)`? This requires
+        SQLite 3.15+.
+        """
+        return self.module.sqlite_version_info >= (3, 15, 0)
+
     def check_database(self, txn):
         pass
 

From 129343cd8a90f27142fbb0c7b723ae2cb16207b9 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Mon, 30 Sep 2019 12:00:34 +0100
Subject: [PATCH 145/276] Newsfile

---
 changelog.d/6135.bugfix | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/6135.bugfix

diff --git a/changelog.d/6135.bugfix b/changelog.d/6135.bugfix
new file mode 100644
index 000000000..5f9f010cb
--- /dev/null
+++ b/changelog.d/6135.bugfix
@@ -0,0 +1 @@
+Fix bug in background update that adds last seen information to the `devices` table, and improve its performance on Postgres.

From a27fb7d5cac3cacc55a1c778f02d074d4115eea6 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Tue, 1 Oct 2019 11:05:48 +0100
Subject: [PATCH 146/276] Don't repeatedly attempt to censor events we don't
 have.

Currently we don't set the `have_censored` column if we don't have the
target event of a redaction, which means we repeatedly attempt to censor
the same non-existent event.

When we persist non-redacted events we unset the `have_censored` column
for any redactions that target said event.
---
 synapse/storage/events.py | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/synapse/storage/events.py b/synapse/storage/events.py
index ddf7ab647..5d56ceeab 100644
--- a/synapse/storage/events.py
+++ b/synapse/storage/events.py
@@ -1389,6 +1389,22 @@ class EventsStore(
             ],
         )
 
+        for event, _ in events_and_contexts:
+            if not event.internal_metadata.is_redacted():
+                # If we're persisting an unredacted event we go and ensure
+                # that we mark any redactions that reference this event as
+                # requiring censoring.
+                self._simple_update_txn(
+                    txn,
+                    table="redactions",
+                    keyvalues={
+                        "redacts": event.event_id,
+                    },
+                    updatevalues={
+                        "have_censored": False,
+                    }
+                )
+
     def _store_rejected_events_txn(self, txn, events_and_contexts):
         """Add rows to the 'rejections' table for received events which were
         rejected
@@ -1589,7 +1605,7 @@ class EventsStore(
         sql = """
             SELECT redact_event.event_id, redacts FROM redactions
             INNER JOIN events AS redact_event USING (event_id)
-            INNER JOIN events AS original_event ON (
+            LEFT JOIN events AS original_event ON (
                 redact_event.room_id = original_event.room_id
                 AND redacts = original_event.event_id
             )

From 898dde981b41a4dfb79b5830f17e6eb9871ef762 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Tue, 1 Oct 2019 13:23:34 +0100
Subject: [PATCH 147/276] Add received_ts column to redactions.

This will allow us to efficiently search for uncensored redactions in
the DB before a given time.
---
 synapse/storage/events.py                     | 20 +++---
 synapse/storage/events_bg_updates.py          | 61 +++++++++++++++++++
 .../schema/delta/56/redaction_censor2.sql     | 20 ++++++
 3 files changed, 92 insertions(+), 9 deletions(-)
 create mode 100644 synapse/storage/schema/delta/56/redaction_censor2.sql

diff --git a/synapse/storage/events.py b/synapse/storage/events.py
index 5d56ceeab..3104815f1 100644
--- a/synapse/storage/events.py
+++ b/synapse/storage/events.py
@@ -1397,12 +1397,8 @@ class EventsStore(
                 self._simple_update_txn(
                     txn,
                     table="redactions",
-                    keyvalues={
-                        "redacts": event.event_id,
-                    },
-                    updatevalues={
-                        "have_censored": False,
-                    }
+                    keyvalues={"redacts": event.event_id},
+                    updatevalues={"have_censored": False},
                 )
 
     def _store_rejected_events_txn(self, txn, events_and_contexts):
@@ -1568,9 +1564,15 @@ class EventsStore(
     def _store_redaction(self, txn, event):
         # invalidate the cache for the redacted event
         txn.call_after(self._invalidate_get_event_cache, event.redacts)
-        txn.execute(
-            "INSERT INTO redactions (event_id, redacts) VALUES (?,?)",
-            (event.event_id, event.redacts),
+
+        self._simple_insert_txn(
+            txn,
+            table="redactions",
+            values={
+                "event_id": event.event_id,
+                "redacts": event.redacts,
+                "received_ts": self._clock.time_msec(),
+            },
         )
 
     @defer.inlineCallbacks
diff --git a/synapse/storage/events_bg_updates.py b/synapse/storage/events_bg_updates.py
index 6587f31e2..5717baf48 100644
--- a/synapse/storage/events_bg_updates.py
+++ b/synapse/storage/events_bg_updates.py
@@ -67,6 +67,10 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore):
             self.DELETE_SOFT_FAILED_EXTREMITIES, self._cleanup_extremities_bg_update
         )
 
+        self.register_background_update_handler(
+            "redactions_received_ts", self._redactions_received_ts
+        )
+
     @defer.inlineCallbacks
     def _background_reindex_fields_sender(self, progress, batch_size):
         target_min_stream_id = progress["target_min_stream_id_inclusive"]
@@ -397,3 +401,60 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore):
             )
 
         return num_handled
+
+    @defer.inlineCallbacks
+    def _redactions_received_ts(self, progress, batch_size):
+        """Handles filling out the `received_ts` column in redactions.
+        """
+        last_event_id = progress.get("last_event_id", "")
+
+        def _redactions_received_ts_txn(txn):
+            # Fetch the set of event IDs that we want to update
+            sql = """
+                SELECT event_id FROM redactions
+                WHERE event_id > ?
+                ORDER BY event_id ASC
+                LIMIT ?
+            """
+
+            txn.execute(sql, (last_event_id, batch_size))
+
+            rows = txn.fetchall()
+            if not rows:
+                return 0
+
+            upper_event_id, = rows[-1]
+
+            # Update the redactions with the received_ts.
+            #
+            # Note: Not all events have an associated received_ts, so we
+            # fallback to using origin_server_ts. If we for some reason don't
+            # have an origin_server_ts, lets just use the current timestamp.
+            #
+            # We don't want to leave it null, as then we'll never try and
+            # censor those redactions.
+            sql = """
+                UPDATE redactions
+                SET received_ts = (
+                    SELECT COALESCE(received_ts, origin_server_ts, ?) FROM events
+                    WHERE events.event_id = redactions.event_id
+                )
+                WHERE ? <= event_id AND event_id <= ?
+            """
+
+            txn.execute(sql, (self._clock.time_msec(), last_event_id, upper_event_id))
+
+            self._background_update_progress_txn(
+                txn, "redactions_received_ts", {"last_event_id": upper_event_id}
+            )
+
+            return len(rows)
+
+        count = yield self.runInteraction(
+            "_redactions_received_ts", _redactions_received_ts_txn
+        )
+
+        if not count:
+            yield self._end_background_update("redactions_received_ts")
+
+        return count
diff --git a/synapse/storage/schema/delta/56/redaction_censor2.sql b/synapse/storage/schema/delta/56/redaction_censor2.sql
new file mode 100644
index 000000000..77a5eca49
--- /dev/null
+++ b/synapse/storage/schema/delta/56/redaction_censor2.sql
@@ -0,0 +1,20 @@
+/* Copyright 2019 The Matrix.org Foundation C.I.C.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ALTER TABLE redactions ADD COLUMN received_ts BIGINT;
+CREATE INDEX redactions_have_censored_ts ON redactions(received_ts) WHERE not have_censored;
+
+INSERT INTO background_updates (update_name, progress_json) VALUES
+  ('redactions_received_ts', '{}');

From 5e8387af9e771ae42c7c8c4dc186000d862d3787 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Tue, 1 Oct 2019 13:28:41 +0100
Subject: [PATCH 148/276] Use `received_ts` to find uncensored redacted events

Joining against `events` and ordering by `stream_ordering` is
inefficient as it forces scanning the entirety of the redactions table.

This isn't the case if we use the `redactions.received_ts` column, as we
can then use an index.
---
 synapse/storage/events.py | 21 +++++++--------------
 1 file changed, 7 insertions(+), 14 deletions(-)

diff --git a/synapse/storage/events.py b/synapse/storage/events.py
index 3104815f1..2e485c864 100644
--- a/synapse/storage/events.py
+++ b/synapse/storage/events.py
@@ -1589,36 +1589,29 @@ class EventsStore(
         if self.hs.config.redaction_retention_period is None:
             return
 
-        max_pos = yield self.find_first_stream_ordering_after_ts(
-            self._clock.time_msec() - self.hs.config.redaction_retention_period
-        )
+        before_ts = self._clock.time_msec() - self.hs.config.redaction_retention_period
 
         # We fetch all redactions that:
         #   1. point to an event we have,
-        #   2. has a stream ordering from before the cut off, and
+        #   2. has a received_ts from before the cut off, and
         #   3. we haven't yet censored.
         #
         # This is limited to 100 events to ensure that we don't try and do too
         # much at once. We'll get called again so this should eventually catch
         # up.
-        #
-        # We use the range [-max_pos, max_pos] to handle backfilled events,
-        # which are given negative stream ordering.
         sql = """
-            SELECT redact_event.event_id, redacts FROM redactions
-            INNER JOIN events AS redact_event USING (event_id)
+            SELECT redactions.event_id, redacts FROM redactions
             LEFT JOIN events AS original_event ON (
-                redact_event.room_id = original_event.room_id
-                AND redacts = original_event.event_id
+                redacts = original_event.event_id
             )
             WHERE NOT have_censored
-            AND ? <= redact_event.stream_ordering AND redact_event.stream_ordering <= ?
-            ORDER BY redact_event.stream_ordering ASC
+            AND redactions.received_ts <= ?
+            ORDER BY redactions.received_ts ASC
             LIMIT ?
         """
 
         rows = yield self._execute(
-            "_censor_redactions_fetch", None, sql, -max_pos, max_pos, 100
+            "_censor_redactions_fetch", None, sql, before_ts, 100
         )
 
         updates = []

From 2b8352e6387a71f8bea8b512f1a491f1fedf06fc Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Tue, 1 Oct 2019 13:36:29 +0100
Subject: [PATCH 149/276] Newsfile

---
 changelog.d/6141.bugfix | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/6141.bugfix

diff --git a/changelog.d/6141.bugfix b/changelog.d/6141.bugfix
new file mode 100644
index 000000000..c93920b7b
--- /dev/null
+++ b/changelog.d/6141.bugfix
@@ -0,0 +1 @@
+Fix bad performance of censoring redactions background task.

From ce7a3e7e27ba4215ed86112d2967b0acca4cfb77 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 2 Oct 2019 10:14:01 +0100
Subject: [PATCH 150/276] Fix fetching censored redactions from DB

Fetching a censored redaction caused an exception due to the code
expecting redactions to have a `redacts` key, which redacted redactions
don't have.
---
 synapse/storage/events_worker.py | 14 +++++++++++++
 tests/storage/test_redaction.py  | 36 ++++++++++++++++++++++++++++++++
 2 files changed, 50 insertions(+)

diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py
index c6fa7f82f..57ce0304e 100644
--- a/synapse/storage/events_worker.py
+++ b/synapse/storage/events_worker.py
@@ -238,6 +238,20 @@ class EventsWorkerStore(SQLBaseStore):
             # we have to recheck auth now.
 
             if not allow_rejected and entry.event.type == EventTypes.Redaction:
+                if not hasattr(entry.event, "redacts"):
+                    # A redacted redaction doesn't have a `redacts` key, in
+                    # which case lets just withhold the event.
+                    #
+                    # Note: Most of the time if the redactions has been
+                    # redacted we still have the un-redacted event in the DB
+                    # and so we'll still see the `redacts` key. However, this
+                    # isn't always true e.g. if we have censored the event.
+                    logger.debug(
+                        "Withholding redaction event %s as we don't have redacts key",
+                        event_id,
+                    )
+                    continue
+
                 redacted_event_id = entry.event.redacts
                 event_map = yield self._get_events_from_cache_or_db([redacted_event_id])
                 original_event_entry = event_map.get(redacted_event_id)
diff --git a/tests/storage/test_redaction.py b/tests/storage/test_redaction.py
index deecfad9f..427d3c49c 100644
--- a/tests/storage/test_redaction.py
+++ b/tests/storage/test_redaction.py
@@ -118,6 +118,8 @@ class RedactionTestCase(unittest.HomeserverTestCase):
 
         self.get_success(self.store.persist_event(event, context))
 
+        return event
+
     def test_redact(self):
         self.get_success(
             self.inject_room_member(self.room1, self.u_alice, Membership.JOIN)
@@ -361,3 +363,37 @@ class RedactionTestCase(unittest.HomeserverTestCase):
         )
 
         self.assert_dict({"content": {}}, json.loads(event_json))
+
+    def test_redact_redaction(self):
+        """Tests that we can redact a redaction and can fetch it again.
+        """
+
+        self.get_success(
+            self.inject_room_member(self.room1, self.u_alice, Membership.JOIN)
+        )
+
+        msg_event = self.get_success(self.inject_message(self.room1, self.u_alice, "t"))
+
+        first_redact_event = self.get_success(
+            self.inject_redaction(
+                self.room1, msg_event.event_id, self.u_alice, "Redacting message"
+            )
+        )
+
+        self.get_success(
+            self.inject_redaction(
+                self.room1,
+                first_redact_event.event_id,
+                self.u_alice,
+                "Redacting redaction",
+            )
+        )
+
+        # Now lets jump to the future where we have censored the redaction event
+        # in the DB.
+        self.reactor.advance(60 * 60 * 24 * 31)
+
+        # We just want to check that fetching the event doesn't raise an exception.
+        self.get_success(
+            self.store.get_event(first_redact_event.event_id, allow_none=True)
+        )

From 33d4ebdf78149705aa4b73cabe593337619ca2a7 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 2 Oct 2019 10:18:17 +0100
Subject: [PATCH 151/276] Newsfile

---
 changelog.d/6145.bugfix | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/6145.bugfix

diff --git a/changelog.d/6145.bugfix b/changelog.d/6145.bugfix
new file mode 100644
index 000000000..9e0eb5dd4
--- /dev/null
+++ b/changelog.d/6145.bugfix
@@ -0,0 +1 @@
+Fix fetching censored redactions from DB, which caused APIs like initial sync to fail if it tried to include the censored redaction.

From f44f1d2e8374b7250a8a68cf3a49e6d1ac63b0fb Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 2 Oct 2019 10:36:27 +0100
Subject: [PATCH 152/276] Fix errors storing large retry intervals.

We have set the max retry interval to a value larger than a postgres or
sqlite int can hold, which caused exceptions when updating the
destinations table.

To fix postgres we need to change the column to a bigint, and for sqlite
we lower the max interval to 2**62 (which is still incredibly long).
---
 ...stinations_retry_interval_type.sql.postgres | 18 ++++++++++++++++++
 synapse/util/retryutils.py                     |  2 +-
 tests/storage/test_transactions.py             | 11 +++++++++++
 3 files changed, 30 insertions(+), 1 deletion(-)
 create mode 100644 synapse/storage/schema/delta/56/destinations_retry_interval_type.sql.postgres

diff --git a/synapse/storage/schema/delta/56/destinations_retry_interval_type.sql.postgres b/synapse/storage/schema/delta/56/destinations_retry_interval_type.sql.postgres
new file mode 100644
index 000000000..b9bbb18a9
--- /dev/null
+++ b/synapse/storage/schema/delta/56/destinations_retry_interval_type.sql.postgres
@@ -0,0 +1,18 @@
+/* Copyright 2019 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- We want to store large retry intervals so we upgrade the column from INT
+-- to BIGINT. We don't need to do this on SQLite.
+ALTER TABLE destinations ALTER retry_interval SET DATA TYPE BIGINT;
diff --git a/synapse/util/retryutils.py b/synapse/util/retryutils.py
index a5f2fbef5..af6958719 100644
--- a/synapse/util/retryutils.py
+++ b/synapse/util/retryutils.py
@@ -29,7 +29,7 @@ MIN_RETRY_INTERVAL = 10 * 60 * 1000
 RETRY_MULTIPLIER = 5
 
 # a cap on the backoff. (Essentially none)
-MAX_RETRY_INTERVAL = 2 ** 63
+MAX_RETRY_INTERVAL = 2 ** 62
 
 
 class NotRetryingDestination(Exception):
diff --git a/tests/storage/test_transactions.py b/tests/storage/test_transactions.py
index a771d5af2..8e817e2c7 100644
--- a/tests/storage/test_transactions.py
+++ b/tests/storage/test_transactions.py
@@ -13,6 +13,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from synapse.util.retryutils import MAX_RETRY_INTERVAL
+
 from tests.unittest import HomeserverTestCase
 
 
@@ -45,3 +47,12 @@ class TransactionStoreTestCase(HomeserverTestCase):
         """
         d = self.store.set_destination_retry_timings("example.com", 1000, 50, 100)
         self.get_success(d)
+
+    def test_large_destination_retry(self):
+        d = self.store.set_destination_retry_timings(
+            "example.com", MAX_RETRY_INTERVAL, MAX_RETRY_INTERVAL, MAX_RETRY_INTERVAL
+        )
+        self.get_success(d)
+
+        d = self.store.get_destination_retry_timings("example.com")
+        self.get_success(d)

From 2bc027ab71c33960e216ec194612a616a4aa11b8 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 2 Oct 2019 10:41:29 +0100
Subject: [PATCH 153/276] Newsfile

---
 changelog.d/6146.bugfix | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/6146.bugfix

diff --git a/changelog.d/6146.bugfix b/changelog.d/6146.bugfix
new file mode 100644
index 000000000..1dad80183
--- /dev/null
+++ b/changelog.d/6146.bugfix
@@ -0,0 +1 @@
+Fix exceptions when storing large retry intervals for down remote servers.

From 5705ecaec6b7a85c152691c79a4fa3526792a3eb Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Wed, 2 Oct 2019 11:16:38 +0100
Subject: [PATCH 154/276] Don't 500 code when trying to exchange a revoked 3PID
 invite

While this is not documented in the spec (but should be), Riot (and other clients) revoke 3PID invites by sending a m.room.third_party_invite event with an empty ({}) content to the room's state.
When the invited 3PID gets associated with a MXID, the identity server (which doesn't know about revocations) sends down to the MXID's homeserver all of the undelivered invites it has for this 3PID. The homeserver then tries to talk to the inviting homeserver in order to exchange these invites for m.room.member events.
When one of the invites is revoked, the inviting homeserver responds with a 500 error because it tries to extract a 'display_name' property from the content, which is empty. This might cause the invited server to consider that the server is down and not try to exchange other, valid invites (or at least delay it).

This fix handles the case of revoked invites by avoiding trying to fetch a 'display_name' from the original invite's content, and letting the m.room.member event fail the auth rules (because, since the original invite's content is empty, it doesn't have public keys), which results in sending a 403 with the correct error message to the invited server.
---
 synapse/handlers/federation.py | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index f72b81d41..a3d7739ea 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -2599,8 +2599,19 @@ class FederationHandler(BaseHandler):
                 original_invite_id, allow_none=True
             )
         if original_invite:
-            display_name = original_invite.content["display_name"]
-            event_dict["content"]["third_party_invite"]["display_name"] = display_name
+            # If the m.room.third_party_invite event's content is empty, it means the
+            # invite has been revoked.
+            if original_invite.content:
+                display_name = original_invite.content["display_name"]
+                event_dict["content"]["third_party_invite"]["display_name"] = display_name
+            else:
+                # Don't discard or raise an error here because that's not the right place
+                # to do auth checks. The auth check will fail on this invite because we
+                # won't be able to fetch public keys from the m.room.third_party_invite
+                # event's content (because it's empty).
+                logger.info(
+                    "Found invite event for third_party_invite but it has been revoked"
+                )
         else:
             logger.info(
                 "Could not find invite event for third_party_invite: %r", event_dict

From d69fd53f74f693e0ec756eec479f3d51d93fd2aa Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 2 Oct 2019 11:21:52 +0100
Subject: [PATCH 155/276] Bound find_next_generated_user_id DB query.

We can easily bound the set of user IDs we pull out of the DB, so let's
do that.
---
 synapse/storage/registration.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py
index 241a7be51..1a859352b 100644
--- a/synapse/storage/registration.py
+++ b/synapse/storage/registration.py
@@ -493,7 +493,9 @@ class RegistrationWorkerStore(SQLBaseStore):
         """
 
         def _find_next_generated_user_id(txn):
-            txn.execute("SELECT name FROM users")
+            # We bound between '@1' and '@a' to avoid pulling the entire table
+            # out.
+            txn.execute("SELECT name FROM users WHERE '@1' <= name AND name < '@a'")
 
             regex = re.compile(r"^@(\d+):")
 

From de1823b521cd9b691d060dbdc477be16decdf2af Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 2 Oct 2019 11:23:55 +0100
Subject: [PATCH 156/276] Newsfile

---
 changelog.d/6148.misc | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/6148.misc

diff --git a/changelog.d/6148.misc b/changelog.d/6148.misc
new file mode 100644
index 000000000..1d5213345
--- /dev/null
+++ b/changelog.d/6148.misc
@@ -0,0 +1 @@
+Improve performance of `find_next_generated_user_id` DB query.

From 2a1470cd05558d4a3bc69a0bc5e8969ba8631426 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Wed, 2 Oct 2019 12:04:22 +0100
Subject: [PATCH 157/276] Fix yields and copy instead of move push rules on
 room upgrade (#6144)

Copy push rules during a room upgrade from the old room to the new room, instead of deleting them from the old room.

For instance, a room may be upgraded multiple times, and push rules won't be transferred on the second upgrade if they were deleted during the first.

Also fix some missing yields that probably broke things quite a bit.
---
 changelog.d/6144.bugfix         |  1 +
 synapse/handlers/room_member.py |  4 ++--
 synapse/storage/push_rule.py    | 16 ++++++----------
 3 files changed, 9 insertions(+), 12 deletions(-)
 create mode 100644 changelog.d/6144.bugfix

diff --git a/changelog.d/6144.bugfix b/changelog.d/6144.bugfix
new file mode 100644
index 000000000..eee63961e
--- /dev/null
+++ b/changelog.d/6144.bugfix
@@ -0,0 +1 @@
+Prevent user push rules being deleted from a room when it is upgraded.
\ No newline at end of file
diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py
index 8abdb1b6e..95a244d86 100644
--- a/synapse/handlers/room_member.py
+++ b/synapse/handlers/room_member.py
@@ -216,8 +216,8 @@ class RoomMemberHandler(object):
                 self.copy_room_tags_and_direct_to_room(
                     predecessor["room_id"], room_id, user_id
                 )
-                # Move over old push rules
-                self.store.move_push_rules_from_room_to_room_for_user(
+                # Copy over push rules
+                yield self.store.copy_push_rules_from_room_to_room_for_user(
                     predecessor["room_id"], room_id, user_id
                 )
         elif event.membership == Membership.LEAVE:
diff --git a/synapse/storage/push_rule.py b/synapse/storage/push_rule.py
index a6517c4cf..c4e24edff 100644
--- a/synapse/storage/push_rule.py
+++ b/synapse/storage/push_rule.py
@@ -183,8 +183,8 @@ class PushRulesWorkerStore(
         return results
 
     @defer.inlineCallbacks
-    def move_push_rule_from_room_to_room(self, new_room_id, user_id, rule):
-        """Move a single push rule from one room to another for a specific user.
+    def copy_push_rule_from_room_to_room(self, new_room_id, user_id, rule):
+        """Copy a single push rule from one room to another for a specific user.
 
         Args:
             new_room_id (str): ID of the new room.
@@ -209,14 +209,11 @@ class PushRulesWorkerStore(
             actions=rule["actions"],
         )
 
-        # Delete push rule for the old room
-        yield self.delete_push_rule(user_id, rule["rule_id"])
-
     @defer.inlineCallbacks
-    def move_push_rules_from_room_to_room_for_user(
+    def copy_push_rules_from_room_to_room_for_user(
         self, old_room_id, new_room_id, user_id
     ):
-        """Move all of the push rules from one room to another for a specific
+        """Copy all of the push rules from one room to another for a specific
         user.
 
         Args:
@@ -227,15 +224,14 @@ class PushRulesWorkerStore(
         # Retrieve push rules for this user
         user_push_rules = yield self.get_push_rules_for_user(user_id)
 
-        # Get rules relating to the old room, move them to the new room, then
-        # delete them from the old room
+        # Get rules relating to the old room and copy them to the new room
         for rule in user_push_rules:
             conditions = rule.get("conditions", [])
             if any(
                 (c.get("key") == "room_id" and c.get("pattern") == old_room_id)
                 for c in conditions
             ):
-                self.move_push_rule_from_room_to_room(new_room_id, user_id, rule)
+                yield self.copy_push_rule_from_room_to_room(new_room_id, user_id, rule)
 
     @defer.inlineCallbacks
     def bulk_get_push_rules_for_room(self, event, context):

From 972c9f65d7ddf94ce024b57397d651d184cb2d26 Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Wed, 2 Oct 2019 12:17:46 +0100
Subject: [PATCH 158/276] Lint

---
 synapse/handlers/federation.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index a3d7739ea..75d79bb8e 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -2603,7 +2603,9 @@ class FederationHandler(BaseHandler):
             # invite has been revoked.
             if original_invite.content:
                 display_name = original_invite.content["display_name"]
-                event_dict["content"]["third_party_invite"]["display_name"] = display_name
+                event_dict["content"]["third_party_invite"][
+                    "display_name"
+                ] = display_name
             else:
                 # Don't discard or raise an error here because that's not the right place
                 # to do auth checks. The auth check will fail on this invite because we

From 24efea338d8643ee15f03b30c080a53320926ee8 Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Wed, 2 Oct 2019 12:20:03 +0100
Subject: [PATCH 159/276] Changelog

---
 changelog.d/6147.bugfix | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/6147.bugfix

diff --git a/changelog.d/6147.bugfix b/changelog.d/6147.bugfix
new file mode 100644
index 000000000..b0f936d28
--- /dev/null
+++ b/changelog.d/6147.bugfix
@@ -0,0 +1 @@
+Don't 500 when trying to exchange a revoked 3PID invite.

From 864f14454322c6cba11476667ade8fc6cbea6f44 Mon Sep 17 00:00:00 2001
From: Amber Brown <hawkowl@atleastfornow.net>
Date: Wed, 2 Oct 2019 05:29:01 -0700
Subject: [PATCH 160/276] Fix up some typechecking (#6150)

* type checking fixes

* changelog
---
 .gitignore                                |  1 +
 changelog.d/6150.misc                     |  1 +
 synapse/api/errors.py                     |  3 ++-
 synapse/api/room_versions.py              |  5 ++++-
 synapse/app/_base.py                      |  4 +++-
 synapse/config/appservice.py              |  5 +++--
 synapse/config/consent_config.py          |  4 ++--
 synapse/config/password_auth_providers.py |  4 +++-
 synapse/config/repository.py              |  5 +++--
 synapse/config/server.py                  | 10 +++++++---
 synapse/config/server_notices_config.py   |  4 ++--
 synapse/logging/opentracing.py            |  9 +++++----
 synapse/logging/utils.py                  | 20 ++++++++++++++++----
 synapse/metrics/__init__.py               |  4 ++--
 synapse/metrics/_exposition.py            |  4 ++--
 synapse/python_dependencies.py            | 17 +++++++++++++----
 synapse/types.py                          |  3 ++-
 synapse/util/async_helpers.py             | 10 +++++++---
 synapse/util/caches/__init__.py           |  3 ++-
 synapse/util/caches/descriptors.py        | 22 ++++++++++++++++++++--
 synapse/util/caches/treecache.py          |  4 +++-
 synapse/util/module_loader.py             |  2 +-
 22 files changed, 104 insertions(+), 40 deletions(-)
 create mode 100644 changelog.d/6150.misc

diff --git a/.gitignore b/.gitignore
index e53d4908d..747b8714d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,6 +10,7 @@
 *.tac
 _trial_temp/
 _trial_temp*/
+/out
 
 # stuff that is likely to exist when you run a server locally
 /*.db
diff --git a/changelog.d/6150.misc b/changelog.d/6150.misc
new file mode 100644
index 000000000..a373c091a
--- /dev/null
+++ b/changelog.d/6150.misc
@@ -0,0 +1 @@
+Expand type-checking on modules imported by synapse.config.
diff --git a/synapse/api/errors.py b/synapse/api/errors.py
index cf1ebf1af..1bb2e8678 100644
--- a/synapse/api/errors.py
+++ b/synapse/api/errors.py
@@ -17,6 +17,7 @@
 """Contains exceptions and error codes."""
 
 import logging
+from typing import Dict
 
 from six import iteritems
 from six.moves import http_client
@@ -111,7 +112,7 @@ class ProxiedRequestError(SynapseError):
     def __init__(self, code, msg, errcode=Codes.UNKNOWN, additional_fields=None):
         super(ProxiedRequestError, self).__init__(code, msg, errcode)
         if additional_fields is None:
-            self._additional_fields = {}
+            self._additional_fields = {}  # type: Dict
         else:
             self._additional_fields = dict(additional_fields)
 
diff --git a/synapse/api/room_versions.py b/synapse/api/room_versions.py
index 95292b7de..c6f50fd7b 100644
--- a/synapse/api/room_versions.py
+++ b/synapse/api/room_versions.py
@@ -12,6 +12,9 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+from typing import Dict
+
 import attr
 
 
@@ -102,4 +105,4 @@ KNOWN_ROOM_VERSIONS = {
         RoomVersions.V4,
         RoomVersions.V5,
     )
-}  # type: dict[str, RoomVersion]
+}  # type: Dict[str, RoomVersion]
diff --git a/synapse/app/_base.py b/synapse/app/_base.py
index c30fdeee9..2ac7d5c06 100644
--- a/synapse/app/_base.py
+++ b/synapse/app/_base.py
@@ -263,7 +263,9 @@ def start(hs, listeners=None):
         refresh_certificate(hs)
 
         # Start the tracer
-        synapse.logging.opentracing.init_tracer(hs.config)
+        synapse.logging.opentracing.init_tracer(  # type: ignore[attr-defined] # noqa
+            hs.config
+        )
 
         # It is now safe to start your Synapse.
         hs.start_listening(listeners)
diff --git a/synapse/config/appservice.py b/synapse/config/appservice.py
index 8387ff680..28d36b1bc 100644
--- a/synapse/config/appservice.py
+++ b/synapse/config/appservice.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import logging
+from typing import Dict
 
 from six import string_types
 from six.moves.urllib import parse as urlparse
@@ -56,8 +57,8 @@ def load_appservices(hostname, config_files):
         return []
 
     # Dicts of value -> filename
-    seen_as_tokens = {}
-    seen_ids = {}
+    seen_as_tokens = {}  # type: Dict[str, str]
+    seen_ids = {}  # type: Dict[str, str]
 
     appservices = []
 
diff --git a/synapse/config/consent_config.py b/synapse/config/consent_config.py
index 94916f3a4..48976e17b 100644
--- a/synapse/config/consent_config.py
+++ b/synapse/config/consent_config.py
@@ -73,8 +73,8 @@ DEFAULT_CONFIG = """\
 
 
 class ConsentConfig(Config):
-    def __init__(self):
-        super(ConsentConfig, self).__init__()
+    def __init__(self, *args):
+        super(ConsentConfig, self).__init__(*args)
 
         self.user_consent_version = None
         self.user_consent_template_dir = None
diff --git a/synapse/config/password_auth_providers.py b/synapse/config/password_auth_providers.py
index 788c39c9f..c50e24439 100644
--- a/synapse/config/password_auth_providers.py
+++ b/synapse/config/password_auth_providers.py
@@ -13,6 +13,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from typing import Any, List
+
 from synapse.util.module_loader import load_module
 
 from ._base import Config
@@ -22,7 +24,7 @@ LDAP_PROVIDER = "ldap_auth_provider.LdapAuthProvider"
 
 class PasswordAuthProviderConfig(Config):
     def read_config(self, config, **kwargs):
-        self.password_providers = []
+        self.password_providers = []  # type: List[Any]
         providers = []
 
         # We want to be backwards compatible with the old `ldap_config`
diff --git a/synapse/config/repository.py b/synapse/config/repository.py
index 52e014608..14740891f 100644
--- a/synapse/config/repository.py
+++ b/synapse/config/repository.py
@@ -15,6 +15,7 @@
 
 import os
 from collections import namedtuple
+from typing import Dict, List
 
 from synapse.python_dependencies import DependencyException, check_requirements
 from synapse.util.module_loader import load_module
@@ -61,7 +62,7 @@ def parse_thumbnail_requirements(thumbnail_sizes):
         Dictionary mapping from media type string to list of
         ThumbnailRequirement tuples.
     """
-    requirements = {}
+    requirements = {}  # type: Dict[str, List]
     for size in thumbnail_sizes:
         width = size["width"]
         height = size["height"]
@@ -130,7 +131,7 @@ class ContentRepositoryConfig(Config):
         #
         # We don't create the storage providers here as not all workers need
         # them to be started.
-        self.media_storage_providers = []
+        self.media_storage_providers = []  # type: List[tuple]
 
         for provider_config in storage_providers:
             # We special case the module "file_system" so as not to need to
diff --git a/synapse/config/server.py b/synapse/config/server.py
index 536ee7f29..709bd387e 100644
--- a/synapse/config/server.py
+++ b/synapse/config/server.py
@@ -19,6 +19,7 @@ import logging
 import os.path
 import re
 from textwrap import indent
+from typing import List
 
 import attr
 import yaml
@@ -243,7 +244,7 @@ class ServerConfig(Config):
         # events with profile information that differ from the target's global profile.
         self.allow_per_room_profiles = config.get("allow_per_room_profiles", True)
 
-        self.listeners = []
+        self.listeners = []  # type: List[dict]
         for listener in config.get("listeners", []):
             if not isinstance(listener.get("port", None), int):
                 raise ConfigError(
@@ -287,7 +288,10 @@ class ServerConfig(Config):
                 validator=attr.validators.instance_of(bool), default=False
             )
             complexity = attr.ib(
-                validator=attr.validators.instance_of((int, float)), default=1.0
+                validator=attr.validators.instance_of(
+                    (float, int)  # type: ignore[arg-type] # noqa
+                ),
+                default=1.0,
             )
             complexity_error = attr.ib(
                 validator=attr.validators.instance_of(str),
@@ -366,7 +370,7 @@ class ServerConfig(Config):
             "cleanup_extremities_with_dummy_events", True
         )
 
-    def has_tls_listener(self):
+    def has_tls_listener(self) -> bool:
         return any(l["tls"] for l in self.listeners)
 
     def generate_config_section(
diff --git a/synapse/config/server_notices_config.py b/synapse/config/server_notices_config.py
index eaac3d73b..6d4285ef9 100644
--- a/synapse/config/server_notices_config.py
+++ b/synapse/config/server_notices_config.py
@@ -59,8 +59,8 @@ class ServerNoticesConfig(Config):
             None if server notices are not enabled.
     """
 
-    def __init__(self):
-        super(ServerNoticesConfig, self).__init__()
+    def __init__(self, *args):
+        super(ServerNoticesConfig, self).__init__(*args)
         self.server_notices_mxid = None
         self.server_notices_mxid_display_name = None
         self.server_notices_mxid_avatar_url = None
diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py
index 308a27213..cd1ff6a51 100644
--- a/synapse/logging/opentracing.py
+++ b/synapse/logging/opentracing.py
@@ -170,6 +170,7 @@ import inspect
 import logging
 import re
 from functools import wraps
+from typing import Dict
 
 from canonicaljson import json
 
@@ -547,7 +548,7 @@ def inject_active_span_twisted_headers(headers, destination, check_destination=T
         return
 
     span = opentracing.tracer.active_span
-    carrier = {}
+    carrier = {}  # type: Dict[str, str]
     opentracing.tracer.inject(span, opentracing.Format.HTTP_HEADERS, carrier)
 
     for key, value in carrier.items():
@@ -584,7 +585,7 @@ def inject_active_span_byte_dict(headers, destination, check_destination=True):
 
     span = opentracing.tracer.active_span
 
-    carrier = {}
+    carrier = {}  # type: Dict[str, str]
     opentracing.tracer.inject(span, opentracing.Format.HTTP_HEADERS, carrier)
 
     for key, value in carrier.items():
@@ -639,7 +640,7 @@ def get_active_span_text_map(destination=None):
     if destination and not whitelisted_homeserver(destination):
         return {}
 
-    carrier = {}
+    carrier = {}  # type: Dict[str, str]
     opentracing.tracer.inject(
         opentracing.tracer.active_span, opentracing.Format.TEXT_MAP, carrier
     )
@@ -653,7 +654,7 @@ def active_span_context_as_string():
     Returns:
         The active span context encoded as a string.
     """
-    carrier = {}
+    carrier = {}  # type: Dict[str, str]
     if opentracing:
         opentracing.tracer.inject(
             opentracing.tracer.active_span, opentracing.Format.TEXT_MAP, carrier
diff --git a/synapse/logging/utils.py b/synapse/logging/utils.py
index 7df0fa608..6073fc272 100644
--- a/synapse/logging/utils.py
+++ b/synapse/logging/utils.py
@@ -119,7 +119,11 @@ def trace_function(f):
         logger = logging.getLogger(name)
         level = logging.DEBUG
 
-        s = inspect.currentframe().f_back
+        frame = inspect.currentframe()
+        if frame is None:
+            raise Exception("Can't get current frame!")
+
+        s = frame.f_back
 
         to_print = [
             "\t%s:%s %s. Args: args=%s, kwargs=%s"
@@ -144,7 +148,7 @@ def trace_function(f):
             pathname=pathname,
             lineno=lineno,
             msg=msg,
-            args=None,
+            args=tuple(),
             exc_info=None,
         )
 
@@ -157,7 +161,12 @@ def trace_function(f):
 
 
 def get_previous_frames():
-    s = inspect.currentframe().f_back.f_back
+
+    frame = inspect.currentframe()
+    if frame is None:
+        raise Exception("Can't get current frame!")
+
+    s = frame.f_back.f_back
     to_return = []
     while s:
         if s.f_globals["__name__"].startswith("synapse"):
@@ -174,7 +183,10 @@ def get_previous_frames():
 
 
 def get_previous_frame(ignore=[]):
-    s = inspect.currentframe().f_back.f_back
+    frame = inspect.currentframe()
+    if frame is None:
+        raise Exception("Can't get current frame!")
+    s = frame.f_back.f_back
 
     while s:
         if s.f_globals["__name__"].startswith("synapse"):
diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py
index bec3b1339..0b45e1f52 100644
--- a/synapse/metrics/__init__.py
+++ b/synapse/metrics/__init__.py
@@ -125,7 +125,7 @@ class InFlightGauge(object):
         )
 
         # Counts number of in flight blocks for a given set of label values
-        self._registrations = {}
+        self._registrations = {}  # type: Dict
 
         # Protects access to _registrations
         self._lock = threading.Lock()
@@ -226,7 +226,7 @@ class BucketCollector(object):
         # Fetch the data -- this must be synchronous!
         data = self.data_collector()
 
-        buckets = {}
+        buckets = {}  # type: Dict[float, int]
 
         res = []
         for x in data.keys():
diff --git a/synapse/metrics/_exposition.py b/synapse/metrics/_exposition.py
index 74d9c3ecd..a24810319 100644
--- a/synapse/metrics/_exposition.py
+++ b/synapse/metrics/_exposition.py
@@ -36,9 +36,9 @@ from twisted.web.resource import Resource
 try:
     from prometheus_client.samples import Sample
 except ImportError:
-    Sample = namedtuple(
+    Sample = namedtuple(  # type: ignore[no-redef] # noqa
         "Sample", ["name", "labels", "value", "timestamp", "exemplar"]
-    )  # type: ignore
+    )
 
 
 CONTENT_TYPE_LATEST = str("text/plain; version=0.0.4; charset=utf-8")
diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py
index 0bd563edc..aa7da1c54 100644
--- a/synapse/python_dependencies.py
+++ b/synapse/python_dependencies.py
@@ -15,7 +15,7 @@
 # limitations under the License.
 
 import logging
-from typing import Set
+from typing import List, Set
 
 from pkg_resources import (
     DistributionNotFound,
@@ -73,6 +73,7 @@ REQUIREMENTS = [
     "netaddr>=0.7.18",
     "Jinja2>=2.9",
     "bleach>=1.4.3",
+    "typing-extensions>=3.7.4",
 ]
 
 CONDITIONAL_REQUIREMENTS = {
@@ -144,7 +145,11 @@ def check_requirements(for_feature=None):
             deps_needed.append(dependency)
             errors.append(
                 "Needed %s, got %s==%s"
-                % (dependency, e.dist.project_name, e.dist.version)
+                % (
+                    dependency,
+                    e.dist.project_name,  # type: ignore[attr-defined] # noqa
+                    e.dist.version,  # type: ignore[attr-defined] # noqa
+                )
             )
         except DistributionNotFound:
             deps_needed.append(dependency)
@@ -159,7 +164,7 @@ def check_requirements(for_feature=None):
     if not for_feature:
         # Check the optional dependencies are up to date. We allow them to not be
         # installed.
-        OPTS = sum(CONDITIONAL_REQUIREMENTS.values(), [])
+        OPTS = sum(CONDITIONAL_REQUIREMENTS.values(), [])  # type: List[str]
 
         for dependency in OPTS:
             try:
@@ -168,7 +173,11 @@ def check_requirements(for_feature=None):
                 deps_needed.append(dependency)
                 errors.append(
                     "Needed optional %s, got %s==%s"
-                    % (dependency, e.dist.project_name, e.dist.version)
+                    % (
+                        dependency,
+                        e.dist.project_name,  # type: ignore[attr-defined] # noqa
+                        e.dist.version,  # type: ignore[attr-defined] # noqa
+                    )
                 )
             except DistributionNotFound:
                 # If it's not found, we don't care
diff --git a/synapse/types.py b/synapse/types.py
index 51eadb6ad..8f79797f1 100644
--- a/synapse/types.py
+++ b/synapse/types.py
@@ -318,6 +318,7 @@ class StreamToken(
     )
 ):
     _SEPARATOR = "_"
+    START = None  # type: StreamToken
 
     @classmethod
     def from_string(cls, string):
@@ -402,7 +403,7 @@ class RoomStreamToken(namedtuple("_StreamToken", "topological stream")):
     followed by the "stream_ordering" id of the event it comes after.
     """
 
-    __slots__ = []
+    __slots__ = []  # type: list
 
     @classmethod
     def parse(cls, string):
diff --git a/synapse/util/async_helpers.py b/synapse/util/async_helpers.py
index f1c46836b..0d3bdd88c 100644
--- a/synapse/util/async_helpers.py
+++ b/synapse/util/async_helpers.py
@@ -13,9 +13,11 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 import collections
 import logging
 from contextlib import contextmanager
+from typing import Dict, Sequence, Set, Union
 
 from six.moves import range
 
@@ -213,7 +215,9 @@ class Linearizer(object):
         # the first element is the number of things executing, and
         # the second element is an OrderedDict, where the keys are deferreds for the
         # things blocked from executing.
-        self.key_to_defer = {}
+        self.key_to_defer = (
+            {}
+        )  # type: Dict[str, Sequence[Union[int, Dict[defer.Deferred, int]]]]
 
     def queue(self, key):
         # we avoid doing defer.inlineCallbacks here, so that cancellation works correctly.
@@ -340,10 +344,10 @@ class ReadWriteLock(object):
 
     def __init__(self):
         # Latest readers queued
-        self.key_to_current_readers = {}
+        self.key_to_current_readers = {}  # type: Dict[str, Set[defer.Deferred]]
 
         # Latest writer queued
-        self.key_to_current_writer = {}
+        self.key_to_current_writer = {}  # type: Dict[str, defer.Deferred]
 
     @defer.inlineCallbacks
     def read(self, key):
diff --git a/synapse/util/caches/__init__.py b/synapse/util/caches/__init__.py
index b50e3503f..43fd65d69 100644
--- a/synapse/util/caches/__init__.py
+++ b/synapse/util/caches/__init__.py
@@ -16,6 +16,7 @@
 
 import logging
 import os
+from typing import Dict
 
 import six
 from six.moves import intern
@@ -37,7 +38,7 @@ def get_cache_factor_for(cache_name):
 
 
 caches_by_name = {}
-collectors_by_name = {}
+collectors_by_name = {}  # type: Dict
 
 cache_size = Gauge("synapse_util_caches_cache:size", "", ["name"])
 cache_hits = Gauge("synapse_util_caches_cache:hits", "", ["name"])
diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py
index 43f66ec4b..5ac2530a6 100644
--- a/synapse/util/caches/descriptors.py
+++ b/synapse/util/caches/descriptors.py
@@ -18,10 +18,12 @@ import inspect
 import logging
 import threading
 from collections import namedtuple
+from typing import Any, cast
 
 from six import itervalues
 
 from prometheus_client import Gauge
+from typing_extensions import Protocol
 
 from twisted.internet import defer
 
@@ -37,6 +39,18 @@ from . import register_cache
 logger = logging.getLogger(__name__)
 
 
+class _CachedFunction(Protocol):
+    invalidate = None  # type: Any
+    invalidate_all = None  # type: Any
+    invalidate_many = None  # type: Any
+    prefill = None  # type: Any
+    cache = None  # type: Any
+    num_args = None  # type: Any
+
+    def __name__(self):
+        ...
+
+
 cache_pending_metric = Gauge(
     "synapse_util_caches_cache_pending",
     "Number of lookups currently pending for this cache",
@@ -245,7 +259,9 @@ class Cache(object):
 
 
 class _CacheDescriptorBase(object):
-    def __init__(self, orig, num_args, inlineCallbacks, cache_context=False):
+    def __init__(
+        self, orig: _CachedFunction, num_args, inlineCallbacks, cache_context=False
+    ):
         self.orig = orig
 
         if inlineCallbacks:
@@ -404,7 +420,7 @@ class CacheDescriptor(_CacheDescriptorBase):
                 return tuple(get_cache_key_gen(args, kwargs))
 
         @functools.wraps(self.orig)
-        def wrapped(*args, **kwargs):
+        def _wrapped(*args, **kwargs):
             # If we're passed a cache_context then we'll want to call its invalidate()
             # whenever we are invalidated
             invalidate_callback = kwargs.pop("on_invalidate", None)
@@ -440,6 +456,8 @@ class CacheDescriptor(_CacheDescriptorBase):
 
             return make_deferred_yieldable(observer)
 
+        wrapped = cast(_CachedFunction, _wrapped)
+
         if self.num_args == 1:
             wrapped.invalidate = lambda key: cache.invalidate(key[0])
             wrapped.prefill = lambda key, val: cache.prefill(key[0], val)
diff --git a/synapse/util/caches/treecache.py b/synapse/util/caches/treecache.py
index 9a72218d8..2ea4e4e91 100644
--- a/synapse/util/caches/treecache.py
+++ b/synapse/util/caches/treecache.py
@@ -1,3 +1,5 @@
+from typing import Dict
+
 from six import itervalues
 
 SENTINEL = object()
@@ -12,7 +14,7 @@ class TreeCache(object):
 
     def __init__(self):
         self.size = 0
-        self.root = {}
+        self.root = {}  # type: Dict
 
     def __setitem__(self, key, value):
         return self.set(key, value)
diff --git a/synapse/util/module_loader.py b/synapse/util/module_loader.py
index 7ff7eb1e4..2705cbe5f 100644
--- a/synapse/util/module_loader.py
+++ b/synapse/util/module_loader.py
@@ -54,5 +54,5 @@ def load_python_module(location: str):
     if spec is None:
         raise Exception("Unable to load module at %s" % (location,))
     mod = importlib.util.module_from_spec(spec)
-    spec.loader.exec_module(mod)
+    spec.loader.exec_module(mod)  # type: ignore
     return mod

From aec1377d0ba09628ffc399eb977ef507fecd1d28 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <andrew@amorgan.xyz>
Date: Wed, 2 Oct 2019 13:55:00 +0100
Subject: [PATCH 161/276] 1.4.0rc2

---
 CHANGES.md              | 18 ++++++++++++++++++
 changelog.d/6117.misc   |  1 -
 changelog.d/6135.bugfix |  1 -
 changelog.d/6141.bugfix |  1 -
 changelog.d/6145.bugfix |  1 -
 changelog.d/6146.bugfix |  1 -
 synapse/__init__.py     |  2 +-
 7 files changed, 19 insertions(+), 6 deletions(-)
 delete mode 100644 changelog.d/6117.misc
 delete mode 100644 changelog.d/6135.bugfix
 delete mode 100644 changelog.d/6141.bugfix
 delete mode 100644 changelog.d/6145.bugfix
 delete mode 100644 changelog.d/6146.bugfix

diff --git a/CHANGES.md b/CHANGES.md
index 0a0d0b343..78322a08c 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,3 +1,21 @@
+Synapse 1.4.0rc2 (2019-10-02)
+=============================
+
+Bugfixes
+--------
+
+- Fix bug in background update that adds last seen information to the `devices` table, and improve its performance on Postgres. ([\#6135](https://github.com/matrix-org/synapse/issues/6135))
+- Fix bad performance of censoring redactions background task. ([\#6141](https://github.com/matrix-org/synapse/issues/6141))
+- Fix fetching censored redactions from DB, which caused APIs like initial sync to fail if it tried to include the censored redaction. ([\#6145](https://github.com/matrix-org/synapse/issues/6145))
+- Fix exceptions when storing large retry intervals for down remote servers. ([\#6146](https://github.com/matrix-org/synapse/issues/6146))
+
+
+Internal Changes
+----------------
+
+- Fix up sample config entry for `redaction_retention_period` option. ([\#6117](https://github.com/matrix-org/synapse/issues/6117))
+
+
 Synapse 1.4.0rc1 (2019-09-26)
 =============================
 
diff --git a/changelog.d/6117.misc b/changelog.d/6117.misc
deleted file mode 100644
index f8bdb58f4..000000000
--- a/changelog.d/6117.misc
+++ /dev/null
@@ -1 +0,0 @@
-Fix up sample config entry for `redaction_retention_period` option.
diff --git a/changelog.d/6135.bugfix b/changelog.d/6135.bugfix
deleted file mode 100644
index 5f9f010cb..000000000
--- a/changelog.d/6135.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Fix bug in background update that adds last seen information to the `devices` table, and improve its performance on Postgres.
diff --git a/changelog.d/6141.bugfix b/changelog.d/6141.bugfix
deleted file mode 100644
index c93920b7b..000000000
--- a/changelog.d/6141.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Fix bad performance of censoring redactions background task.
diff --git a/changelog.d/6145.bugfix b/changelog.d/6145.bugfix
deleted file mode 100644
index 9e0eb5dd4..000000000
--- a/changelog.d/6145.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Fix fetching censored redactions from DB, which caused APIs like initial sync to fail if it tried to include the censored redaction.
diff --git a/changelog.d/6146.bugfix b/changelog.d/6146.bugfix
deleted file mode 100644
index 1dad80183..000000000
--- a/changelog.d/6146.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Fix exceptions when storing large retry intervals for down remote servers.
diff --git a/synapse/__init__.py b/synapse/__init__.py
index ddfe9ec54..5197eea22 100644
--- a/synapse/__init__.py
+++ b/synapse/__init__.py
@@ -35,4 +35,4 @@ try:
 except ImportError:
     pass
 
-__version__ = "1.4.0rc1"
+__version__ = "1.4.0rc2"

From a5166e4d5febc0e03ba9da9db99127a797a0bc4d Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 2 Oct 2019 14:08:35 +0100
Subject: [PATCH 162/276] Land improved room list based on room stats (#6019)

Use room_stats and room_state for room directory search
---
 changelog.d/6019.misc                         |   1 +
 synapse/federation/transport/server.py        |   8 +
 synapse/handlers/room_list.py                 | 331 +++++-------------
 synapse/rest/client/v1/room.py                |   8 +
 synapse/storage/room.py                       | 238 ++++++++-----
 .../schema/delta/56/public_room_list_idx.sql  |  16 +
 tests/handlers/test_roomlist.py               |  39 ---
 7 files changed, 282 insertions(+), 359 deletions(-)
 create mode 100644 changelog.d/6019.misc
 create mode 100644 synapse/storage/schema/delta/56/public_room_list_idx.sql
 delete mode 100644 tests/handlers/test_roomlist.py

diff --git a/changelog.d/6019.misc b/changelog.d/6019.misc
new file mode 100644
index 000000000..dfee73c28
--- /dev/null
+++ b/changelog.d/6019.misc
@@ -0,0 +1 @@
+Improve performance of the public room list directory.
diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py
index 7f8a16e35..0f16f21c2 100644
--- a/synapse/federation/transport/server.py
+++ b/synapse/federation/transport/server.py
@@ -765,6 +765,10 @@ class PublicRoomList(BaseFederationServlet):
         else:
             network_tuple = ThirdPartyInstanceID(None, None)
 
+        if limit == 0:
+            # zero is a special value which corresponds to no limit.
+            limit = None
+
         data = await maybeDeferred(
             self.handler.get_local_public_room_list,
             limit,
@@ -800,6 +804,10 @@ class PublicRoomList(BaseFederationServlet):
         if search_filter is None:
             logger.warning("Nonefilter")
 
+        if limit == 0:
+            # zero is a special value which corresponds to no limit.
+            limit = None
+
         data = await self.handler.get_local_public_room_list(
             limit=limit,
             since_token=since_token,
diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py
index a7e55f00e..4e1cc5460 100644
--- a/synapse/handlers/room_list.py
+++ b/synapse/handlers/room_list.py
@@ -16,8 +16,7 @@
 import logging
 from collections import namedtuple
 
-from six import PY3, iteritems
-from six.moves import range
+from six import iteritems
 
 import msgpack
 from unpaddedbase64 import decode_base64, encode_base64
@@ -27,7 +26,6 @@ from twisted.internet import defer
 from synapse.api.constants import EventTypes, JoinRules
 from synapse.api.errors import Codes, HttpResponseException
 from synapse.types import ThirdPartyInstanceID
-from synapse.util.async_helpers import concurrently_execute
 from synapse.util.caches.descriptors import cachedInlineCallbacks
 from synapse.util.caches.response_cache import ResponseCache
 
@@ -37,7 +35,6 @@ logger = logging.getLogger(__name__)
 
 REMOTE_ROOM_LIST_POLL_INTERVAL = 60 * 1000
 
-
 # This is used to indicate we should only return rooms published to the main list.
 EMPTY_THIRD_PARTY_ID = ThirdPartyInstanceID(None, None)
 
@@ -72,6 +69,8 @@ class RoomListHandler(BaseHandler):
                 This can be (None, None) to indicate the main list, or a particular
                 appservice and network id to use an appservice specific one.
                 Setting to None returns all public rooms across all lists.
+            from_federation (bool): true iff the request comes from the federation
+                API
         """
         if not self.enable_room_list_search:
             return defer.succeed({"chunk": [], "total_room_count_estimate": 0})
@@ -133,239 +132,109 @@ class RoomListHandler(BaseHandler):
             from_federation (bool): Whether this request originated from a
                 federating server or a client. Used for room filtering.
             timeout (int|None): Amount of seconds to wait for a response before
-                timing out.
+                timing out. TODO
         """
-        if since_token and since_token != "END":
-            since_token = RoomListNextBatch.from_token(since_token)
-        else:
-            since_token = None
 
-        rooms_to_order_value = {}
-        rooms_to_num_joined = {}
+        # Pagination tokens work by storing the room ID sent in the last batch,
+        # plus the direction (forwards or backwards). Next batch tokens always
+        # go forwards, prev batch tokens always go backwards.
 
-        newly_visible = []
-        newly_unpublished = []
         if since_token:
-            stream_token = since_token.stream_ordering
-            current_public_id = yield self.store.get_current_public_room_stream_id()
-            public_room_stream_id = since_token.public_room_stream_id
-            newly_visible, newly_unpublished = yield self.store.get_public_room_changes(
-                public_room_stream_id, current_public_id, network_tuple=network_tuple
-            )
-        else:
-            stream_token = yield self.store.get_room_max_stream_ordering()
-            public_room_stream_id = yield self.store.get_current_public_room_stream_id()
+            batch_token = RoomListNextBatch.from_token(since_token)
 
-        room_ids = yield self.store.get_public_room_ids_at_stream_id(
-            public_room_stream_id, network_tuple=network_tuple
+            last_room_id = batch_token.last_room_id
+            forwards = batch_token.direction_is_forward
+        else:
+            batch_token = None
+
+            last_room_id = None
+            forwards = True
+
+        # we request one more than wanted to see if there are more pages to come
+        probing_limit = limit + 1 if limit is not None else None
+
+        results = yield self.store.get_largest_public_rooms(
+            network_tuple,
+            search_filter,
+            probing_limit,
+            last_room_id=last_room_id,
+            forwards=forwards,
+            ignore_non_federatable=from_federation,
         )
 
-        # We want to return rooms in a particular order: the number of joined
-        # users. We then arbitrarily use the room_id as a tie breaker.
+        def build_room_entry(room):
+            entry = {
+                "room_id": room["room_id"],
+                "name": room["name"],
+                "topic": room["topic"],
+                "canonical_alias": room["canonical_alias"],
+                "num_joined_members": room["joined_members"],
+                "avatar_url": room["avatar"],
+                "world_readable": room["history_visibility"] == "world_readable",
+                "guest_can_join": room["guest_access"] == "can_join",
+            }
 
-        @defer.inlineCallbacks
-        def get_order_for_room(room_id):
-            # Most of the rooms won't have changed between the since token and
-            # now (especially if the since token is "now"). So, we can ask what
-            # the current users are in a room (that will hit a cache) and then
-            # check if the room has changed since the since token. (We have to
-            # do it in that order to avoid races).
-            # If things have changed then fall back to getting the current state
-            # at the since token.
-            joined_users = yield self.store.get_users_in_room(room_id)
-            if self.store.has_room_changed_since(room_id, stream_token):
-                latest_event_ids = yield self.store.get_forward_extremeties_for_room(
-                    room_id, stream_token
-                )
+            # Filter out Nones – rather omit the field altogether
+            return {k: v for k, v in entry.items() if v is not None}
 
-                if not latest_event_ids:
-                    return
+        results = [build_room_entry(r) for r in results]
 
-                joined_users = yield self.state_handler.get_current_users_in_room(
-                    room_id, latest_event_ids
-                )
+        response = {}
+        num_results = len(results)
+        if limit is not None:
+            more_to_come = num_results == probing_limit
 
-            num_joined_users = len(joined_users)
-            rooms_to_num_joined[room_id] = num_joined_users
+            # Depending on direction we trim either the front or back.
+            if forwards:
+                results = results[:limit]
+            else:
+                results = results[-limit:]
+        else:
+            more_to_come = False
 
-            if num_joined_users == 0:
-                return
+        if num_results > 0:
+            final_room_id = results[-1]["room_id"]
+            initial_room_id = results[0]["room_id"]
 
-            # We want larger rooms to be first, hence negating num_joined_users
-            rooms_to_order_value[room_id] = (-num_joined_users, room_id)
+            if forwards:
+                if batch_token:
+                    # If there was a token given then we assume that there
+                    # must be previous results.
+                    response["prev_batch"] = RoomListNextBatch(
+                        last_room_id=initial_room_id, direction_is_forward=False
+                    ).to_token()
 
-        logger.info(
-            "Getting ordering for %i rooms since %s", len(room_ids), stream_token
+                if more_to_come:
+                    response["next_batch"] = RoomListNextBatch(
+                        last_room_id=final_room_id, direction_is_forward=True
+                    ).to_token()
+            else:
+                if batch_token:
+                    response["next_batch"] = RoomListNextBatch(
+                        last_room_id=final_room_id, direction_is_forward=True
+                    ).to_token()
+
+                if more_to_come:
+                    response["prev_batch"] = RoomListNextBatch(
+                        last_room_id=initial_room_id, direction_is_forward=False
+                    ).to_token()
+
+        for room in results:
+            # populate search result entries with additional fields, namely
+            # 'aliases'
+            room_id = room["room_id"]
+
+            aliases = yield self.store.get_aliases_for_room(room_id)
+            if aliases:
+                room["aliases"] = aliases
+
+        response["chunk"] = results
+
+        response["total_room_count_estimate"] = yield self.store.count_public_rooms(
+            network_tuple, ignore_non_federatable=from_federation
         )
-        yield concurrently_execute(get_order_for_room, room_ids, 10)
 
-        sorted_entries = sorted(rooms_to_order_value.items(), key=lambda e: e[1])
-        sorted_rooms = [room_id for room_id, _ in sorted_entries]
-
-        # `sorted_rooms` should now be a list of all public room ids that is
-        # stable across pagination. Therefore, we can use indices into this
-        # list as our pagination tokens.
-
-        # Filter out rooms that we don't want to return
-        rooms_to_scan = [
-            r
-            for r in sorted_rooms
-            if r not in newly_unpublished and rooms_to_num_joined[r] > 0
-        ]
-
-        total_room_count = len(rooms_to_scan)
-
-        if since_token:
-            # Filter out rooms we've already returned previously
-            # `since_token.current_limit` is the index of the last room we
-            # sent down, so we exclude it and everything before/after it.
-            if since_token.direction_is_forward:
-                rooms_to_scan = rooms_to_scan[since_token.current_limit + 1 :]
-            else:
-                rooms_to_scan = rooms_to_scan[: since_token.current_limit]
-                rooms_to_scan.reverse()
-
-        logger.info("After sorting and filtering, %i rooms remain", len(rooms_to_scan))
-
-        # _append_room_entry_to_chunk will append to chunk but will stop if
-        # len(chunk) > limit
-        #
-        # Normally we will generate enough results on the first iteration here,
-        #  but if there is a search filter, _append_room_entry_to_chunk may
-        # filter some results out, in which case we loop again.
-        #
-        # We don't want to scan over the entire range either as that
-        # would potentially waste a lot of work.
-        #
-        # XXX if there is no limit, we may end up DoSing the server with
-        # calls to get_current_state_ids for every single room on the
-        # server. Surely we should cap this somehow?
-        #
-        if limit:
-            step = limit + 1
-        else:
-            # step cannot be zero
-            step = len(rooms_to_scan) if len(rooms_to_scan) != 0 else 1
-
-        chunk = []
-        for i in range(0, len(rooms_to_scan), step):
-            if timeout and self.clock.time() > timeout:
-                raise Exception("Timed out searching room directory")
-
-            batch = rooms_to_scan[i : i + step]
-            logger.info("Processing %i rooms for result", len(batch))
-            yield concurrently_execute(
-                lambda r: self._append_room_entry_to_chunk(
-                    r,
-                    rooms_to_num_joined[r],
-                    chunk,
-                    limit,
-                    search_filter,
-                    from_federation=from_federation,
-                ),
-                batch,
-                5,
-            )
-            logger.info("Now %i rooms in result", len(chunk))
-            if len(chunk) >= limit + 1:
-                break
-
-        chunk.sort(key=lambda e: (-e["num_joined_members"], e["room_id"]))
-
-        # Work out the new limit of the batch for pagination, or None if we
-        # know there are no more results that would be returned.
-        # i.e., [since_token.current_limit..new_limit] is the batch of rooms
-        # we've returned (or the reverse if we paginated backwards)
-        # We tried to pull out limit + 1 rooms above, so if we have <= limit
-        # then we know there are no more results to return
-        new_limit = None
-        if chunk and (not limit or len(chunk) > limit):
-
-            if not since_token or since_token.direction_is_forward:
-                if limit:
-                    chunk = chunk[:limit]
-                last_room_id = chunk[-1]["room_id"]
-            else:
-                if limit:
-                    chunk = chunk[-limit:]
-                last_room_id = chunk[0]["room_id"]
-
-            new_limit = sorted_rooms.index(last_room_id)
-
-        results = {"chunk": chunk, "total_room_count_estimate": total_room_count}
-
-        if since_token:
-            results["new_rooms"] = bool(newly_visible)
-
-        if not since_token or since_token.direction_is_forward:
-            if new_limit is not None:
-                results["next_batch"] = RoomListNextBatch(
-                    stream_ordering=stream_token,
-                    public_room_stream_id=public_room_stream_id,
-                    current_limit=new_limit,
-                    direction_is_forward=True,
-                ).to_token()
-
-            if since_token:
-                results["prev_batch"] = since_token.copy_and_replace(
-                    direction_is_forward=False,
-                    current_limit=since_token.current_limit + 1,
-                ).to_token()
-        else:
-            if new_limit is not None:
-                results["prev_batch"] = RoomListNextBatch(
-                    stream_ordering=stream_token,
-                    public_room_stream_id=public_room_stream_id,
-                    current_limit=new_limit,
-                    direction_is_forward=False,
-                ).to_token()
-
-            if since_token:
-                results["next_batch"] = since_token.copy_and_replace(
-                    direction_is_forward=True,
-                    current_limit=since_token.current_limit - 1,
-                ).to_token()
-
-        return results
-
-    @defer.inlineCallbacks
-    def _append_room_entry_to_chunk(
-        self,
-        room_id,
-        num_joined_users,
-        chunk,
-        limit,
-        search_filter,
-        from_federation=False,
-    ):
-        """Generate the entry for a room in the public room list and append it
-        to the `chunk` if it matches the search filter
-
-        Args:
-            room_id (str): The ID of the room.
-            num_joined_users (int): The number of joined users in the room.
-            chunk (list)
-            limit (int|None): Maximum amount of rooms to display. Function will
-                return if length of chunk is greater than limit + 1.
-            search_filter (dict|None)
-            from_federation (bool): Whether this request originated from a
-                federating server or a client. Used for room filtering.
-        """
-        if limit and len(chunk) > limit + 1:
-            # We've already got enough, so lets just drop it.
-            return
-
-        result = yield self.generate_room_entry(room_id, num_joined_users)
-        if not result:
-            return
-
-        if from_federation and not result.get("m.federate", True):
-            # This is a room that other servers cannot join. Do not show them
-            # this room.
-            return
-
-        if _matches_room_entry(result, search_filter):
-            chunk.append(result)
+        return response
 
     @cachedInlineCallbacks(num_args=1, cache_context=True)
     def generate_room_entry(
@@ -580,32 +449,18 @@ class RoomListNextBatch(
     namedtuple(
         "RoomListNextBatch",
         (
-            "stream_ordering",  # stream_ordering of the first public room list
-            "public_room_stream_id",  # public room stream id for first public room list
-            "current_limit",  # The number of previous rooms returned
+            "last_room_id",  # The room_id to get rooms after/before
             "direction_is_forward",  # Bool if this is a next_batch, false if prev_batch
         ),
     )
 ):
-
-    KEY_DICT = {
-        "stream_ordering": "s",
-        "public_room_stream_id": "p",
-        "current_limit": "n",
-        "direction_is_forward": "d",
-    }
+    KEY_DICT = {"last_room_id": "r", "direction_is_forward": "d"}
 
     REVERSE_KEY_DICT = {v: k for k, v in KEY_DICT.items()}
 
     @classmethod
     def from_token(cls, token):
-        if PY3:
-            # The argument raw=False is only available on new versions of
-            # msgpack, and only really needed on Python 3. Gate it behind
-            # a PY3 check to avoid causing issues on Debian-packaged versions.
-            decoded = msgpack.loads(decode_base64(token), raw=False)
-        else:
-            decoded = msgpack.loads(decode_base64(token))
+        decoded = msgpack.loads(decode_base64(token), raw=False)
         return RoomListNextBatch(
             **{cls.REVERSE_KEY_DICT[key]: val for key, val in decoded.items()}
         )
diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py
index 6bf924ded..9c1d41421 100644
--- a/synapse/rest/client/v1/room.py
+++ b/synapse/rest/client/v1/room.py
@@ -361,6 +361,10 @@ class PublicRoomListRestServlet(TransactionRestServlet):
         limit = parse_integer(request, "limit", 0)
         since_token = parse_string(request, "since", None)
 
+        if limit == 0:
+            # zero is a special value which corresponds to no limit.
+            limit = None
+
         handler = self.hs.get_room_list_handler()
         if server:
             data = yield handler.get_remote_public_room_list(
@@ -398,6 +402,10 @@ class PublicRoomListRestServlet(TransactionRestServlet):
         else:
             network_tuple = ThirdPartyInstanceID.from_string(third_party_instance_id)
 
+        if limit == 0:
+            # zero is a special value which corresponds to no limit.
+            limit = None
+
         handler = self.hs.get_room_list_handler()
         if server:
             data = yield handler.get_remote_public_room_list(
diff --git a/synapse/storage/room.py b/synapse/storage/room.py
index 08e13f3a3..c02787a73 100644
--- a/synapse/storage/room.py
+++ b/synapse/storage/room.py
@@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
 # Copyright 2014-2016 OpenMarket Ltd
+# Copyright 2019 The Matrix.org Foundation C.I.C.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -63,103 +64,176 @@ class RoomWorkerStore(SQLBaseStore):
             desc="get_public_room_ids",
         )
 
-    @cached(num_args=2, max_entries=100)
-    def get_public_room_ids_at_stream_id(self, stream_id, network_tuple):
-        """Get pulbic rooms for a particular list, or across all lists.
+    def count_public_rooms(self, network_tuple, ignore_non_federatable):
+        """Counts the number of public rooms as tracked in the room_stats_current
+        and room_stats_state table.
 
         Args:
-            stream_id (int)
-            network_tuple (ThirdPartyInstanceID): The list to use (None, None)
-                means the main list, None means all lsits.
+            network_tuple (ThirdPartyInstanceID|None)
+            ignore_non_federatable (bool): If true filters out non-federatable rooms
         """
-        return self.runInteraction(
-            "get_public_room_ids_at_stream_id",
-            self.get_public_room_ids_at_stream_id_txn,
-            stream_id,
-            network_tuple=network_tuple,
-        )
 
-    def get_public_room_ids_at_stream_id_txn(self, txn, stream_id, network_tuple):
-        return {
-            rm
-            for rm, vis in self.get_published_at_stream_id_txn(
-                txn, stream_id, network_tuple=network_tuple
-            ).items()
-            if vis
+        def _count_public_rooms_txn(txn):
+            query_args = []
+
+            if network_tuple:
+                if network_tuple.appservice_id:
+                    published_sql = """
+                        SELECT room_id from appservice_room_list
+                        WHERE appservice_id = ? AND network_id = ?
+                    """
+                    query_args.append(network_tuple.appservice_id)
+                    query_args.append(network_tuple.network_id)
+                else:
+                    published_sql = """
+                        SELECT room_id FROM rooms WHERE is_public
+                    """
+            else:
+                published_sql = """
+                    SELECT room_id FROM rooms WHERE is_public
+                    UNION SELECT room_id from appservice_room_list
+            """
+
+            sql = """
+                SELECT
+                    COALESCE(COUNT(*), 0)
+                FROM (
+                    %(published_sql)s
+                ) published
+                INNER JOIN room_stats_state USING (room_id)
+                INNER JOIN room_stats_current USING (room_id)
+                WHERE
+                    (
+                        join_rules = 'public' OR history_visibility = 'world_readable'
+                    )
+                    AND joined_members > 0
+            """ % {
+                "published_sql": published_sql
+            }
+
+            txn.execute(sql, query_args)
+            return txn.fetchone()[0]
+
+        return self.runInteraction("count_public_rooms", _count_public_rooms_txn)
+
+    @defer.inlineCallbacks
+    def get_largest_public_rooms(
+        self,
+        network_tuple,
+        search_filter,
+        limit,
+        last_room_id,
+        forwards,
+        ignore_non_federatable=False,
+    ):
+        """Gets the largest public rooms (where largest is in terms of joined
+        members, as tracked in the statistics table).
+
+        Args:
+            network_tuple (ThirdPartyInstanceID|None):
+            search_filter (dict|None):
+            limit (int|None): Maxmimum number of rows to return, unlimited otherwise.
+            last_room_id (str|None): if present, a room ID which bounds the
+                result set, and is always *excluded* from the result set.
+            forwards (bool): true iff going forwards, going backwards otherwise
+            ignore_non_federatable (bool): If true filters out non-federatable rooms.
+
+        Returns:
+            Rooms in order: biggest number of joined users first.
+            We then arbitrarily use the room_id as a tie breaker.
+
+        """
+
+        where_clauses = []
+        query_args = []
+
+        if last_room_id:
+            if forwards:
+                where_clauses.append("room_id < ?")
+            else:
+                where_clauses.append("? < room_id")
+
+            query_args += [last_room_id]
+
+        if search_filter and search_filter.get("generic_search_term", None):
+            search_term = "%" + search_filter["generic_search_term"] + "%"
+
+            where_clauses.append(
+                """
+                    (
+                        name LIKE ?
+                        OR topic LIKE ?
+                        OR canonical_alias LIKE ?
+                    )
+                """
+            )
+            query_args += [search_term, search_term, search_term]
+
+        if network_tuple:
+            if network_tuple.appservice_id:
+                published_sql = """
+                    SELECT room_id from appservice_room_list
+                    WHERE appservice_id = ? AND network_id = ?
+                """
+                query_args.append(network_tuple.appservice_id)
+                query_args.append(network_tuple.network_id)
+            else:
+                published_sql = """
+                    SELECT room_id FROM rooms WHERE is_public
+                """
+        else:
+            published_sql = """
+                SELECT room_id FROM rooms WHERE is_public
+                UNION SELECT room_id from appservice_room_list
+            """
+
+        where_clause = ""
+        if where_clauses:
+            where_clause = " AND " + " AND ".join(where_clauses)
+
+        sql = """
+            SELECT
+                room_id, name, topic, canonical_alias, joined_members,
+                avatar, history_visibility, joined_members, guest_access
+            FROM (
+                %(published_sql)s
+            ) published
+            INNER JOIN room_stats_state USING (room_id)
+            INNER JOIN room_stats_current USING (room_id)
+            WHERE
+                (
+                    join_rules = 'public' OR history_visibility = 'world_readable'
+                )
+                AND joined_members > 0
+                %(where_clause)s
+            ORDER BY joined_members %(dir)s, room_id %(dir)s
+        """ % {
+            "published_sql": published_sql,
+            "where_clause": where_clause,
+            "dir": "DESC" if forwards else "ASC",
         }
 
-    def get_published_at_stream_id_txn(self, txn, stream_id, network_tuple):
-        if network_tuple:
-            # We want to get from a particular list. No aggregation required.
+        if limit is not None:
+            query_args.append(limit)
 
-            sql = """
-                SELECT room_id, visibility FROM public_room_list_stream
-                INNER JOIN (
-                    SELECT room_id, max(stream_id) AS stream_id
-                    FROM public_room_list_stream
-                    WHERE stream_id <= ? %s
-                    GROUP BY room_id
-                ) grouped USING (room_id, stream_id)
+            sql += """
+                LIMIT ?
             """
 
-            if network_tuple.appservice_id is not None:
-                txn.execute(
-                    sql % ("AND appservice_id = ? AND network_id = ?",),
-                    (stream_id, network_tuple.appservice_id, network_tuple.network_id),
-                )
-            else:
-                txn.execute(sql % ("AND appservice_id IS NULL",), (stream_id,))
-            return dict(txn)
-        else:
-            # We want to get from all lists, so we need to aggregate the results
+        def _get_largest_public_rooms_txn(txn):
+            txn.execute(sql, query_args)
 
-            logger.info("Executing full list")
+            results = self.cursor_to_dict(txn)
 
-            sql = """
-                SELECT room_id, visibility
-                FROM public_room_list_stream
-                INNER JOIN (
-                    SELECT
-                        room_id, max(stream_id) AS stream_id, appservice_id,
-                        network_id
-                    FROM public_room_list_stream
-                    WHERE stream_id <= ?
-                    GROUP BY room_id, appservice_id, network_id
-                ) grouped USING (room_id, stream_id)
-            """
-
-            txn.execute(sql, (stream_id,))
-
-            results = {}
-            # A room is visible if its visible on any list.
-            for room_id, visibility in txn:
-                results[room_id] = bool(visibility) or results.get(room_id, False)
+            if not forwards:
+                results.reverse()
 
             return results
 
-    def get_public_room_changes(self, prev_stream_id, new_stream_id, network_tuple):
-        def get_public_room_changes_txn(txn):
-            then_rooms = self.get_public_room_ids_at_stream_id_txn(
-                txn, prev_stream_id, network_tuple
-            )
-
-            now_rooms_dict = self.get_published_at_stream_id_txn(
-                txn, new_stream_id, network_tuple
-            )
-
-            now_rooms_visible = set(rm for rm, vis in now_rooms_dict.items() if vis)
-            now_rooms_not_visible = set(
-                rm for rm, vis in now_rooms_dict.items() if not vis
-            )
-
-            newly_visible = now_rooms_visible - then_rooms
-            newly_unpublished = now_rooms_not_visible & then_rooms
-
-            return newly_visible, newly_unpublished
-
-        return self.runInteraction(
-            "get_public_room_changes", get_public_room_changes_txn
+        ret_val = yield self.runInteraction(
+            "get_largest_public_rooms", _get_largest_public_rooms_txn
         )
+        defer.returnValue(ret_val)
 
     @cached(max_entries=10000)
     def is_room_blocked(self, room_id):
diff --git a/synapse/storage/schema/delta/56/public_room_list_idx.sql b/synapse/storage/schema/delta/56/public_room_list_idx.sql
new file mode 100644
index 000000000..7be31ffeb
--- /dev/null
+++ b/synapse/storage/schema/delta/56/public_room_list_idx.sql
@@ -0,0 +1,16 @@
+/* Copyright 2019 The Matrix.org Foundation C.I.C.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CREATE INDEX public_room_list_stream_network ON public_room_list_stream (appservice_id, network_id, room_id);
diff --git a/tests/handlers/test_roomlist.py b/tests/handlers/test_roomlist.py
deleted file mode 100644
index 61eebb698..000000000
--- a/tests/handlers/test_roomlist.py
+++ /dev/null
@@ -1,39 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright 2018 New Vector Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from synapse.handlers.room_list import RoomListNextBatch
-
-import tests.unittest
-import tests.utils
-
-
-class RoomListTestCase(tests.unittest.TestCase):
-    """ Tests RoomList's RoomListNextBatch. """
-
-    def setUp(self):
-        pass
-
-    def test_check_read_batch_tokens(self):
-        batch_token = RoomListNextBatch(
-            stream_ordering="abcdef",
-            public_room_stream_id="123",
-            current_limit=20,
-            direction_is_forward=True,
-        ).to_token()
-        next_batch = RoomListNextBatch.from_token(batch_token)
-        self.assertEquals(next_batch.stream_ordering, "abcdef")
-        self.assertEquals(next_batch.public_room_stream_id, "123")
-        self.assertEquals(next_batch.current_limit, 20)
-        self.assertEquals(next_batch.direction_is_forward, True)

From 03cf4385e098ae73730b9c5ef695fa3f16c1806f Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 2 Oct 2019 15:09:10 +0100
Subject: [PATCH 163/276] Fix public room list pagination.

We incorrectly used only `room_id` to bound the result set, even though we
order by `joined_members, room_id`, leading to incorrect results after
pagination.
---
 synapse/handlers/room_list.py | 33 +++++++++++++++-------
 synapse/storage/room.py       | 53 ++++++++++++++++++++++++-----------
 2 files changed, 59 insertions(+), 27 deletions(-)

diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py
index 4e1cc5460..cfed344d4 100644
--- a/synapse/handlers/room_list.py
+++ b/synapse/handlers/room_list.py
@@ -142,12 +142,12 @@ class RoomListHandler(BaseHandler):
         if since_token:
             batch_token = RoomListNextBatch.from_token(since_token)
 
-            last_room_id = batch_token.last_room_id
+            bounds = (batch_token.last_joined_members, batch_token.last_room_id)
             forwards = batch_token.direction_is_forward
         else:
             batch_token = None
+            bounds = None
 
-            last_room_id = None
             forwards = True
 
         # we request one more than wanted to see if there are more pages to come
@@ -157,7 +157,7 @@ class RoomListHandler(BaseHandler):
             network_tuple,
             search_filter,
             probing_limit,
-            last_room_id=last_room_id,
+            bounds=bounds,
             forwards=forwards,
             ignore_non_federatable=from_federation,
         )
@@ -193,30 +193,38 @@ class RoomListHandler(BaseHandler):
             more_to_come = False
 
         if num_results > 0:
-            final_room_id = results[-1]["room_id"]
-            initial_room_id = results[0]["room_id"]
+            final_entry = results[-1]
+            initial_entry = results[0]
 
             if forwards:
                 if batch_token:
                     # If there was a token given then we assume that there
                     # must be previous results.
                     response["prev_batch"] = RoomListNextBatch(
-                        last_room_id=initial_room_id, direction_is_forward=False
+                        last_joined_members=initial_entry["num_joined_members"],
+                        last_room_id=initial_entry["room_id"],
+                        direction_is_forward=False,
                     ).to_token()
 
                 if more_to_come:
                     response["next_batch"] = RoomListNextBatch(
-                        last_room_id=final_room_id, direction_is_forward=True
+                        last_joined_members=final_entry["num_joined_members"],
+                        last_room_id=final_entry["room_id"],
+                        direction_is_forward=True,
                     ).to_token()
             else:
                 if batch_token:
                     response["next_batch"] = RoomListNextBatch(
-                        last_room_id=final_room_id, direction_is_forward=True
+                        last_joined_members=final_entry["num_joined_members"],
+                        last_room_id=final_entry["room_id"],
+                        direction_is_forward=True,
                     ).to_token()
 
                 if more_to_come:
                     response["prev_batch"] = RoomListNextBatch(
-                        last_room_id=initial_room_id, direction_is_forward=False
+                        last_joined_members=initial_entry["num_joined_members"],
+                        last_room_id=initial_entry["room_id"],
+                        direction_is_forward=False,
                     ).to_token()
 
         for room in results:
@@ -449,12 +457,17 @@ class RoomListNextBatch(
     namedtuple(
         "RoomListNextBatch",
         (
+            "last_joined_members",  # The count to get rooms after/before
             "last_room_id",  # The room_id to get rooms after/before
             "direction_is_forward",  # Bool if this is a next_batch, false if prev_batch
         ),
     )
 ):
-    KEY_DICT = {"last_room_id": "r", "direction_is_forward": "d"}
+    KEY_DICT = {
+        "last_joined_members": "m",
+        "last_room_id": "r",
+        "direction_is_forward": "d",
+    }
 
     REVERSE_KEY_DICT = {v: k for k, v in KEY_DICT.items()}
 
diff --git a/synapse/storage/room.py b/synapse/storage/room.py
index c02787a73..9b7e31583 100644
--- a/synapse/storage/room.py
+++ b/synapse/storage/room.py
@@ -17,6 +17,7 @@
 import collections
 import logging
 import re
+from typing import Optional, Tuple
 
 from canonicaljson import json
 
@@ -25,6 +26,7 @@ from twisted.internet import defer
 from synapse.api.errors import StoreError
 from synapse.storage._base import SQLBaseStore
 from synapse.storage.search import SearchStore
+from synapse.types import ThirdPartyInstanceID
 from synapse.util.caches.descriptors import cached, cachedInlineCallbacks
 
 logger = logging.getLogger(__name__)
@@ -119,24 +121,25 @@ class RoomWorkerStore(SQLBaseStore):
     @defer.inlineCallbacks
     def get_largest_public_rooms(
         self,
-        network_tuple,
-        search_filter,
-        limit,
-        last_room_id,
-        forwards,
-        ignore_non_federatable=False,
+        network_tuple: Optional[ThirdPartyInstanceID],
+        search_filter: Optional[dict],
+        limit: Optional[int],
+        bounds: Optional[Tuple[int, str]],
+        forwards: bool,
+        ignore_non_federatable: bool = False,
     ):
         """Gets the largest public rooms (where largest is in terms of joined
         members, as tracked in the statistics table).
 
         Args:
-            network_tuple (ThirdPartyInstanceID|None):
-            search_filter (dict|None):
-            limit (int|None): Maxmimum number of rows to return, unlimited otherwise.
-            last_room_id (str|None): if present, a room ID which bounds the
-                result set, and is always *excluded* from the result set.
-            forwards (bool): true iff going forwards, going backwards otherwise
-            ignore_non_federatable (bool): If true filters out non-federatable rooms.
+            network_tuple
+            search_filter
+            limit: Maxmimum number of rows to return, unlimited otherwise.
+            bounds: An uppoer or lower bound to apply to result set if given,
+                consists of a joined member count and room_id (these are
+                excluded from result set).
+            forwards: true iff going forwards, going backwards otherwise
+            ignore_non_federatable: If true filters out non-federatable rooms.
 
         Returns:
             Rooms in order: biggest number of joined users first.
@@ -147,13 +150,29 @@ class RoomWorkerStore(SQLBaseStore):
         where_clauses = []
         query_args = []
 
-        if last_room_id:
+        # Work out the bounds if we're given them, these bounds look slightly
+        # odd, but are designed to help query planner use indices by pulling
+        # out a common bound.
+        if bounds:
+            last_joined_members, last_room_id = bounds
             if forwards:
-                where_clauses.append("room_id < ?")
+                where_clauses.append(
+                    """
+                        joined_members <= ? AND (
+                            joined_members < ? OR room_id < ?
+                        )
+                    """
+                )
             else:
-                where_clauses.append("? < room_id")
+                where_clauses.append(
+                    """
+                        joined_members >= ? AND (
+                            joined_members > ? OR room_id > ?
+                        )
+                    """
+                )
 
-            query_args += [last_room_id]
+            query_args += [last_joined_members, last_joined_members, last_room_id]
 
         if search_filter and search_filter.get("generic_search_term", None):
             search_term = "%" + search_filter["generic_search_term"] + "%"

From 8e32240e6b650746d73315178af9aeb6dfa9be94 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 2 Oct 2019 15:12:17 +0100
Subject: [PATCH 164/276] Newsfile

---
 changelog.d/6152.misc | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/6152.misc

diff --git a/changelog.d/6152.misc b/changelog.d/6152.misc
new file mode 100644
index 000000000..dfee73c28
--- /dev/null
+++ b/changelog.d/6152.misc
@@ -0,0 +1 @@
+Improve performance of the public room list directory.

From 4c4f44930d2153056dc1b992c571f43f2d360d07 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 2 Oct 2019 15:20:36 +0100
Subject: [PATCH 165/276] Fix not showing non-federatable rooms to remote room
 list queries

---
 synapse/storage/room.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/synapse/storage/room.py b/synapse/storage/room.py
index 9b7e31583..615c0d3f6 100644
--- a/synapse/storage/room.py
+++ b/synapse/storage/room.py
@@ -174,6 +174,9 @@ class RoomWorkerStore(SQLBaseStore):
 
             query_args += [last_joined_members, last_joined_members, last_room_id]
 
+        if ignore_non_federatable:
+            where_clauses.append("is_federatable")
+
         if search_filter and search_filter.get("generic_search_term", None):
             search_term = "%" + search_filter["generic_search_term"] + "%"
 

From ed73f04bef517eddebb3b0f0319d6e3322d1b7ec Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 2 Oct 2019 15:24:33 +0100
Subject: [PATCH 166/276] Newsfile

---
 changelog.d/6153.misc | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/6153.misc

diff --git a/changelog.d/6153.misc b/changelog.d/6153.misc
new file mode 100644
index 000000000..dfee73c28
--- /dev/null
+++ b/changelog.d/6153.misc
@@ -0,0 +1 @@
+Improve performance of the public room list directory.

From 7a5f080f91f42fe011a1ab497acdce481187da5f Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 2 Oct 2019 15:47:22 +0100
Subject: [PATCH 167/276] Fix appservice room list pagination

---
 synapse/storage/room.py | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/synapse/storage/room.py b/synapse/storage/room.py
index 9b7e31583..70bd71952 100644
--- a/synapse/storage/room.py
+++ b/synapse/storage/room.py
@@ -150,6 +150,24 @@ class RoomWorkerStore(SQLBaseStore):
         where_clauses = []
         query_args = []
 
+        if network_tuple:
+            if network_tuple.appservice_id:
+                published_sql = """
+                    SELECT room_id from appservice_room_list
+                    WHERE appservice_id = ? AND network_id = ?
+                """
+                query_args.append(network_tuple.appservice_id)
+                query_args.append(network_tuple.network_id)
+            else:
+                published_sql = """
+                    SELECT room_id FROM rooms WHERE is_public
+                """
+        else:
+            published_sql = """
+                SELECT room_id FROM rooms WHERE is_public
+                UNION SELECT room_id from appservice_room_list
+            """
+
         # Work out the bounds if we're given them, these bounds look slightly
         # odd, but are designed to help query planner use indices by pulling
         # out a common bound.
@@ -188,24 +206,6 @@ class RoomWorkerStore(SQLBaseStore):
             )
             query_args += [search_term, search_term, search_term]
 
-        if network_tuple:
-            if network_tuple.appservice_id:
-                published_sql = """
-                    SELECT room_id from appservice_room_list
-                    WHERE appservice_id = ? AND network_id = ?
-                """
-                query_args.append(network_tuple.appservice_id)
-                query_args.append(network_tuple.network_id)
-            else:
-                published_sql = """
-                    SELECT room_id FROM rooms WHERE is_public
-                """
-        else:
-            published_sql = """
-                SELECT room_id FROM rooms WHERE is_public
-                UNION SELECT room_id from appservice_room_list
-            """
-
         where_clause = ""
         if where_clauses:
             where_clause = " AND " + " AND ".join(where_clauses)

From 5be4083306c294ab5905683d32c5fa8c90219c95 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 2 Oct 2019 15:48:36 +0100
Subject: [PATCH 168/276] Newsfile

---
 changelog.d/6154.misc | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/6154.misc

diff --git a/changelog.d/6154.misc b/changelog.d/6154.misc
new file mode 100644
index 000000000..dfee73c28
--- /dev/null
+++ b/changelog.d/6154.misc
@@ -0,0 +1 @@
+Improve performance of the public room list directory.

From 6527fa18c1e6f9bcb22318916b5e9534c91c84c1 Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Wed, 2 Oct 2019 14:44:58 +0100
Subject: [PATCH 169/276] Add test case

---
 synapse/handlers/federation.py    |  2 +-
 tests/handlers/test_federation.py | 83 +++++++++++++++++++++++++++++++
 2 files changed, 84 insertions(+), 1 deletion(-)
 create mode 100644 tests/handlers/test_federation.py

diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index 75d79bb8e..91f3a6929 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -2570,7 +2570,7 @@ class FederationHandler(BaseHandler):
         )
 
         try:
-            self.auth.check_from_context(room_version, event, context)
+            yield self.auth.check_from_context(room_version, event, context)
         except AuthError as e:
             logger.warn("Denying third party invite %r because %s", event, e)
             raise e
diff --git a/tests/handlers/test_federation.py b/tests/handlers/test_federation.py
new file mode 100644
index 000000000..20416a014
--- /dev/null
+++ b/tests/handlers/test_federation.py
@@ -0,0 +1,83 @@
+# -*- coding: utf-8 -*-
+# Copyright 2019 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from synapse.api.constants import EventTypes
+from synapse.api.errors import AuthError, Codes
+from synapse.rest import admin
+from synapse.rest.client.v1 import login, room
+
+from tests import unittest
+
+
+class FederationTestCase(unittest.HomeserverTestCase):
+    servlets = [
+        admin.register_servlets,
+        login.register_servlets,
+        room.register_servlets,
+    ]
+
+    def make_homeserver(self, reactor, clock):
+        hs = self.setup_test_homeserver(http_client=None)
+        self.handler = hs.get_handlers().federation_handler
+        self.store = hs.get_datastore()
+        return hs
+
+    def test_exchange_revoked_invite(self):
+        user_id = self.register_user("kermit", "test")
+        tok = self.login("kermit", "test")
+
+        room_id = self.helper.create_room_as(
+            room_creator=user_id, tok=tok
+        )
+
+        # Send a 3PID invite event with an empty body so it's considered as a revoked one.
+        invite_token = "sometoken"
+        self.helper.send_state(
+            room_id=room_id,
+            event_type=EventTypes.ThirdPartyInvite,
+            state_key=invite_token,
+            body={},
+            tok=tok,
+        )
+
+        d = self.handler.on_exchange_third_party_invite_request(
+            room_id=room_id,
+            event_dict={
+                "type": EventTypes.Member,
+                "room_id": room_id,
+                "sender": user_id,
+                "state_key": "@someone:example.org",
+                "content": {
+                    "membership": "invite",
+                    "third_party_invite": {
+                        "display_name": "alice",
+                        "signed": {
+                            "mxid": "@alice:localhost",
+                            "token": invite_token,
+                            "signatures": {
+                                "magic.forest": {
+                                    "ed25519:3": "fQpGIW1Snz+pwLZu6sTy2aHy/DYWWTspTJRPyNp0PKkymfIsNffysMl6ObMMFdIJhk6g6pwlIqZ54rxo8SLmAg"
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        )
+
+        failure = self.get_failure(d, AuthError).value
+
+        self.assertEqual(failure.code, 403, failure)
+        self.assertEqual(failure.errcode, Codes.FORBIDDEN, failure)
+        self.assertEqual(failure.msg, "You are not invited to this room.")

From ebcb6a30d7b1bdb859a1fd22d567b163a1488763 Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Thu, 3 Oct 2019 11:29:07 +0100
Subject: [PATCH 170/276] Lint

---
 tests/handlers/test_federation.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/handlers/test_federation.py b/tests/handlers/test_federation.py
index 20416a014..a18dfc0e9 100644
--- a/tests/handlers/test_federation.py
+++ b/tests/handlers/test_federation.py
@@ -69,11 +69,11 @@ class FederationTestCase(unittest.HomeserverTestCase):
                                 "magic.forest": {
                                     "ed25519:3": "fQpGIW1Snz+pwLZu6sTy2aHy/DYWWTspTJRPyNp0PKkymfIsNffysMl6ObMMFdIJhk6g6pwlIqZ54rxo8SLmAg"
                                 }
-                            }
-                        }
-                    }
-                }
-            }
+                            },
+                        },
+                    },
+                },
+            },
         )
 
         failure = self.get_failure(d, AuthError).value

From 8a5e8e829b98687ea274fae47db3aa801b6f97d3 Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Thu, 3 Oct 2019 11:30:43 +0100
Subject: [PATCH 171/276] Lint (again)

---
 tests/handlers/test_federation.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tests/handlers/test_federation.py b/tests/handlers/test_federation.py
index a18dfc0e9..d56220f40 100644
--- a/tests/handlers/test_federation.py
+++ b/tests/handlers/test_federation.py
@@ -37,9 +37,7 @@ class FederationTestCase(unittest.HomeserverTestCase):
         user_id = self.register_user("kermit", "test")
         tok = self.login("kermit", "test")
 
-        room_id = self.helper.create_room_as(
-            room_creator=user_id, tok=tok
-        )
+        room_id = self.helper.create_room_as(room_creator=user_id, tok=tok)
 
         # Send a 3PID invite event with an empty body so it's considered as a revoked one.
         invite_token = "sometoken"

From 0f46bf5737012bb09b40f8e71c5f6db84125df8f Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Thu, 3 Oct 2019 12:57:26 +0100
Subject: [PATCH 172/276] Replace client_secret with <redacted> in server logs
 (#6158)

Replace `client_secret` query parameter values with `<redacted>` in the logs. Prevents a scenario where a MITM of server traffic can hoard 3pids on their account.
---
 changelog.d/6158.bugfix  | 1 +
 synapse/http/__init__.py | 6 ++++--
 2 files changed, 5 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/6158.bugfix

diff --git a/changelog.d/6158.bugfix b/changelog.d/6158.bugfix
new file mode 100644
index 000000000..6b48fce05
--- /dev/null
+++ b/changelog.d/6158.bugfix
@@ -0,0 +1 @@
+Redact `client_secret` in server logs.
diff --git a/synapse/http/__init__.py b/synapse/http/__init__.py
index 3acf772cd..3880ce0d9 100644
--- a/synapse/http/__init__.py
+++ b/synapse/http/__init__.py
@@ -42,11 +42,13 @@ def cancelled_to_request_timed_out_error(value, timeout):
 
 
 ACCESS_TOKEN_RE = re.compile(r"(\?.*access(_|%5[Ff])token=)[^&]*(.*)$")
+CLIENT_SECRET_RE = re.compile(r"(\?.*client(_|%5[Ff])secret=)[^&]*(.*)$")
 
 
 def redact_uri(uri):
-    """Strips access tokens from the uri replaces with <redacted>"""
-    return ACCESS_TOKEN_RE.sub(r"\1<redacted>\3", uri)
+    """Strips sensitive information from the uri replaces with <redacted>"""
+    uri = ACCESS_TOKEN_RE.sub(r"\1<redacted>\3", uri)
+    return CLIENT_SECRET_RE.sub(r"\1<redacted>\3", uri)
 
 
 class QuieterFileBodyProducer(FileBodyProducer):

From ecb69d824a39d420a20a1c0b24a7174cea392560 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <andrew@amorgan.xyz>
Date: Thu, 3 Oct 2019 13:22:44 +0100
Subject: [PATCH 173/276] 1.4.0

---
 CHANGES.md              | 9 +++++++++
 changelog.d/6158.bugfix | 1 -
 debian/changelog        | 6 ++++++
 synapse/__init__.py     | 2 +-
 4 files changed, 16 insertions(+), 2 deletions(-)
 delete mode 100644 changelog.d/6158.bugfix

diff --git a/CHANGES.md b/CHANGES.md
index 78322a08c..165e1d4db 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,3 +1,12 @@
+Synapse 1.4.0 (2019-10-03)
+==========================
+
+Bugfixes
+--------
+
+- Redact `client_secret` in server logs. ([\#6158](https://github.com/matrix-org/synapse/issues/6158))
+
+
 Synapse 1.4.0rc2 (2019-10-02)
 =============================
 
diff --git a/changelog.d/6158.bugfix b/changelog.d/6158.bugfix
deleted file mode 100644
index 6b48fce05..000000000
--- a/changelog.d/6158.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Redact `client_secret` in server logs.
diff --git a/debian/changelog b/debian/changelog
index 76efc442d..60c682cc5 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+matrix-synapse-py3 (1.4.0) stable; urgency=medium
+
+  * New synapse release 1.4.0.
+
+ -- Synapse Packaging team <packages@matrix.org>  Thu, 03 Oct 2019 13:22:25 +0100
+
 matrix-synapse-py3 (1.3.1) stable; urgency=medium
 
   * New synapse release 1.3.1.
diff --git a/synapse/__init__.py b/synapse/__init__.py
index 5197eea22..2d52d26af 100644
--- a/synapse/__init__.py
+++ b/synapse/__init__.py
@@ -35,4 +35,4 @@ try:
 except ImportError:
     pass
 
-__version__ = "1.4.0rc2"
+__version__ = "1.4.0"

From c8145af8a9446911bbb52fc0114eebf4eebede4b Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 3 Oct 2019 17:11:04 +0100
Subject: [PATCH 174/276] Cache room membership lookups in
 _get_joined_users_from_context

---
 synapse/storage/roommember.py | 64 ++++++++++++++++++++++++-----------
 1 file changed, 45 insertions(+), 19 deletions(-)

diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py
index 4df8ebdac..f11bbd05f 100644
--- a/synapse/storage/roommember.py
+++ b/synapse/storage/roommember.py
@@ -16,6 +16,7 @@
 
 import logging
 from collections import namedtuple
+from typing import Iterable
 
 from six import iteritems, itervalues
 
@@ -32,7 +33,7 @@ from synapse.storage.events_worker import EventsWorkerStore
 from synapse.types import get_domain_from_id
 from synapse.util.async_helpers import Linearizer
 from synapse.util.caches import intern_string
-from synapse.util.caches.descriptors import cached, cachedInlineCallbacks
+from synapse.util.caches.descriptors import cached, cachedInlineCallbacks, cachedList
 from synapse.util.stringutils import to_ascii
 
 logger = logging.getLogger(__name__)
@@ -567,25 +568,10 @@ class RoomMemberWorkerStore(EventsWorkerStore):
                 missing_member_event_ids.append(event_id)
 
         if missing_member_event_ids:
-            rows = yield self._simple_select_many_batch(
-                table="room_memberships",
-                column="event_id",
-                iterable=missing_member_event_ids,
-                retcols=("user_id", "display_name", "avatar_url"),
-                keyvalues={"membership": Membership.JOIN},
-                batch_size=500,
-                desc="_get_joined_users_from_context",
-            )
-
-            users_in_room.update(
-                {
-                    to_ascii(row["user_id"]): ProfileInfo(
-                        avatar_url=to_ascii(row["avatar_url"]),
-                        display_name=to_ascii(row["display_name"]),
-                    )
-                    for row in rows
-                }
+            event_to_memberships = yield self._get_membership_from_event_ids(
+                missing_member_event_ids
             )
+            users_in_room.update((row for row in event_to_memberships.values() if row))
 
         if event is not None and event.type == EventTypes.Member:
             if event.membership == Membership.JOIN:
@@ -597,6 +583,46 @@ class RoomMemberWorkerStore(EventsWorkerStore):
 
         return users_in_room
 
+    @cached(max_entries=10000)
+    def _get_membership_from_event_id(self, event_id):
+        raise NotADirectoryError()
+
+    @cachedList(
+        cached_method_name="_get_membership_from_event_id",
+        list_name="event_ids",
+        inlineCallbacks=True,
+    )
+    def _get_membership_from_event_ids(self, event_ids: Iterable[str]):
+        """Lookup profile info for set of member event IDs.
+
+        Args:
+            event_ids: The member event IDs to lookup
+
+        Returns:
+            Deferred[dict[str, Tuple[str, ProfileInfo]|None]]: Map from event ID
+            to `user_id` and ProfileInfo (or None if couldn't find event).
+        """
+
+        rows = yield self._simple_select_many_batch(
+            table="room_memberships",
+            column="event_id",
+            iterable=event_ids,
+            retcols=("user_id", "display_name", "avatar_url"),
+            keyvalues={"membership": Membership.JOIN},
+            batch_size=500,
+            desc="_get_membership_from_event_ids",
+        )
+
+        return {
+            row["event_id"]: (
+                row["user_id"],
+                ProfileInfo(
+                    avatar_url=row["avatar_url"], display_name=row["display_name"]
+                ),
+            )
+            for row in rows
+        }
+
     @cachedInlineCallbacks(max_entries=10000)
     def is_host_joined(self, room_id, host):
         if "%" in host or "_" in host:

From 0ccf0ffc855f8d12f16598af77f356f236096994 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 3 Oct 2019 17:12:24 +0100
Subject: [PATCH 175/276] Newsfile

---
 changelog.d/6159.misc | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/6159.misc

diff --git a/changelog.d/6159.misc b/changelog.d/6159.misc
new file mode 100644
index 000000000..06cc163f8
--- /dev/null
+++ b/changelog.d/6159.misc
@@ -0,0 +1 @@
+Add more caching to `_get_joined_users_from_context` DB query.

From d89ebf7c25b4f55d513da41a3ea20b9f8adc62d1 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 3 Oct 2019 17:23:11 +0100
Subject: [PATCH 176/276] cachedList descriptor doesn't like typing

---
 synapse/storage/roommember.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py
index f11bbd05f..ef6179cbe 100644
--- a/synapse/storage/roommember.py
+++ b/synapse/storage/roommember.py
@@ -592,11 +592,11 @@ class RoomMemberWorkerStore(EventsWorkerStore):
         list_name="event_ids",
         inlineCallbacks=True,
     )
-    def _get_membership_from_event_ids(self, event_ids: Iterable[str]):
+    def _get_membership_from_event_ids(self, event_ids):
         """Lookup profile info for set of member event IDs.
 
         Args:
-            event_ids: The member event IDs to lookup
+            event_ids (Iterable[str]): The member event IDs to lookup
 
         Returns:
             Deferred[dict[str, Tuple[str, ProfileInfo]|None]]: Map from event ID

From a9610cdf02e04ecd8df52ebe74b1cb1338a5be97 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 3 Oct 2019 17:26:56 +0100
Subject: [PATCH 177/276] Fixup names and comments

---
 synapse/storage/roommember.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py
index ef6179cbe..37c0723eb 100644
--- a/synapse/storage/roommember.py
+++ b/synapse/storage/roommember.py
@@ -568,7 +568,7 @@ class RoomMemberWorkerStore(EventsWorkerStore):
                 missing_member_event_ids.append(event_id)
 
         if missing_member_event_ids:
-            event_to_memberships = yield self._get_membership_from_event_ids(
+            event_to_memberships = yield self._get_joined_profiles_from_event_ids(
                 missing_member_event_ids
             )
             users_in_room.update((row for row in event_to_memberships.values() if row))
@@ -584,23 +584,24 @@ class RoomMemberWorkerStore(EventsWorkerStore):
         return users_in_room
 
     @cached(max_entries=10000)
-    def _get_membership_from_event_id(self, event_id):
+    def _get_joined_profile_from_event_id(self, event_id):
         raise NotADirectoryError()
 
     @cachedList(
-        cached_method_name="_get_membership_from_event_id",
+        cached_method_name="_get_joined_profile_from_event_id",
         list_name="event_ids",
         inlineCallbacks=True,
     )
-    def _get_membership_from_event_ids(self, event_ids):
-        """Lookup profile info for set of member event IDs.
+    def _get_joined_profiles_from_event_ids(self, event_ids):
+        """For given set of member event_ids check if they point to a join
+        event and if so return the associated user and profile info.
 
         Args:
             event_ids (Iterable[str]): The member event IDs to lookup
 
         Returns:
             Deferred[dict[str, Tuple[str, ProfileInfo]|None]]: Map from event ID
-            to `user_id` and ProfileInfo (or None if couldn't find event).
+            to `user_id` and ProfileInfo (or None if not join event).
         """
 
         rows = yield self._simple_select_many_batch(

From 84691da6c3058f265f8b86d9a6592ba8ce90e2ed Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 3 Oct 2019 17:27:18 +0100
Subject: [PATCH 178/276] pep8

---
 synapse/storage/roommember.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py
index 37c0723eb..ed7d936b3 100644
--- a/synapse/storage/roommember.py
+++ b/synapse/storage/roommember.py
@@ -16,7 +16,6 @@
 
 import logging
 from collections import namedtuple
-from typing import Iterable
 
 from six import iteritems, itervalues
 

From 91f61fc6d74e923822eb92933e0d028848285a40 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 3 Oct 2019 17:28:31 +0100
Subject: [PATCH 179/276] Use the right error....

---
 synapse/storage/roommember.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py
index ed7d936b3..1160b98cc 100644
--- a/synapse/storage/roommember.py
+++ b/synapse/storage/roommember.py
@@ -584,7 +584,7 @@ class RoomMemberWorkerStore(EventsWorkerStore):
 
     @cached(max_entries=10000)
     def _get_joined_profile_from_event_id(self, event_id):
-        raise NotADirectoryError()
+        raise NotImplementedError()
 
     @cachedList(
         cached_method_name="_get_joined_profile_from_event_id",

From 693156aaf4579f48ad265f42f90b1bd73feda129 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 3 Oct 2019 17:33:54 +0100
Subject: [PATCH 180/276] Don't regenerate numeric user ID if registration
 fails.

Regenerating the ID causes huge amounts of DB IO if registrations start
to fail, e.g. because the DB is struggling with IO.
---
 synapse/handlers/register.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py
index 06bd03b77..6dd7ef374 100644
--- a/synapse/handlers/register.py
+++ b/synapse/handlers/register.py
@@ -220,7 +220,7 @@ class RegistrationHandler(BaseHandler):
             attempts = 0
             user = None
             while not user:
-                localpart = yield self._generate_user_id(attempts > 0)
+                localpart = yield self._generate_user_id()
                 user = UserID(localpart, self.hs.hostname)
                 user_id = user.to_string()
                 yield self.check_user_id_not_appservice_exclusive(user_id)
@@ -379,10 +379,10 @@ class RegistrationHandler(BaseHandler):
                 )
 
     @defer.inlineCallbacks
-    def _generate_user_id(self, reseed=False):
-        if reseed or self._next_generated_user_id is None:
+    def _generate_user_id(self):
+        if self._next_generated_user_id is None:
             with (yield self._generate_user_id_linearizer.queue(())):
-                if reseed or self._next_generated_user_id is None:
+                if self._next_generated_user_id is None:
                     self._next_generated_user_id = (
                         yield self.store.find_next_generated_user_id_localpart()
                     )

From 4fc60f12deef19407e9b761f3d9c24c48384118c Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 3 Oct 2019 17:35:50 +0100
Subject: [PATCH 181/276] Newsfile

---
 changelog.d/6161.misc | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/6161.misc

diff --git a/changelog.d/6161.misc b/changelog.d/6161.misc
new file mode 100644
index 000000000..7c5d61cb8
--- /dev/null
+++ b/changelog.d/6161.misc
@@ -0,0 +1 @@
+Don't regenerate numeric user ID if registration fails.

From ab8a64772b9e663e74fbdafef9a729bd49369e65 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 3 Oct 2019 17:42:32 +0100
Subject: [PATCH 182/276] Remove unused variable

---
 synapse/handlers/register.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py
index 6dd7ef374..53410f120 100644
--- a/synapse/handlers/register.py
+++ b/synapse/handlers/register.py
@@ -217,7 +217,6 @@ class RegistrationHandler(BaseHandler):
 
         else:
             # autogen a sequential user ID
-            attempts = 0
             user = None
             while not user:
                 localpart = yield self._generate_user_id()
@@ -238,7 +237,6 @@ class RegistrationHandler(BaseHandler):
                     # if user id is taken, just generate another
                     user = None
                     user_id = None
-                    attempts += 1
 
         if not self.hs.config.user_consent_at_registration:
             yield self._auto_join_rooms(user_id)

From 0186ec9df7e55e35fa9b6579869cd308dc178a3c Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 3 Oct 2019 17:46:43 +0100
Subject: [PATCH 183/276] Fixup newsfile

---
 changelog.d/6161.bugfix | 1 +
 changelog.d/6161.misc   | 1 -
 2 files changed, 1 insertion(+), 1 deletion(-)
 create mode 100644 changelog.d/6161.bugfix
 delete mode 100644 changelog.d/6161.misc

diff --git a/changelog.d/6161.bugfix b/changelog.d/6161.bugfix
new file mode 100644
index 000000000..a0e2adb97
--- /dev/null
+++ b/changelog.d/6161.bugfix
@@ -0,0 +1 @@
+Fix bug where guest account registration can wedge after restart.
diff --git a/changelog.d/6161.misc b/changelog.d/6161.misc
deleted file mode 100644
index 7c5d61cb8..000000000
--- a/changelog.d/6161.misc
+++ /dev/null
@@ -1 +0,0 @@
-Don't regenerate numeric user ID if registration fails.

From 66537e10ce77e47fac52e3f27569ac1ef0f1aaa3 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Thu, 3 Oct 2019 17:47:20 +0100
Subject: [PATCH 184/276] add some metrics on the federation sender (#6160)

---
 changelog.d/6160.misc                 |  1 +
 synapse/federation/sender/__init__.py | 11 ++++++-----
 synapse/state/__init__.py             | 24 ++++++++++++++++++------
 synapse/storage/roommember.py         | 21 +++++++++++++++------
 synapse/util/metrics.py               |  6 ++++--
 5 files changed, 44 insertions(+), 19 deletions(-)
 create mode 100644 changelog.d/6160.misc

diff --git a/changelog.d/6160.misc b/changelog.d/6160.misc
new file mode 100644
index 000000000..3d7cce00e
--- /dev/null
+++ b/changelog.d/6160.misc
@@ -0,0 +1 @@
+Add some metrics on the federation sender.
diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py
index d46f4aaeb..2b2ee8612 100644
--- a/synapse/federation/sender/__init__.py
+++ b/synapse/federation/sender/__init__.py
@@ -38,7 +38,7 @@ from synapse.metrics import (
     events_processed_counter,
 )
 from synapse.metrics.background_process_metrics import run_as_background_process
-from synapse.util.metrics import measure_func
+from synapse.util.metrics import Measure, measure_func
 
 logger = logging.getLogger(__name__)
 
@@ -183,8 +183,8 @@ class FederationSender(object):
                         # Otherwise if the last member on a server in a room is
                         # banned then it won't receive the event because it won't
                         # be in the room after the ban.
-                        destinations = yield self.state.get_current_hosts_in_room(
-                            event.room_id, latest_event_ids=event.prev_event_ids()
+                        destinations = yield self.state.get_hosts_in_room_at_events(
+                            event.room_id, event_ids=event.prev_event_ids()
                         )
                     except Exception:
                         logger.exception(
@@ -207,8 +207,9 @@ class FederationSender(object):
 
                 @defer.inlineCallbacks
                 def handle_room_events(events):
-                    for event in events:
-                        yield handle_event(event)
+                    with Measure(self.clock, "handle_room_events"):
+                        for event in events:
+                            yield handle_event(event)
 
                 events_by_room = {}
                 for event in events:
diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py
index 2b0f4c79e..dc9f5a900 100644
--- a/synapse/state/__init__.py
+++ b/synapse/state/__init__.py
@@ -33,7 +33,7 @@ from synapse.state import v1, v2
 from synapse.util.async_helpers import Linearizer
 from synapse.util.caches import get_cache_factor_for
 from synapse.util.caches.expiringcache import ExpiringCache
-from synapse.util.metrics import Measure
+from synapse.util.metrics import Measure, measure_func
 
 logger = logging.getLogger(__name__)
 
@@ -191,11 +191,22 @@ class StateHandler(object):
         return joined_users
 
     @defer.inlineCallbacks
-    def get_current_hosts_in_room(self, room_id, latest_event_ids=None):
-        if not latest_event_ids:
-            latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id)
-        logger.debug("calling resolve_state_groups from get_current_hosts_in_room")
-        entry = yield self.resolve_state_groups_for_events(room_id, latest_event_ids)
+    def get_current_hosts_in_room(self, room_id):
+        event_ids = yield self.store.get_latest_event_ids_in_room(room_id)
+        return (yield self.get_hosts_in_room_at_events(room_id, event_ids))
+
+    @defer.inlineCallbacks
+    def get_hosts_in_room_at_events(self, room_id, event_ids):
+        """Get the hosts that were in a room at the given event ids
+
+        Args:
+            room_id (str):
+            event_ids (list[str]):
+
+        Returns:
+            Deferred[list[str]]: the hosts in the room at the given events
+        """
+        entry = yield self.resolve_state_groups_for_events(room_id, event_ids)
         joined_hosts = yield self.store.get_joined_hosts(room_id, entry)
         return joined_hosts
 
@@ -344,6 +355,7 @@ class StateHandler(object):
 
         return context
 
+    @measure_func()
     @defer.inlineCallbacks
     def resolve_state_groups_for_events(self, room_id, event_ids):
         """ Given a list of event_ids this method fetches the state at each
diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py
index 4df8ebdac..1550d827b 100644
--- a/synapse/storage/roommember.py
+++ b/synapse/storage/roommember.py
@@ -33,6 +33,7 @@ from synapse.types import get_domain_from_id
 from synapse.util.async_helpers import Linearizer
 from synapse.util.caches import intern_string
 from synapse.util.caches.descriptors import cached, cachedInlineCallbacks
+from synapse.util.metrics import Measure
 from synapse.util.stringutils import to_ascii
 
 logger = logging.getLogger(__name__)
@@ -483,6 +484,7 @@ class RoomMemberWorkerStore(EventsWorkerStore):
         )
         return result
 
+    @defer.inlineCallbacks
     def get_joined_users_from_state(self, room_id, state_entry):
         state_group = state_entry.state_group
         if not state_group:
@@ -492,9 +494,12 @@ class RoomMemberWorkerStore(EventsWorkerStore):
             # To do this we set the state_group to a new object as object() != object()
             state_group = object()
 
-        return self._get_joined_users_from_context(
-            room_id, state_group, state_entry.state, context=state_entry
-        )
+        with Measure(self._clock, "get_joined_users_from_state"):
+            return (
+                yield self._get_joined_users_from_context(
+                    room_id, state_group, state_entry.state, context=state_entry
+                )
+            )
 
     @cachedInlineCallbacks(
         num_args=2, cache_context=True, iterable=True, max_entries=100000
@@ -669,6 +674,7 @@ class RoomMemberWorkerStore(EventsWorkerStore):
 
         return True
 
+    @defer.inlineCallbacks
     def get_joined_hosts(self, room_id, state_entry):
         state_group = state_entry.state_group
         if not state_group:
@@ -678,9 +684,12 @@ class RoomMemberWorkerStore(EventsWorkerStore):
             # To do this we set the state_group to a new object as object() != object()
             state_group = object()
 
-        return self._get_joined_hosts(
-            room_id, state_group, state_entry.state, state_entry=state_entry
-        )
+        with Measure(self._clock, "get_joined_hosts"):
+            return (
+                yield self._get_joined_hosts(
+                    room_id, state_group, state_entry.state, state_entry=state_entry
+                )
+            )
 
     @cachedInlineCallbacks(num_args=2, max_entries=10000, iterable=True)
     # @defer.inlineCallbacks
diff --git a/synapse/util/metrics.py b/synapse/util/metrics.py
index 0910930c2..4b1bcdf23 100644
--- a/synapse/util/metrics.py
+++ b/synapse/util/metrics.py
@@ -60,12 +60,14 @@ in_flight = InFlightGauge(
 )
 
 
-def measure_func(name):
+def measure_func(name=None):
     def wrapper(func):
+        block_name = func.__name__ if name is None else name
+
         @wraps(func)
         @defer.inlineCallbacks
         def measured_func(self, *args, **kwargs):
-            with Measure(self.clock, name):
+            with Measure(self.clock, block_name):
                 r = yield func(self, *args, **kwargs)
             return r
 

From 39b40d6d9989b09de39da5b6d3f85ee535e41138 Mon Sep 17 00:00:00 2001
From: Robert Swain <rob@matrix.org>
Date: Fri, 4 Oct 2019 10:34:52 +0200
Subject: [PATCH 185/276] media/thumbnailer: Better quality for 1-bit / 8-bit
 color palette images (#2142)

Pillow will use nearest neighbour as the resampling algorithm if the
source image is either 1-bit or a color palette using 8 bits. If we
convert to RGB before scaling, we'll probably get a better result.
---
 changelog.d/2142.feature             |  1 +
 synapse/rest/media/v1/thumbnailer.py | 14 +++++++++++---
 2 files changed, 12 insertions(+), 3 deletions(-)
 create mode 100644 changelog.d/2142.feature

diff --git a/changelog.d/2142.feature b/changelog.d/2142.feature
new file mode 100644
index 000000000..e21e8325e
--- /dev/null
+++ b/changelog.d/2142.feature
@@ -0,0 +1 @@
+Improve quality of thumbnails for 1-bit/8-bit color palette images.
diff --git a/synapse/rest/media/v1/thumbnailer.py b/synapse/rest/media/v1/thumbnailer.py
index c995d7e04..8cf415e29 100644
--- a/synapse/rest/media/v1/thumbnailer.py
+++ b/synapse/rest/media/v1/thumbnailer.py
@@ -82,13 +82,21 @@ class Thumbnailer(object):
         else:
             return (max_height * self.width) // self.height, max_height
 
+    def _resize(self, width, height):
+        # 1-bit or 8-bit color palette images need converting to RGB
+        # otherwise they will be scaled using nearest neighbour which
+        # looks awful
+        if self.image.mode in ["1", "P"]:
+            self.image = self.image.convert("RGB")
+        return self.image.resize((width, height), Image.ANTIALIAS)
+
     def scale(self, width, height, output_type):
         """Rescales the image to the given dimensions.
 
         Returns:
             BytesIO: the bytes of the encoded image ready to be written to disk
         """
-        scaled = self.image.resize((width, height), Image.ANTIALIAS)
+        scaled = self._resize(width, height)
         return self._encode_image(scaled, output_type)
 
     def crop(self, width, height, output_type):
@@ -107,13 +115,13 @@ class Thumbnailer(object):
         """
         if width * self.height > height * self.width:
             scaled_height = (width * self.height) // self.width
-            scaled_image = self.image.resize((width, scaled_height), Image.ANTIALIAS)
+            scaled_image = self._resize(width, scaled_height)
             crop_top = (scaled_height - height) // 2
             crop_bottom = height + crop_top
             cropped = scaled_image.crop((0, crop_top, width, crop_bottom))
         else:
             scaled_width = (height * self.width) // self.height
-            scaled_image = self.image.resize((scaled_width, height), Image.ANTIALIAS)
+            scaled_image = self._resize(scaled_width, height)
             crop_left = (scaled_width - width) // 2
             crop_right = width + crop_left
             cropped = scaled_image.crop((crop_left, 0, crop_right, height))

From 13c4345c844e75b0d1a4ce66e4fb2eb9820cb7f6 Mon Sep 17 00:00:00 2001
From: Alexander Maznev <alexander.maznev@gmail.com>
Date: Fri, 4 Oct 2019 04:34:16 -0500
Subject: [PATCH 186/276] Update `user_filters` table to have a unique index,
 and non-null columns (#1172)

---
 changelog.d/1172.misc                         |  1 +
 .../delta/56/unique_user_filter_index.py      | 46 +++++++++++++++++++
 2 files changed, 47 insertions(+)
 create mode 100644 changelog.d/1172.misc
 create mode 100644 synapse/storage/schema/delta/56/unique_user_filter_index.py

diff --git a/changelog.d/1172.misc b/changelog.d/1172.misc
new file mode 100644
index 000000000..30b3e5608
--- /dev/null
+++ b/changelog.d/1172.misc
@@ -0,0 +1 @@
+Update `user_filters` table to have a unique index, and non-null columns. Thanks to @pik for contributing this.
\ No newline at end of file
diff --git a/synapse/storage/schema/delta/56/unique_user_filter_index.py b/synapse/storage/schema/delta/56/unique_user_filter_index.py
new file mode 100644
index 000000000..4efc1a586
--- /dev/null
+++ b/synapse/storage/schema/delta/56/unique_user_filter_index.py
@@ -0,0 +1,46 @@
+import logging
+
+from synapse.storage.engines import PostgresEngine
+
+logger = logging.getLogger(__name__)
+
+
+def run_upgrade(cur, database_engine, *args, **kwargs):
+    if isinstance(database_engine, PostgresEngine):
+        select_clause = """
+        CREATE TEMPORARY TABLE user_filters_migration AS
+            SELECT DISTINCT ON (user_id, filter_id) user_id, filter_id, filter_json
+            FROM user_filters;
+        """
+    else:
+        select_clause = """
+        CREATE TEMPORARY TABLE user_filters_migration AS
+            SELECT * FROM user_filters GROUP BY user_id, filter_id;
+        """
+    sql = (
+        """
+        BEGIN;
+            %s
+            DROP INDEX user_filters_by_user_id_filter_id;
+            DELETE FROM user_filters;
+            ALTER TABLE user_filters
+               ALTER COLUMN user_id SET NOT NULL
+               ALTER COLUMN filter_id SET NOT NULL
+               ALTER COLUMN filter_json SET NOT NULL;
+            INSERT INTO user_filters(user_id, filter_id, filter_json)
+                SELECT * FROM user_filters_migration;
+            DROP TABLE user_filters_migration;
+            CREATE UNIQUE INDEX user_filters_by_user_id_filter_id_unique
+                ON user_filters(user_id, filter_id);
+        END;
+    """
+        % select_clause
+    )
+    if isinstance(database_engine, PostgresEngine):
+        cur.execute(sql)
+    else:
+        cur.executescript(sql)
+
+
+def run_create(cur, database_engine, *args, **kwargs):
+    pass

From 81d51ce48b449ee55bdcc17b76050283d848d405 Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Fri, 4 Oct 2019 11:16:19 +0100
Subject: [PATCH 187/276] Incorporate review

---
 synapse/handlers/federation.py | 22 ++++++++--------------
 1 file changed, 8 insertions(+), 14 deletions(-)

diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index 91f3a6929..58a165488 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -2600,20 +2600,14 @@ class FederationHandler(BaseHandler):
             )
         if original_invite:
             # If the m.room.third_party_invite event's content is empty, it means the
-            # invite has been revoked.
-            if original_invite.content:
-                display_name = original_invite.content["display_name"]
-                event_dict["content"]["third_party_invite"][
-                    "display_name"
-                ] = display_name
-            else:
-                # Don't discard or raise an error here because that's not the right place
-                # to do auth checks. The auth check will fail on this invite because we
-                # won't be able to fetch public keys from the m.room.third_party_invite
-                # event's content (because it's empty).
-                logger.info(
-                    "Found invite event for third_party_invite but it has been revoked"
-                )
+            # invite has been revoked. In this case, we don't have to raise an error here
+            # because the auth check will fail on the invite (because it's not able to
+            # fetch public keys from the m.room.third_party_invite event's content, which
+            # is empty.
+            display_name = original_invite.content.get("display_name")
+            event_dict["content"]["third_party_invite"][
+                "display_name"
+            ] = display_name
         else:
             logger.info(
                 "Could not find invite event for third_party_invite: %r", event_dict

From 4676732ca061ede3089b9c9b97a6d6b523b8c8e0 Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Fri, 4 Oct 2019 11:18:28 +0100
Subject: [PATCH 188/276] Lint

---
 synapse/handlers/federation.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index 58a165488..b2b3a7e22 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -2605,9 +2605,7 @@ class FederationHandler(BaseHandler):
             # fetch public keys from the m.room.third_party_invite event's content, which
             # is empty.
             display_name = original_invite.content.get("display_name")
-            event_dict["content"]["third_party_invite"][
-                "display_name"
-            ] = display_name
+            event_dict["content"]["third_party_invite"]["display_name"] = display_name
         else:
             logger.info(
                 "Could not find invite event for third_party_invite: %r", event_dict

From 21d51ab59852d6a4d504a6ccd79ad82070c03a12 Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Fri, 4 Oct 2019 11:21:24 +0100
Subject: [PATCH 189/276] Typo

---
 synapse/handlers/federation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index b2b3a7e22..50fc0fde2 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -2603,7 +2603,7 @@ class FederationHandler(BaseHandler):
             # invite has been revoked. In this case, we don't have to raise an error here
             # because the auth check will fail on the invite (because it's not able to
             # fetch public keys from the m.room.third_party_invite event's content, which
-            # is empty.
+            # is empty).
             display_name = original_invite.content.get("display_name")
             event_dict["content"]["third_party_invite"]["display_name"] = display_name
         else:

From 5119a4cac7f42691beb455eedbefd99a39d64897 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Mon, 7 Oct 2019 12:21:17 +0100
Subject: [PATCH 190/276] Fix bug where we didn't pull out event ID

---
 synapse/storage/roommember.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py
index 81a9ab6dc..59a89fad6 100644
--- a/synapse/storage/roommember.py
+++ b/synapse/storage/roommember.py
@@ -612,7 +612,7 @@ class RoomMemberWorkerStore(EventsWorkerStore):
             table="room_memberships",
             column="event_id",
             iterable=event_ids,
-            retcols=("user_id", "display_name", "avatar_url"),
+            retcols=("user_id", "display_name", "avatar_url", "event_id"),
             keyvalues={"membership": Membership.JOIN},
             batch_size=500,
             desc="_get_membership_from_event_ids",

From c8e6c308c6358f20b1d0ef1292f8e9e2f36a549e Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Mon, 7 Oct 2019 13:15:35 +0100
Subject: [PATCH 191/276] Fix unique_user_filter_index schema update

---
 synapse/storage/schema/delta/56/unique_user_filter_index.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/synapse/storage/schema/delta/56/unique_user_filter_index.py b/synapse/storage/schema/delta/56/unique_user_filter_index.py
index 4efc1a586..60031f23c 100644
--- a/synapse/storage/schema/delta/56/unique_user_filter_index.py
+++ b/synapse/storage/schema/delta/56/unique_user_filter_index.py
@@ -24,8 +24,8 @@ def run_upgrade(cur, database_engine, *args, **kwargs):
             DROP INDEX user_filters_by_user_id_filter_id;
             DELETE FROM user_filters;
             ALTER TABLE user_filters
-               ALTER COLUMN user_id SET NOT NULL
-               ALTER COLUMN filter_id SET NOT NULL
+               ALTER COLUMN user_id SET NOT NULL,
+               ALTER COLUMN filter_id SET NOT NULL,
                ALTER COLUMN filter_json SET NOT NULL;
             INSERT INTO user_filters(user_id, filter_id, filter_json)
                 SELECT * FROM user_filters_migration;

From aa7a003074e4e42c4ac8a571d2cd18ecfea3990f Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Mon, 7 Oct 2019 13:16:54 +0100
Subject: [PATCH 192/276] Changelog

---
 changelog.d/6175.bugfix | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/6175.bugfix

diff --git a/changelog.d/6175.bugfix b/changelog.d/6175.bugfix
new file mode 100644
index 000000000..3cd9a99ed
--- /dev/null
+++ b/changelog.d/6175.bugfix
@@ -0,0 +1 @@
+Fix syntax error in unique_user_filter_index schema update.

From 276ae5c63eaef656d486e190298f7a5ec99a7a5b Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Mon, 7 Oct 2019 14:41:39 +0100
Subject: [PATCH 193/276] add some logging to the rooms stats updates, to try
 to track down a flaky test (#6167)

---
 changelog.d/6167.misc     | 1 +
 synapse/handlers/stats.py | 1 +
 synapse/storage/stats.py  | 3 +++
 3 files changed, 5 insertions(+)
 create mode 100644 changelog.d/6167.misc

diff --git a/changelog.d/6167.misc b/changelog.d/6167.misc
new file mode 100644
index 000000000..32c96b368
--- /dev/null
+++ b/changelog.d/6167.misc
@@ -0,0 +1 @@
+Add some logging to the rooms stats updates, to try to track down a flaky test.
diff --git a/synapse/handlers/stats.py b/synapse/handlers/stats.py
index cbac7c347..c62b11311 100644
--- a/synapse/handlers/stats.py
+++ b/synapse/handlers/stats.py
@@ -293,6 +293,7 @@ class StatsHandler(StateDeltasHandler):
                 room_state["guest_access"] = event_content.get("guest_access")
 
         for room_id, state in room_to_state_updates.items():
+            logger.info("Updating room_stats_state for %s: %s", room_id, state)
             yield self.store.update_room_state(room_id, state)
 
         return room_to_stats_deltas, user_to_stats_deltas
diff --git a/synapse/storage/stats.py b/synapse/storage/stats.py
index 09190d684..7c224cd3d 100644
--- a/synapse/storage/stats.py
+++ b/synapse/storage/stats.py
@@ -332,6 +332,9 @@ class StatsStore(StateDeltasStore):
         def _bulk_update_stats_delta_txn(txn):
             for stats_type, stats_updates in updates.items():
                 for stats_id, fields in stats_updates.items():
+                    logger.info(
+                        "Updating %s stats for %s: %s", stats_type, stats_id, fields
+                    )
                     self._update_stats_delta_txn(
                         txn,
                         ts=ts,

From 1992f21a9fa00a37963bb6ac11d0e678cc08557e Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Mon, 7 Oct 2019 14:54:36 +0100
Subject: [PATCH 194/276] Fix changelog for PR #6175

---
 changelog.d/6175.bugfix | 1 -
 changelog.d/6175.misc   | 1 +
 2 files changed, 1 insertion(+), 1 deletion(-)
 delete mode 100644 changelog.d/6175.bugfix
 create mode 100644 changelog.d/6175.misc

diff --git a/changelog.d/6175.bugfix b/changelog.d/6175.bugfix
deleted file mode 100644
index 3cd9a99ed..000000000
--- a/changelog.d/6175.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Fix syntax error in unique_user_filter_index schema update.
diff --git a/changelog.d/6175.misc b/changelog.d/6175.misc
new file mode 100644
index 000000000..5bb24f02f
--- /dev/null
+++ b/changelog.d/6175.misc
@@ -0,0 +1 @@
+Update `user_filters` table to have a unique index, and non-null columns. Thanks to @pik for contributing this

From dc795ba709f2ffe41671d25d94f21d4b31a5301d Mon Sep 17 00:00:00 2001
From: Michael Kaye <1917473+michaelkaye@users.noreply.github.com>
Date: Mon, 7 Oct 2019 15:41:25 +0100
Subject: [PATCH 195/276] Log responder we are using. (#6139)

Previously the format argument was missing, so we logged the literal
string "Responding to media request with responder %s".
---
 changelog.d/6139.misc          | 1 +
 synapse/rest/media/v1/_base.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/6139.misc

diff --git a/changelog.d/6139.misc b/changelog.d/6139.misc
new file mode 100644
index 000000000..d4b65e7af
--- /dev/null
+++ b/changelog.d/6139.misc
@@ -0,0 +1 @@
+Log responder when responding to media request.
diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py
index 5fefee4dd..65bbf0007 100644
--- a/synapse/rest/media/v1/_base.py
+++ b/synapse/rest/media/v1/_base.py
@@ -195,7 +195,7 @@ def respond_with_responder(request, responder, media_type, file_size, upload_nam
         respond_404(request)
         return
 
-    logger.debug("Responding to media request with responder %s")
+    logger.debug("Responding to media request with responder %s", responder)
     add_file_headers(request, media_type, file_size, upload_name)
     try:
         with responder:

From 88957199e7edbd33a66d419224df9ba0eb9e604d Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Thu, 3 Oct 2019 17:16:39 +0100
Subject: [PATCH 196/276] Move client_ips's bg updates to a dedicated store

---
 synapse/storage/client_ips.py | 200 ++++++++++++++++++----------------
 1 file changed, 106 insertions(+), 94 deletions(-)

diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py
index bb135166c..1d89b50f5 100644
--- a/synapse/storage/client_ips.py
+++ b/synapse/storage/client_ips.py
@@ -33,14 +33,14 @@ logger = logging.getLogger(__name__)
 LAST_SEEN_GRANULARITY = 120 * 1000
 
 
-class ClientIpStore(background_updates.BackgroundUpdateStore):
+class ClientIpBackgroundUpdateStore(background_updates.BackgroundUpdateStore):
     def __init__(self, db_conn, hs):
 
         self.client_ip_last_seen = Cache(
             name="client_ip_last_seen", keylen=4, max_entries=50000 * CACHE_SIZE_FACTOR
         )
 
-        super(ClientIpStore, self).__init__(db_conn, hs)
+        super(ClientIpBackgroundUpdateStore, self).__init__(db_conn, hs)
 
         self.user_ips_max_age = hs.config.user_ips_max_age
 
@@ -92,19 +92,6 @@ class ClientIpStore(background_updates.BackgroundUpdateStore):
             "devices_last_seen", self._devices_last_seen_update
         )
 
-        # (user_id, access_token, ip,) -> (user_agent, device_id, last_seen)
-        self._batch_row_update = {}
-
-        self._client_ip_looper = self._clock.looping_call(
-            self._update_client_ips_batch, 5 * 1000
-        )
-        self.hs.get_reactor().addSystemEventTrigger(
-            "before", "shutdown", self._update_client_ips_batch
-        )
-
-        if self.user_ips_max_age:
-            self._clock.looping_call(self._prune_old_user_ips, 5 * 1000)
-
     @defer.inlineCallbacks
     def _remove_user_ip_nonunique(self, progress, batch_size):
         def f(conn):
@@ -303,6 +290,110 @@ class ClientIpStore(background_updates.BackgroundUpdateStore):
 
         return batch_size
 
+    @defer.inlineCallbacks
+    def _devices_last_seen_update(self, progress, batch_size):
+        """Background update to insert last seen info into devices table
+        """
+
+        last_user_id = progress.get("last_user_id", "")
+        last_device_id = progress.get("last_device_id", "")
+
+        def _devices_last_seen_update_txn(txn):
+            # This consists of two queries:
+            #
+            #   1. The sub-query searches for the next N devices and joins
+            #      against user_ips to find the max last_seen associated with
+            #      that device.
+            #   2. The outer query then joins again against user_ips on
+            #      user/device/last_seen. This *should* hopefully only
+            #      return one row, but if it does return more than one then
+            #      we'll just end up updating the same device row multiple
+            #      times, which is fine.
+
+            if self.database_engine.supports_tuple_comparison:
+                where_clause = "(user_id, device_id) > (?, ?)"
+                where_args = [last_user_id, last_device_id]
+            else:
+                # We explicitly do a `user_id >= ? AND (...)` here to ensure
+                # that an index is used, as doing `user_id > ? OR (user_id = ? AND ...)`
+                # makes it hard for query optimiser to tell that it can use the
+                # index on user_id
+                where_clause = "user_id >= ? AND (user_id > ? OR device_id > ?)"
+                where_args = [last_user_id, last_user_id, last_device_id]
+
+            sql = """
+                SELECT
+                    last_seen, ip, user_agent, user_id, device_id
+                FROM (
+                    SELECT
+                        user_id, device_id, MAX(u.last_seen) AS last_seen
+                    FROM devices
+                    INNER JOIN user_ips AS u USING (user_id, device_id)
+                    WHERE %(where_clause)s
+                    GROUP BY user_id, device_id
+                    ORDER BY user_id ASC, device_id ASC
+                    LIMIT ?
+                ) c
+                INNER JOIN user_ips AS u USING (user_id, device_id, last_seen)
+            """ % {
+                "where_clause": where_clause
+            }
+            txn.execute(sql, where_args + [batch_size])
+
+            rows = txn.fetchall()
+            if not rows:
+                return 0
+
+            sql = """
+                UPDATE devices
+                SET last_seen = ?, ip = ?, user_agent = ?
+                WHERE user_id = ? AND device_id = ?
+            """
+            txn.execute_batch(sql, rows)
+
+            _, _, _, user_id, device_id = rows[-1]
+            self._background_update_progress_txn(
+                txn,
+                "devices_last_seen",
+                {"last_user_id": user_id, "last_device_id": device_id},
+            )
+
+            return len(rows)
+
+        updated = yield self.runInteraction(
+            "_devices_last_seen_update", _devices_last_seen_update_txn
+        )
+
+        if not updated:
+            yield self._end_background_update("devices_last_seen")
+
+        return updated
+
+
+class ClientIpStore(ClientIpBackgroundUpdateStore):
+    def __init__(self, db_conn, hs):
+
+        self.client_ip_last_seen = Cache(
+            name="client_ip_last_seen", keylen=4, max_entries=50000 * CACHE_SIZE_FACTOR
+        )
+
+        super(ClientIpStore, self).__init__(db_conn, hs)
+
+        self.user_ips_max_age = hs.config.user_ips_max_age
+
+        # (user_id, access_token, ip,) -> (user_agent, device_id, last_seen)
+        self._batch_row_update = {}
+
+        self._client_ip_looper = self._clock.looping_call(
+            self._update_client_ips_batch, 5 * 1000
+        )
+        self.hs.get_reactor().addSystemEventTrigger(
+            "before", "shutdown", self._update_client_ips_batch
+        )
+
+        if self.user_ips_max_age:
+            self._clock.looping_call(self._prune_old_user_ips, 5 * 1000)
+
     @defer.inlineCallbacks
     def insert_client_ip(
         self, user_id, access_token, ip, user_agent, device_id, now=None
@@ -454,85 +545,6 @@ class ClientIpStore(background_updates.BackgroundUpdateStore):
             for (access_token, ip), (user_agent, last_seen) in iteritems(results)
         )
 
-    @defer.inlineCallbacks
-    def _devices_last_seen_update(self, progress, batch_size):
-        """Background update to insert last seen info into devices table
-        """
-
-        last_user_id = progress.get("last_user_id", "")
-        last_device_id = progress.get("last_device_id", "")
-
-        def _devices_last_seen_update_txn(txn):
-            # This consists of two queries:
-            #
-            #   1. The sub-query searches for the next N devices and joins
-            #      against user_ips to find the max last_seen associated with
-            #      that device.
-            #   2. The outer query then joins again against user_ips on
-            #      user/device/last_seen. This *should* hopefully only
-            #      return one row, but if it does return more than one then
-            #      we'll just end up updating the same device row multiple
-            #      times, which is fine.
-
-            if self.database_engine.supports_tuple_comparison:
-                where_clause = "(user_id, device_id) > (?, ?)"
-                where_args = [last_user_id, last_device_id]
-            else:
-                # We explicitly do a `user_id >= ? AND (...)` here to ensure
-                # that an index is used, as doing `user_id > ? OR (user_id = ? AND ...)`
-                # makes it hard for query optimiser to tell that it can use the
-                # index on user_id
-                where_clause = "user_id >= ? AND (user_id > ? OR device_id > ?)"
-                where_args = [last_user_id, last_user_id, last_device_id]
-
-            sql = """
-                SELECT
-                    last_seen, ip, user_agent, user_id, device_id
-                FROM (
-                    SELECT
-                        user_id, device_id, MAX(u.last_seen) AS last_seen
-                    FROM devices
-                    INNER JOIN user_ips AS u USING (user_id, device_id)
-                    WHERE %(where_clause)s
-                    GROUP BY user_id, device_id
-                    ORDER BY user_id ASC, device_id ASC
-                    LIMIT ?
-                ) c
-                INNER JOIN user_ips AS u USING (user_id, device_id, last_seen)
-            """ % {
-                "where_clause": where_clause
-            }
-            txn.execute(sql, where_args + [batch_size])
-
-            rows = txn.fetchall()
-            if not rows:
-                return 0
-
-            sql = """
-                UPDATE devices
-                SET last_seen = ?, ip = ?, user_agent = ?
-                WHERE user_id = ? AND device_id = ?
-            """
-            txn.execute_batch(sql, rows)
-
-            _, _, _, user_id, device_id = rows[-1]
-            self._background_update_progress_txn(
-                txn,
-                "devices_last_seen",
-                {"last_user_id": user_id, "last_device_id": device_id},
-            )
-
-            return len(rows)
-
-        updated = yield self.runInteraction(
-            "_devices_last_seen_update", _devices_last_seen_update_txn
-        )
-
-        if not updated:
-            yield self._end_background_update("devices_last_seen")
-
-        return updated
-
     @wrap_as_background_process("prune_old_user_ips")
     async def _prune_old_user_ips(self):
         """Removes entries in user IPs older than the configured period.

From 2d3b4f42f029da54132b26119e9ec0d902505eef Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Thu, 3 Oct 2019 17:19:25 +0100
Subject: [PATCH 197/276] Move deviceinbox's bg updates to a dedicated store

---
 synapse/storage/deviceinbox.py | 37 ++++++++++++++++++++--------------
 1 file changed, 22 insertions(+), 15 deletions(-)

diff --git a/synapse/storage/deviceinbox.py b/synapse/storage/deviceinbox.py
index 6b7458304..70bc2bb2c 100644
--- a/synapse/storage/deviceinbox.py
+++ b/synapse/storage/deviceinbox.py
@@ -208,11 +208,11 @@ class DeviceInboxWorkerStore(SQLBaseStore):
         )
 
 
-class DeviceInboxStore(DeviceInboxWorkerStore, BackgroundUpdateStore):
+class DeviceInboxBackgroundUpdateStore(BackgroundUpdateStore):
     DEVICE_INBOX_STREAM_ID = "device_inbox_stream_drop"
 
     def __init__(self, db_conn, hs):
-        super(DeviceInboxStore, self).__init__(db_conn, hs)
+        super(DeviceInboxBackgroundUpdateStore, self).__init__(db_conn, hs)
 
         self.register_background_index_update(
             "device_inbox_stream_index",
@@ -225,6 +225,26 @@ class DeviceInboxStore(DeviceInboxWorkerStore, BackgroundUpdateStore):
             self.DEVICE_INBOX_STREAM_ID, self._background_drop_index_device_inbox
         )
 
+    @defer.inlineCallbacks
+    def _background_drop_index_device_inbox(self, progress, batch_size):
+        def reindex_txn(conn):
+            txn = conn.cursor()
+            txn.execute("DROP INDEX IF EXISTS device_inbox_stream_id")
+            txn.close()
+
+        yield self.runWithConnection(reindex_txn)
+
+        yield self._end_background_update(self.DEVICE_INBOX_STREAM_ID)
+
+        return 1
+
+
+class DeviceInboxStore(DeviceInboxWorkerStore, DeviceInboxBackgroundUpdateStore):
+    DEVICE_INBOX_STREAM_ID = "device_inbox_stream_drop"
+
+    def __init__(self, db_conn, hs):
+        super(DeviceInboxStore, self).__init__(db_conn, hs)
+
         # Map of (user_id, device_id) to the last stream_id that has been
         # deleted up to. This is so that we can no op deletions.
         self._last_device_delete_cache = ExpiringCache(
@@ -435,16 +455,3 @@ class DeviceInboxStore(DeviceInboxWorkerStore, BackgroundUpdateStore):
         return self.runInteraction(
             "get_all_new_device_messages", get_all_new_device_messages_txn
         )
-
-    @defer.inlineCallbacks
-    def _background_drop_index_device_inbox(self, progress, batch_size):
-        def reindex_txn(conn):
-            txn = conn.cursor()
-            txn.execute("DROP INDEX IF EXISTS device_inbox_stream_id")
-            txn.close()
-
-        yield self.runWithConnection(reindex_txn)
-
-        yield self._end_background_update(self.DEVICE_INBOX_STREAM_ID)
-
-        return 1

From cef9f6753e51c1fb7b24f2122b0d0ada767d6e08 Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Thu, 3 Oct 2019 17:24:03 +0100
Subject: [PATCH 198/276] Move devices's bg updates to a dedicated store

---
 synapse/storage/devices.py | 49 +++++++++++++++++++++-----------------
 1 file changed, 27 insertions(+), 22 deletions(-)

diff --git a/synapse/storage/devices.py b/synapse/storage/devices.py
index 79a58df59..111bfb3d6 100644
--- a/synapse/storage/devices.py
+++ b/synapse/storage/devices.py
@@ -512,17 +512,9 @@ class DeviceWorkerStore(SQLBaseStore):
         return results
 
 
-class DeviceStore(DeviceWorkerStore, BackgroundUpdateStore):
+class DeviceBackgroundUpdateStore(BackgroundUpdateStore):
     def __init__(self, db_conn, hs):
-        super(DeviceStore, self).__init__(db_conn, hs)
-
-        # Map of (user_id, device_id) -> bool. If there is an entry that implies
-        # the device exists.
-        self.device_id_exists_cache = Cache(
-            name="device_id_exists", keylen=2, max_entries=10000
-        )
-
-        self._clock.looping_call(self._prune_old_outbound_device_pokes, 60 * 60 * 1000)
+        super(DeviceBackgroundUpdateStore, self).__init__(db_conn, hs)
 
         self.register_background_index_update(
             "device_lists_stream_idx",
@@ -555,6 +547,31 @@ class DeviceStore(DeviceWorkerStore, BackgroundUpdateStore):
             self._drop_device_list_streams_non_unique_indexes,
         )
 
+    @defer.inlineCallbacks
+    def _drop_device_list_streams_non_unique_indexes(self, progress, batch_size):
+        def f(conn):
+            txn = conn.cursor()
+            txn.execute("DROP INDEX IF EXISTS device_lists_remote_cache_id")
+            txn.execute("DROP INDEX IF EXISTS device_lists_remote_extremeties_id")
+            txn.close()
+
+        yield self.runWithConnection(f)
+        yield self._end_background_update(DROP_DEVICE_LIST_STREAMS_NON_UNIQUE_INDEXES)
+        return 1
+
+
+class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
+    def __init__(self, db_conn, hs):
+        super(DeviceStore, self).__init__(db_conn, hs)
+
+        # Map of (user_id, device_id) -> bool. If there is an entry that implies
+        # the device exists.
+        self.device_id_exists_cache = Cache(
+            name="device_id_exists", keylen=2, max_entries=10000
+        )
+
+        self._clock.looping_call(self._prune_old_outbound_device_pokes, 60 * 60 * 1000)
+
     @defer.inlineCallbacks
     def store_device(self, user_id, device_id, initial_device_display_name):
         """Ensure the given device is known; add it to the store if not
@@ -910,15 +927,3 @@ class DeviceStore(DeviceWorkerStore, BackgroundUpdateStore):
             "_prune_old_outbound_device_pokes",
             _prune_txn,
         )
-
-    @defer.inlineCallbacks
-    def _drop_device_list_streams_non_unique_indexes(self, progress, batch_size):
-        def f(conn):
-            txn = conn.cursor()
-            txn.execute("DROP INDEX IF EXISTS device_lists_remote_cache_id")
-            txn.execute("DROP INDEX IF EXISTS device_lists_remote_extremeties_id")
-            txn.close()
-
-        yield self.runWithConnection(f)
-        yield self._end_background_update(DROP_DEVICE_LIST_STREAMS_NON_UNIQUE_INDEXES)
-        return 1

From 54f87e07342ddbf82e9b8ee7c7ce227624c2f97b Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Thu, 3 Oct 2019 17:30:22 +0100
Subject: [PATCH 199/276] Move media_repository's bg updates to a dedicated
 store

---
 synapse/storage/media_repository.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/synapse/storage/media_repository.py b/synapse/storage/media_repository.py
index 6b1238ce4..2eb2740d0 100644
--- a/synapse/storage/media_repository.py
+++ b/synapse/storage/media_repository.py
@@ -15,11 +15,10 @@
 from synapse.storage.background_updates import BackgroundUpdateStore
 
 
-class MediaRepositoryStore(BackgroundUpdateStore):
-    """Persistence for attachments and avatars"""
+class MediaRepositoryBackgroundUpdateStore(BackgroundUpdateStore):
 
     def __init__(self, db_conn, hs):
-        super(MediaRepositoryStore, self).__init__(db_conn, hs)
+        super(MediaRepositoryBackgroundUpdateStore, self).__init__(db_conn, hs)
 
         self.register_background_index_update(
             update_name="local_media_repository_url_idx",
@@ -29,6 +28,13 @@ class MediaRepositoryStore(BackgroundUpdateStore):
             where_clause="url_cache IS NOT NULL",
         )
 
+
+class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore):
+    """Persistence for attachments and avatars"""
+
+    def __init__(self, db_conn, hs):
+        super(MediaRepositoryStore, self).__init__(db_conn, hs)
+
     def get_local_media(self, media_id):
         """Get the metadata for a local piece of media
         Returns:

From 81e6ffb536b19c662a81736f88ef2f243d425532 Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Thu, 3 Oct 2019 17:44:26 +0100
Subject: [PATCH 200/276] Move registration's bg updates to a dedicated store

---
 synapse/storage/registration.py | 198 +++++++++++++++++---------------
 1 file changed, 103 insertions(+), 95 deletions(-)

diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py
index 1a859352b..1f6c93b73 100644
--- a/synapse/storage/registration.py
+++ b/synapse/storage/registration.py
@@ -37,7 +37,57 @@ THIRTY_MINUTES_IN_MS = 30 * 60 * 1000
 logger = logging.getLogger(__name__)
 
 
-class RegistrationWorkerStore(SQLBaseStore):
+class RegistrationDeactivationStore(SQLBaseStore):
+    @cachedInlineCallbacks()
+    def get_user_deactivated_status(self, user_id):
+        """Retrieve the value for the `deactivated` property for the provided user.
+
+        Args:
+            user_id (str): The ID of the user to retrieve the status for.
+
+        Returns:
+            defer.Deferred(bool): The requested value.
+        """
+
+        res = yield self._simple_select_one_onecol(
+            table="users",
+            keyvalues={"name": user_id},
+            retcol="deactivated",
+            desc="get_user_deactivated_status",
+        )
+
+        # Convert the integer into a boolean.
+        return res == 1
+
+    @defer.inlineCallbacks
+    def set_user_deactivated_status(self, user_id, deactivated):
+        """Set the `deactivated` property for the provided user to the provided value.
+
+        Args:
+            user_id (str): The ID of the user to set the status for.
+            deactivated (bool): The value to set for `deactivated`.
+        """
+
+        yield self.runInteraction(
+            "set_user_deactivated_status",
+            self.set_user_deactivated_status_txn,
+            user_id,
+            deactivated,
+        )
+
+    def set_user_deactivated_status_txn(self, txn, user_id, deactivated):
+        self._simple_update_one_txn(
+            txn=txn,
+            table="users",
+            keyvalues={"name": user_id},
+            updatevalues={"deactivated": 1 if deactivated else 0},
+        )
+        self._invalidate_cache_and_stream(
+            txn, self.get_user_deactivated_status, (user_id,)
+        )
+
+
+class RegistrationWorkerStore(RegistrationDeactivationStore):
     def __init__(self, db_conn, hs):
         super(RegistrationWorkerStore, self).__init__(db_conn, hs)
 
@@ -673,27 +723,6 @@ class RegistrationWorkerStore(SQLBaseStore):
             desc="get_id_servers_user_bound",
         )
 
-    @cachedInlineCallbacks()
-    def get_user_deactivated_status(self, user_id):
-        """Retrieve the value for the `deactivated` property for the provided user.
-
-        Args:
-            user_id (str): The ID of the user to retrieve the status for.
-
-        Returns:
-            defer.Deferred(bool): The requested value.
-        """
-
-        res = yield self._simple_select_one_onecol(
-            table="users",
-            keyvalues={"name": user_id},
-            retcol="deactivated",
-            desc="get_user_deactivated_status",
-        )
-
-        # Convert the integer into a boolean.
-        return res == 1
-
     def get_threepid_validation_session(
         self, medium, client_secret, address=None, sid=None, validated=True
     ):
@@ -787,13 +816,14 @@ class RegistrationWorkerStore(SQLBaseStore):
         )
 
 
-class RegistrationStore(
-    RegistrationWorkerStore, background_updates.BackgroundUpdateStore
+class RegistrationBackgroundUpdateStore(
+    RegistrationDeactivationStore, background_updates.BackgroundUpdateStore
 ):
     def __init__(self, db_conn, hs):
-        super(RegistrationStore, self).__init__(db_conn, hs)
+        super(RegistrationBackgroundUpdateStore, self).__init__(db_conn, hs)
 
         self.clock = hs.get_clock()
+        self.config = hs.config
 
         self.register_background_index_update(
             "access_tokens_device_index",
@@ -809,8 +839,6 @@ class RegistrationStore(
             columns=["creation_ts"],
         )
 
-        self._account_validity = hs.config.account_validity
-
         # we no longer use refresh tokens, but it's possible that some people
         # might have a background update queued to build this index. Just
         # clear the background update.
@@ -824,17 +852,6 @@ class RegistrationStore(
             "users_set_deactivated_flag", self._background_update_set_deactivated_flag
         )
 
-        # Create a background job for culling expired 3PID validity tokens
-        def start_cull():
-            # run as a background process to make sure that the database transactions
-            # have a logcontext to report to
-            return run_as_background_process(
-                "cull_expired_threepid_validation_tokens",
-                self.cull_expired_threepid_validation_tokens,
-            )
-
-        hs.get_clock().looping_call(start_cull, THIRTY_MINUTES_IN_MS)
-
     @defer.inlineCallbacks
     def _background_update_set_deactivated_flag(self, progress, batch_size):
         """Retrieves a list of all deactivated users and sets the 'deactivated' flag to 1
@@ -896,6 +913,54 @@ class RegistrationStore(
 
         return nb_processed
 
+    @defer.inlineCallbacks
+    def _bg_user_threepids_grandfather(self, progress, batch_size):
+        """We now track which identity servers a user binds their 3PID to, so
+        we need to handle the case of existing bindings where we didn't track
+        this.
+
+        We do this by grandfathering in existing user threepids assuming that
+        they used one of the server configured trusted identity servers.
+        """
+        id_servers = set(self.config.trusted_third_party_id_servers)
+
+        def _bg_user_threepids_grandfather_txn(txn):
+            sql = """
+                INSERT INTO user_threepid_id_server
+                    (user_id, medium, address, id_server)
+                SELECT user_id, medium, address, ?
+                FROM user_threepids
+            """
+
+            txn.executemany(sql, [(id_server,) for id_server in id_servers])
+
+        if id_servers:
+            yield self.runInteraction(
+                "_bg_user_threepids_grandfather", _bg_user_threepids_grandfather_txn
+            )
+
+        yield self._end_background_update("user_threepids_grandfather")
+
+        return 1
+
+
+class RegistrationStore(RegistrationWorkerStore, RegistrationBackgroundUpdateStore):
+    def __init__(self, db_conn, hs):
+        super(RegistrationStore, self).__init__(db_conn, hs)
+
+        self._account_validity = hs.config.account_validity
+
+        # Create a background job for culling expired 3PID validity tokens
+        def start_cull():
+            # run as a background process to make sure that the database transactions
+            # have a logcontext to report to
+            return run_as_background_process(
+                "cull_expired_threepid_validation_tokens",
+                self.cull_expired_threepid_validation_tokens,
+            )
+
+        hs.get_clock().looping_call(start_cull, THIRTY_MINUTES_IN_MS)
+
     @defer.inlineCallbacks
     def add_access_token_to_user(self, user_id, token, device_id, valid_until_ms):
         """Adds an access token for the given user.
@@ -1244,36 +1309,6 @@ class RegistrationStore(
             desc="get_users_pending_deactivation",
         )
 
-    @defer.inlineCallbacks
-    def _bg_user_threepids_grandfather(self, progress, batch_size):
-        """We now track which identity servers a user binds their 3PID to, so
-        we need to handle the case of existing bindings where we didn't track
-        this.
-
-        We do this by grandfathering in existing user threepids assuming that
-        they used one of the server configured trusted identity servers.
-        """
-        id_servers = set(self.config.trusted_third_party_id_servers)
-
-        def _bg_user_threepids_grandfather_txn(txn):
-            sql = """
-                INSERT INTO user_threepid_id_server
-                    (user_id, medium, address, id_server)
-                SELECT user_id, medium, address, ?
-                FROM user_threepids
-            """
-
-            txn.executemany(sql, [(id_server,) for id_server in id_servers])
-
-        if id_servers:
-            yield self.runInteraction(
-                "_bg_user_threepids_grandfather", _bg_user_threepids_grandfather_txn
-            )
-
-        yield self._end_background_update("user_threepids_grandfather")
-
-        return 1
-
     def validate_threepid_session(self, session_id, client_secret, token, current_ts):
         """Attempt to validate a threepid session using a token
 
@@ -1464,30 +1499,3 @@ class RegistrationStore(
             cull_expired_threepid_validation_tokens_txn,
             self.clock.time_msec(),
         )
-
-    def set_user_deactivated_status_txn(self, txn, user_id, deactivated):
-        self._simple_update_one_txn(
-            txn=txn,
-            table="users",
-            keyvalues={"name": user_id},
-            updatevalues={"deactivated": 1 if deactivated else 0},
-        )
-        self._invalidate_cache_and_stream(
-            txn, self.get_user_deactivated_status, (user_id,)
-        )
-
-    @defer.inlineCallbacks
-    def set_user_deactivated_status(self, user_id, deactivated):
-        """Set the `deactivated` property for the provided user to the provided value.
-
-        Args:
-            user_id (str): The ID of the user to set the status for.
-            deactivated (bool): The value to set for `deactivated`.
-        """
-
-        yield self.runInteraction(
-            "set_user_deactivated_status",
-            self.set_user_deactivated_status_txn,
-            user_id,
-            deactivated,
-        )

From 841054ad96b44161d7a990bc1349ecc70fcd736c Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Thu, 3 Oct 2019 17:47:42 +0100
Subject: [PATCH 201/276] Move search's bg updates to a dedicated store

---
 synapse/storage/search.py | 56 ++++++++++++++++++++++-----------------
 1 file changed, 31 insertions(+), 25 deletions(-)

diff --git a/synapse/storage/search.py b/synapse/storage/search.py
index df87ab6a6..9a41e7800 100644
--- a/synapse/storage/search.py
+++ b/synapse/storage/search.py
@@ -36,7 +36,7 @@ SearchEntry = namedtuple(
 )
 
 
-class SearchStore(BackgroundUpdateStore):
+class SearchBackgroundUpdateStore(BackgroundUpdateStore):
 
     EVENT_SEARCH_UPDATE_NAME = "event_search"
     EVENT_SEARCH_ORDER_UPDATE_NAME = "event_search_order"
@@ -44,7 +44,7 @@ class SearchStore(BackgroundUpdateStore):
     EVENT_SEARCH_USE_GIN_POSTGRES_NAME = "event_search_postgres_gin"
 
     def __init__(self, db_conn, hs):
-        super(SearchStore, self).__init__(db_conn, hs)
+        super(SearchBackgroundUpdateStore, self).__init__(db_conn, hs)
 
         if not hs.config.enable_search:
             return
@@ -289,29 +289,6 @@ class SearchStore(BackgroundUpdateStore):
 
         return num_rows
 
-    def store_event_search_txn(self, txn, event, key, value):
-        """Add event to the search table
-
-        Args:
-            txn (cursor):
-            event (EventBase):
-            key (str):
-            value (str):
-        """
-        self.store_search_entries_txn(
-            txn,
-            (
-                SearchEntry(
-                    key=key,
-                    value=value,
-                    event_id=event.event_id,
-                    room_id=event.room_id,
-                    stream_ordering=event.internal_metadata.stream_ordering,
-                    origin_server_ts=event.origin_server_ts,
-                ),
-            ),
-        )
-
     def store_search_entries_txn(self, txn, entries):
         """Add entries to the search table
 
@@ -358,6 +335,35 @@ class SearchStore(BackgroundUpdateStore):
             # This should be unreachable.
             raise Exception("Unrecognized database engine")
 
+
+class SearchStore(SearchBackgroundUpdateStore):
+
+    def __init__(self, db_conn, hs):
+        super(SearchStore, self).__init__(db_conn, hs)
+
+    def store_event_search_txn(self, txn, event, key, value):
+        """Add event to the search table
+
+        Args:
+            txn (cursor):
+            event (EventBase):
+            key (str):
+            value (str):
+        """
+        self.store_search_entries_txn(
+            txn,
+            (
+                SearchEntry(
+                    key=key,
+                    value=value,
+                    event_id=event.event_id,
+                    room_id=event.room_id,
+                    stream_ordering=event.internal_metadata.stream_ordering,
+                    origin_server_ts=event.origin_server_ts,
+                ),
+            ),
+        )
+
     @defer.inlineCallbacks
     def search_msgs(self, room_ids, search_term, keys):
         """Performs a full text search over events with given keys.

From cfccd2d78a0b8338f33a5631d8f0637d4ed07e7e Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Thu, 3 Oct 2019 17:56:16 +0100
Subject: [PATCH 202/276] Move state's bg updates to a dedicated store

---
 synapse/storage/state.py | 394 ++++++++++++++++++++-------------------
 1 file changed, 204 insertions(+), 190 deletions(-)

diff --git a/synapse/storage/state.py b/synapse/storage/state.py
index 1980a8710..71b533c00 100644
--- a/synapse/storage/state.py
+++ b/synapse/storage/state.py
@@ -353,8 +353,158 @@ class StateFilter(object):
         return member_filter, non_member_filter
 
 
+class StateGroupBackgroundUpdateStore(SQLBaseStore):
+    """Defines functions related to state groups needed to run the state background
+    updates.
+    """
+
+    def _count_state_group_hops_txn(self, txn, state_group):
+        """Given a state group, count how many hops there are in the tree.
+
+        This is used to ensure the delta chains don't get too long.
+        """
+        if isinstance(self.database_engine, PostgresEngine):
+            sql = """
+                WITH RECURSIVE state(state_group) AS (
+                    VALUES(?::bigint)
+                    UNION ALL
+                    SELECT prev_state_group FROM state_group_edges e, state s
+                    WHERE s.state_group = e.state_group
+                )
+                SELECT count(*) FROM state;
+            """
+
+            txn.execute(sql, (state_group,))
+            row = txn.fetchone()
+            if row and row[0]:
+                return row[0]
+            else:
+                return 0
+        else:
+            # We don't use WITH RECURSIVE on sqlite3 as there are distributions
+            # that ship with an sqlite3 version that doesn't support it (e.g. wheezy)
+            next_group = state_group
+            count = 0
+
+            while next_group:
+                next_group = self._simple_select_one_onecol_txn(
+                    txn,
+                    table="state_group_edges",
+                    keyvalues={"state_group": next_group},
+                    retcol="prev_state_group",
+                    allow_none=True,
+                )
+                if next_group:
+                    count += 1
+
+            return count
+
+    def _get_state_groups_from_groups_txn(
+        self, txn, groups, state_filter=StateFilter.all()
+    ):
+        results = {group: {} for group in groups}
+
+        where_clause, where_args = state_filter.make_sql_filter_clause()
+
+        # Unless the filter clause is empty, we're going to append it after an
+        # existing where clause
+        if where_clause:
+            where_clause = " AND (%s)" % (where_clause,)
+
+        if isinstance(self.database_engine, PostgresEngine):
+            # Temporarily disable sequential scans in this transaction. This is
+            # a temporary hack until we can add the right indices in
+            txn.execute("SET LOCAL enable_seqscan=off")
+
+            # The below query walks the state_group tree so that the "state"
+            # table includes all state_groups in the tree. It then joins
+            # against `state_groups_state` to fetch the latest state.
+            # It assumes that previous state groups are always numerically
+            # lesser.
+            # The PARTITION is used to get the event_id in the greatest state
+            # group for the given type, state_key.
+            # This may return multiple rows per (type, state_key), but last_value
+            # should be the same.
+            sql = """
+                WITH RECURSIVE state(state_group) AS (
+                    VALUES(?::bigint)
+                    UNION ALL
+                    SELECT prev_state_group FROM state_group_edges e, state s
+                    WHERE s.state_group = e.state_group
+                )
+                SELECT DISTINCT type, state_key, last_value(event_id) OVER (
+                    PARTITION BY type, state_key ORDER BY state_group ASC
+                    ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+                ) AS event_id FROM state_groups_state
+                WHERE state_group IN (
+                    SELECT state_group FROM state
+                )
+            """
+
+            for group in groups:
+                args = [group]
+                args.extend(where_args)
+
+                txn.execute(sql + where_clause, args)
+                for row in txn:
+                    typ, state_key, event_id = row
+                    key = (typ, state_key)
+                    results[group][key] = event_id
+        else:
+            max_entries_returned = state_filter.max_entries_returned()
+
+            # We don't use WITH RECURSIVE on sqlite3 as there are distributions
+            # that ship with an sqlite3 version that doesn't support it (e.g. wheezy)
+            for group in groups:
+                next_group = group
+
+                while next_group:
+                    # We did this before by getting the list of group ids, and
+                    # then passing that list to sqlite to get latest event for
+                    # each (type, state_key). However, that was terribly slow
+                    # without the right indices (which we can't add until
+                    # after we finish deduping state, which requires this func)
+                    args = [next_group]
+                    args.extend(where_args)
+
+                    txn.execute(
+                        "SELECT type, state_key, event_id FROM state_groups_state"
+                        " WHERE state_group = ? " + where_clause,
+                        args,
+                    )
+                    results[group].update(
+                        ((typ, state_key), event_id)
+                        for typ, state_key, event_id in txn
+                        if (typ, state_key) not in results[group]
+                    )
+
+                    # If the number of entries in the (type,state_key)->event_id dict
+                    # matches the number of (type,state_keys) types we were searching
+                    # for, then we must have found them all, so no need to go walk
+                    # further down the tree... UNLESS our types filter contained
+                    # wildcards (i.e. Nones) in which case we have to do an exhaustive
+                    # search
+                    if (
+                        max_entries_returned is not None
+                        and len(results[group]) == max_entries_returned
+                    ):
+                        break
+
+                    next_group = self._simple_select_one_onecol_txn(
+                        txn,
+                        table="state_group_edges",
+                        keyvalues={"state_group": next_group},
+                        retcol="prev_state_group",
+                        allow_none=True,
+                    )
+
+        return results
+
+
 # this inherits from EventsWorkerStore because it calls self.get_events
-class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore):
+class StateGroupWorkerStore(
+    EventsWorkerStore, StateGroupBackgroundUpdateStore, SQLBaseStore
+):
     """The parts of StateGroupStore that can be called from workers.
     """
 
@@ -694,107 +844,6 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore):
 
         return results
 
-    def _get_state_groups_from_groups_txn(
-        self, txn, groups, state_filter=StateFilter.all()
-    ):
-        results = {group: {} for group in groups}
-
-        where_clause, where_args = state_filter.make_sql_filter_clause()
-
-        # Unless the filter clause is empty, we're going to append it after an
-        # existing where clause
-        if where_clause:
-            where_clause = " AND (%s)" % (where_clause,)
-
-        if isinstance(self.database_engine, PostgresEngine):
-            # Temporarily disable sequential scans in this transaction. This is
-            # a temporary hack until we can add the right indices in
-            txn.execute("SET LOCAL enable_seqscan=off")
-
-            # The below query walks the state_group tree so that the "state"
-            # table includes all state_groups in the tree. It then joins
-            # against `state_groups_state` to fetch the latest state.
-            # It assumes that previous state groups are always numerically
-            # lesser.
-            # The PARTITION is used to get the event_id in the greatest state
-            # group for the given type, state_key.
-            # This may return multiple rows per (type, state_key), but last_value
-            # should be the same.
-            sql = """
-                WITH RECURSIVE state(state_group) AS (
-                    VALUES(?::bigint)
-                    UNION ALL
-                    SELECT prev_state_group FROM state_group_edges e, state s
-                    WHERE s.state_group = e.state_group
-                )
-                SELECT DISTINCT type, state_key, last_value(event_id) OVER (
-                    PARTITION BY type, state_key ORDER BY state_group ASC
-                    ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
-                ) AS event_id FROM state_groups_state
-                WHERE state_group IN (
-                    SELECT state_group FROM state
-                )
-            """
-
-            for group in groups:
-                args = [group]
-                args.extend(where_args)
-
-                txn.execute(sql + where_clause, args)
-                for row in txn:
-                    typ, state_key, event_id = row
-                    key = (typ, state_key)
-                    results[group][key] = event_id
-        else:
-            max_entries_returned = state_filter.max_entries_returned()
-
-            # We don't use WITH RECURSIVE on sqlite3 as there are distributions
-            # that ship with an sqlite3 version that doesn't support it (e.g. wheezy)
-            for group in groups:
-                next_group = group
-
-                while next_group:
-                    # We did this before by getting the list of group ids, and
-                    # then passing that list to sqlite to get latest event for
-                    # each (type, state_key). However, that was terribly slow
-                    # without the right indices (which we can't add until
-                    # after we finish deduping state, which requires this func)
-                    args = [next_group]
-                    args.extend(where_args)
-
-                    txn.execute(
-                        "SELECT type, state_key, event_id FROM state_groups_state"
-                        " WHERE state_group = ? " + where_clause,
-                        args,
-                    )
-                    results[group].update(
-                        ((typ, state_key), event_id)
-                        for typ, state_key, event_id in txn
-                        if (typ, state_key) not in results[group]
-                    )
-
-                    # If the number of entries in the (type,state_key)->event_id dict
-                    # matches the number of (type,state_keys) types we were searching
-                    # for, then we must have found them all, so no need to go walk
-                    # further down the tree... UNLESS our types filter contained
-                    # wildcards (i.e. Nones) in which case we have to do an exhaustive
-                    # search
-                    if (
-                        max_entries_returned is not None
-                        and len(results[group]) == max_entries_returned
-                    ):
-                        break
-
-                    next_group = self._simple_select_one_onecol_txn(
-                        txn,
-                        table="state_group_edges",
-                        keyvalues={"state_group": next_group},
-                        retcol="prev_state_group",
-                        allow_none=True,
-                    )
-
-        return results
-
     @defer.inlineCallbacks
     def get_state_for_events(self, event_ids, state_filter=StateFilter.all()):
         """Given a list of event_ids and type tuples, return a list of state
@@ -1238,66 +1287,8 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore):
 
         return self.runInteraction("store_state_group", _store_state_group_txn)
 
-    def _count_state_group_hops_txn(self, txn, state_group):
-        """Given a state group, count how many hops there are in the tree.
 
-        This is used to ensure the delta chains don't get too long.
-        """
-        if isinstance(self.database_engine, PostgresEngine):
-            sql = """
-                WITH RECURSIVE state(state_group) AS (
-                    VALUES(?::bigint)
-                    UNION ALL
-                    SELECT prev_state_group FROM state_group_edges e, state s
-                    WHERE s.state_group = e.state_group
-                )
-                SELECT count(*) FROM state;
-            """
-
-            txn.execute(sql, (state_group,))
-            row = txn.fetchone()
-            if row and row[0]:
-                return row[0]
-            else:
-                return 0
-        else:
-            # We don't use WITH RECURSIVE on sqlite3 as there are distributions
-            # that ship with an sqlite3 version that doesn't support it (e.g. wheezy)
-            next_group = state_group
-            count = 0
-
-            while next_group:
-                next_group = self._simple_select_one_onecol_txn(
-                    txn,
-                    table="state_group_edges",
-                    keyvalues={"state_group": next_group},
-                    retcol="prev_state_group",
-                    allow_none=True,
-                )
-                if next_group:
-                    count += 1
-
-            return count
-
-
-class StateStore(StateGroupWorkerStore, BackgroundUpdateStore):
-    """ Keeps track of the state at a given event.
-
-    This is done by the concept of `state groups`. Every event is a assigned
-    a state group (identified by an arbitrary string), which references a
-    collection of state events. The current state of an event is then the
-    collection of state events referenced by the event's state group.
-
-    Hence, every change in the current state causes a new state group to be
-    generated. However, if no change happens (e.g., if we get a message event
-    with only one parent it inherits the state group from its parent.)
-
-    There are three tables:
-      * `state_groups`: Stores group name, first event with in the group and
-        room id.
-      * `event_to_state_groups`: Maps events to state groups.
-      * `state_groups_state`: Maps state group to state events.
-    """
+class StateBackgroundUpdateStore(StateGroupBackgroundUpdateStore, BackgroundUpdateStore):
 
     STATE_GROUP_DEDUPLICATION_UPDATE_NAME = "state_group_state_deduplication"
     STATE_GROUP_INDEX_UPDATE_NAME = "state_group_state_type_index"
@@ -1305,7 +1296,7 @@ class StateStore(StateGroupWorkerStore, BackgroundUpdateStore):
     EVENT_STATE_GROUP_INDEX_UPDATE_NAME = "event_to_state_groups_sg_index"
 
     def __init__(self, db_conn, hs):
-        super(StateStore, self).__init__(db_conn, hs)
+        super(StateBackgroundUpdateStore, self).__init__(db_conn, hs)
         self.register_background_update_handler(
             self.STATE_GROUP_DEDUPLICATION_UPDATE_NAME,
             self._background_deduplicate_state,
@@ -1327,34 +1318,6 @@ class StateStore(StateGroupWorkerStore, BackgroundUpdateStore):
             columns=["state_group"],
         )
 
-    def _store_event_state_mappings_txn(self, txn, events_and_contexts):
-        state_groups = {}
-        for event, context in events_and_contexts:
-            if event.internal_metadata.is_outlier():
-                continue
-
-            # if the event was rejected, just give it the same state as its
-            # predecessor.
-            if context.rejected:
-                state_groups[event.event_id] = context.prev_group
-                continue
-
-            state_groups[event.event_id] = context.state_group
-
-        self._simple_insert_many_txn(
-            txn,
-            table="event_to_state_groups",
-            values=[
-                {"state_group": state_group_id, "event_id": event_id}
-                for event_id, state_group_id in iteritems(state_groups)
-            ],
-        )
-
-        for event_id, state_group_id in iteritems(state_groups):
-            txn.call_after(
-                self._get_state_group_for_event.prefill, (event_id,), state_group_id
-            )
-
     @defer.inlineCallbacks
     def _background_deduplicate_state(self, progress, batch_size):
         """This background update will slowly deduplicate state by reencoding
@@ -1527,3 +1490,54 @@ class StateStore(StateGroupWorkerStore, BackgroundUpdateStore):
         yield self._end_background_update(self.STATE_GROUP_INDEX_UPDATE_NAME)
 
         return 1
+
+
+class StateStore(StateGroupWorkerStore, StateBackgroundUpdateStore):
+    """ Keeps track of the state at a given event.
+
+    This is done by the concept of `state groups`. Every event is a assigned
+    a state group (identified by an arbitrary string), which references a
+    collection of state events. The current state of an event is then the
+    collection of state events referenced by the event's state group.
+
+    Hence, every change in the current state causes a new state group to be
+    generated. However, if no change happens (e.g., if we get a message event
+    with only one parent it inherits the state group from its parent.)
+
+    There are three tables:
+      * `state_groups`: Stores group name, first event with in the group and
+        room id.
+      * `event_to_state_groups`: Maps events to state groups.
+      * `state_groups_state`: Maps state group to state events.
+    """
+
+    def __init__(self, db_conn, hs):
+        super(StateStore, self).__init__(db_conn, hs)
+
+    def _store_event_state_mappings_txn(self, txn, events_and_contexts):
+        state_groups = {}
+        for event, context in events_and_contexts:
+            if event.internal_metadata.is_outlier():
+                continue
+
+            # if the event was rejected, just give it the same state as its
+            # predecessor.
+            if context.rejected:
+                state_groups[event.event_id] = context.prev_group
+                continue
+
+            state_groups[event.event_id] = context.state_group
+
+        self._simple_insert_many_txn(
+            txn,
+            table="event_to_state_groups",
+            values=[
+                {"state_group": state_group_id, "event_id": event_id}
+                for event_id, state_group_id in iteritems(state_groups)
+            ],
+        )
+
+        for event_id, state_group_id in iteritems(state_groups):
+            txn.call_after(
+                self._get_state_group_for_event.prefill, (event_id,), state_group_id
+            )

From e106a0e4db23fa1fedcce1c169985a8c91181c86 Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Thu, 3 Oct 2019 18:19:08 +0100
Subject: [PATCH 203/276] Move user_directory's bg updates to a dedicated store

---
 synapse/storage/user_directory.py | 178 ++++++++++++++++--------------
 1 file changed, 94 insertions(+), 84 deletions(-)

diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py
index b5188d9be..1b1e4751b 100644
--- a/synapse/storage/user_directory.py
+++ b/synapse/storage/user_directory.py
@@ -32,14 +32,14 @@ logger = logging.getLogger(__name__)
 TEMP_TABLE = "_temp_populate_user_directory"
 
 
-class UserDirectoryStore(StateDeltasStore, BackgroundUpdateStore):
+class UserDirectoryBackgroundUpdateStore(StateDeltasStore, BackgroundUpdateStore):
 
     # How many records do we calculate before sending it to
     # add_users_who_share_private_rooms?
     SHARE_PRIVATE_WORKING_SET = 500
 
     def __init__(self, db_conn, hs):
-        super(UserDirectoryStore, self).__init__(db_conn, hs)
+        super(UserDirectoryBackgroundUpdateStore, self).__init__(db_conn, hs)
 
         self.server_name = hs.hostname
 
@@ -452,55 +452,6 @@ class UserDirectoryStore(StateDeltasStore, BackgroundUpdateStore):
             "update_profile_in_user_dir", _update_profile_in_user_dir_txn
         )
 
-    def remove_from_user_dir(self, user_id):
-        def _remove_from_user_dir_txn(txn):
-            self._simple_delete_txn(
-                txn, table="user_directory", keyvalues={"user_id": user_id}
-            )
-            self._simple_delete_txn(
-                txn, table="user_directory_search", keyvalues={"user_id": user_id}
-            )
-            self._simple_delete_txn(
-                txn, table="users_in_public_rooms", keyvalues={"user_id": user_id}
-            )
-            self._simple_delete_txn(
-                txn,
-                table="users_who_share_private_rooms",
-                keyvalues={"user_id": user_id},
-            )
-            self._simple_delete_txn(
-                txn,
-                table="users_who_share_private_rooms",
-                keyvalues={"other_user_id": user_id},
-            )
-            txn.call_after(self.get_user_in_directory.invalidate, (user_id,))
-
-        return self.runInteraction("remove_from_user_dir", _remove_from_user_dir_txn)
-
-    @defer.inlineCallbacks
-    def get_users_in_dir_due_to_room(self, room_id):
-        """Get all user_ids that are in the room directory because they're
-        in the given room_id
-        """
-        user_ids_share_pub = yield self._simple_select_onecol(
-            table="users_in_public_rooms",
-            keyvalues={"room_id": room_id},
-            retcol="user_id",
-            desc="get_users_in_dir_due_to_room",
-        )
-
-        user_ids_share_priv = yield self._simple_select_onecol(
-            table="users_who_share_private_rooms",
-            keyvalues={"room_id": room_id},
-            retcol="other_user_id",
-            desc="get_users_in_dir_due_to_room",
-        )
-
-        user_ids = set(user_ids_share_pub)
-        user_ids.update(user_ids_share_priv)
-
-        return user_ids
-
     def add_users_who_share_private_room(self, room_id, user_id_tuples):
         """Insert entries into the users_who_share_private_rooms table. The first
         user should be a local user.
@@ -551,6 +502,98 @@ class UserDirectoryStore(StateDeltasStore, BackgroundUpdateStore):
             "add_users_in_public_rooms", _add_users_in_public_rooms_txn
         )
 
+    def delete_all_from_user_dir(self):
+        """Delete the entire user directory
+        """
+
+        def _delete_all_from_user_dir_txn(txn):
+            txn.execute("DELETE FROM user_directory")
+            txn.execute("DELETE FROM user_directory_search")
+            txn.execute("DELETE FROM users_in_public_rooms")
+            txn.execute("DELETE FROM users_who_share_private_rooms")
+            txn.call_after(self.get_user_in_directory.invalidate_all)
+
+        return self.runInteraction(
+            "delete_all_from_user_dir", _delete_all_from_user_dir_txn
+        )
+
+    @cached()
+    def get_user_in_directory(self, user_id):
+        return self._simple_select_one(
+            table="user_directory",
+            keyvalues={"user_id": user_id},
+            retcols=("display_name", "avatar_url"),
+            allow_none=True,
+            desc="get_user_in_directory",
+        )
+
+    def update_user_directory_stream_pos(self, stream_id):
+        return self._simple_update_one(
+            table="user_directory_stream_pos",
+            keyvalues={},
+            updatevalues={"stream_id": stream_id},
+            desc="update_user_directory_stream_pos",
+        )
+
+
+class UserDirectoryStore(UserDirectoryBackgroundUpdateStore):
+
+    # How many records do we calculate before sending it to
+    # add_users_who_share_private_rooms?
+    SHARE_PRIVATE_WORKING_SET = 500
+
+    def __init__(self, db_conn, hs):
+        super(UserDirectoryStore, self).__init__(db_conn, hs)
+
+    def remove_from_user_dir(self, user_id):
+        def _remove_from_user_dir_txn(txn):
+            self._simple_delete_txn(
+                txn, table="user_directory", keyvalues={"user_id": user_id}
+            )
+            self._simple_delete_txn(
+                txn, table="user_directory_search", keyvalues={"user_id": user_id}
+            )
+            self._simple_delete_txn(
+                txn, table="users_in_public_rooms", keyvalues={"user_id": user_id}
+            )
+            self._simple_delete_txn(
+                txn,
+                table="users_who_share_private_rooms",
+                keyvalues={"user_id": user_id},
+            )
+            self._simple_delete_txn(
+                txn,
+                table="users_who_share_private_rooms",
+                keyvalues={"other_user_id": user_id},
+            )
+            txn.call_after(self.get_user_in_directory.invalidate, (user_id,))
+
+        return self.runInteraction("remove_from_user_dir", _remove_from_user_dir_txn)
+
+    @defer.inlineCallbacks
+    def get_users_in_dir_due_to_room(self, room_id):
+        """Get all user_ids that are in the room directory because they're
+        in the given room_id
+        """
+        user_ids_share_pub = yield self._simple_select_onecol(
+            table="users_in_public_rooms",
+            keyvalues={"room_id": room_id},
+            retcol="user_id",
+            desc="get_users_in_dir_due_to_room",
+        )
+
+        user_ids_share_priv = yield self._simple_select_onecol(
+            table="users_who_share_private_rooms",
+            keyvalues={"room_id": room_id},
+            retcol="other_user_id",
+            desc="get_users_in_dir_due_to_room",
+        )
+
+        user_ids = set(user_ids_share_pub)
+        user_ids.update(user_ids_share_priv)
+
+        return user_ids
+
     def remove_user_who_share_room(self, user_id, room_id):
         """
         Deletes entries in the users_who_share_*_rooms table. The first
@@ -637,31 +680,6 @@ class UserDirectoryStore(StateDeltasStore, BackgroundUpdateStore):
 
         return [room_id for room_id, in rows]
 
-    def delete_all_from_user_dir(self):
-        """Delete the entire user directory
-        """
-
-        def _delete_all_from_user_dir_txn(txn):
-            txn.execute("DELETE FROM user_directory")
-            txn.execute("DELETE FROM user_directory_search")
-            txn.execute("DELETE FROM users_in_public_rooms")
-            txn.execute("DELETE FROM users_who_share_private_rooms")
-            txn.call_after(self.get_user_in_directory.invalidate_all)
-
-        return self.runInteraction(
-            "delete_all_from_user_dir", _delete_all_from_user_dir_txn
-        )
-
-    @cached()
-    def get_user_in_directory(self, user_id):
-        return self._simple_select_one(
-            table="user_directory",
-            keyvalues={"user_id": user_id},
-            retcols=("display_name", "avatar_url"),
-            allow_none=True,
-            desc="get_user_in_directory",
-        )
-
     def get_user_directory_stream_pos(self):
         return self._simple_select_one_onecol(
             table="user_directory_stream_pos",
@@ -670,14 +688,6 @@ class UserDirectoryStore(StateDeltasStore, BackgroundUpdateStore):
             desc="get_user_directory_stream_pos",
         )
 
-    def update_user_directory_stream_pos(self, stream_id):
-        return self._simple_update_one(
-            table="user_directory_stream_pos",
-            keyvalues={},
-            updatevalues={"stream_id": stream_id},
-            desc="update_user_directory_stream_pos",
-        )
-
     @defer.inlineCallbacks
     def search_user_dir(self, user_id, search_term, limit):
         """Searches for users in directory

From 0496eafbf4277523430114cf965a254241b290e7 Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Thu, 3 Oct 2019 19:00:55 +0100
Subject: [PATCH 204/276] Move roommember's bg updates to a dedicated store

---
 synapse/storage/roommember.py | 222 +++++++++++++++++-----------------
 1 file changed, 114 insertions(+), 108 deletions(-)

diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py
index 59a89fad6..4e606a838 100644
--- a/synapse/storage/roommember.py
+++ b/synapse/storage/roommember.py
@@ -27,6 +27,7 @@ from synapse.api.constants import EventTypes, Membership
 from synapse.metrics import LaterGauge
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.storage._base import LoggingTransaction
+from synapse.storage.background_updates import BackgroundUpdateStore
 from synapse.storage.engines import Sqlite3Engine
 from synapse.storage.events_worker import EventsWorkerStore
 from synapse.types import get_domain_from_id
@@ -820,9 +821,9 @@ class RoomMemberWorkerStore(EventsWorkerStore):
         return set(room_ids)
 
 
-class RoomMemberStore(RoomMemberWorkerStore):
+class RoomMemberBackgroundUpdateStore(BackgroundUpdateStore):
     def __init__(self, db_conn, hs):
-        super(RoomMemberStore, self).__init__(db_conn, hs)
+        super(RoomMemberBackgroundUpdateStore, self).__init__(db_conn, hs)
         self.register_background_update_handler(
             _MEMBERSHIP_PROFILE_UPDATE_NAME, self._background_add_membership_profile
         )
@@ -838,112 +839,6 @@ class RoomMemberStore(RoomMemberWorkerStore):
             where_clause="forgotten = 1",
         )
 
-    def _store_room_members_txn(self, txn, events, backfilled):
-        """Store a room member in the database.
-        """
-        self._simple_insert_many_txn(
-            txn,
-            table="room_memberships",
-            values=[
-                {
-                    "event_id": event.event_id,
-                    "user_id": event.state_key,
-                    "sender": event.user_id,
-                    "room_id": event.room_id,
-                    "membership": event.membership,
-                    "display_name": event.content.get("displayname", None),
-                    "avatar_url": event.content.get("avatar_url", None),
-                }
-                for event in events
-            ],
-        )
-
-        for event in events:
-            txn.call_after(
-                self._membership_stream_cache.entity_has_changed,
-                event.state_key,
-                event.internal_metadata.stream_ordering,
-            )
-            txn.call_after(
-                self.get_invited_rooms_for_user.invalidate, (event.state_key,)
-            )
-
-            # We update the local_invites table only if the event is "current",
-            # i.e., its something that has just happened. If the event is an
-            # outlier it is only current if its an "out of band membership",
-            # like a remote invite or a rejection of a remote invite.
-            is_new_state = not backfilled and (
-                not event.internal_metadata.is_outlier()
-                or event.internal_metadata.is_out_of_band_membership()
-            )
-            is_mine = self.hs.is_mine_id(event.state_key)
-            if is_new_state and is_mine:
-                if event.membership == Membership.INVITE:
-                    self._simple_insert_txn(
-                        txn,
-                        table="local_invites",
-                        values={
-                            "event_id": event.event_id,
-                            "invitee": event.state_key,
-                            "inviter": event.sender,
-                            "room_id": event.room_id,
-                            "stream_id": event.internal_metadata.stream_ordering,
-                        },
-                    )
-                else:
-                    sql = (
-                        "UPDATE local_invites SET stream_id = ?, replaced_by = ? WHERE"
-                        " room_id = ? AND invitee = ? AND locally_rejected is NULL"
-                        " AND replaced_by is NULL"
-                    )
-
-                    txn.execute(
-                        sql,
-                        (
-                            event.internal_metadata.stream_ordering,
-                            event.event_id,
-                            event.room_id,
-                            event.state_key,
-                        ),
-                    )
-
-    @defer.inlineCallbacks
-    def locally_reject_invite(self, user_id, room_id):
-        sql = (
-            "UPDATE local_invites SET stream_id = ?, locally_rejected = ? WHERE"
-            " room_id = ? AND invitee = ? AND locally_rejected is NULL"
-            " AND replaced_by is NULL"
-        )
-
-        def f(txn, stream_ordering):
-            txn.execute(sql, (stream_ordering, True, room_id, user_id))
-
-        with self._stream_id_gen.get_next() as stream_ordering:
-            yield self.runInteraction("locally_reject_invite", f, stream_ordering)
-
-    def forget(self, user_id, room_id):
-        """Indicate that user_id wishes to discard history for room_id."""
-
-        def f(txn):
-            sql = (
-                "UPDATE"
-                "  room_memberships"
-                " SET"
-                "  forgotten = 1"
-                " WHERE"
-                "  user_id = ?"
-                " AND"
-                "  room_id = ?"
-            )
-            txn.execute(sql, (user_id, room_id))
-
-            self._invalidate_cache_and_stream(txn, self.did_forget, (user_id, room_id))
-            self._invalidate_cache_and_stream(
-                txn, self.get_forgotten_rooms_for_user, (user_id,)
-            )
-
-        return self.runInteraction("forget_membership", f)
-
     @defer.inlineCallbacks
     def _background_add_membership_profile(self, progress, batch_size):
         target_min_stream_id = progress.get(
@@ -1078,6 +973,117 @@ class RoomMemberStore(RoomMemberWorkerStore):
         return row_count
 
 
+class RoomMemberStore(RoomMemberWorkerStore, RoomMemberBackgroundUpdateStore):
+    def __init__(self, db_conn, hs):
+        super(RoomMemberStore, self).__init__(db_conn, hs)
+
+    def _store_room_members_txn(self, txn, events, backfilled):
+        """Store a room member in the database.
+        """
+        self._simple_insert_many_txn(
+            txn,
+            table="room_memberships",
+            values=[
+                {
+                    "event_id": event.event_id,
+                    "user_id": event.state_key,
+                    "sender": event.user_id,
+                    "room_id": event.room_id,
+                    "membership": event.membership,
+                    "display_name": event.content.get("displayname", None),
+                    "avatar_url": event.content.get("avatar_url", None),
+                }
+                for event in events
+            ],
+        )
+
+        for event in events:
+            txn.call_after(
+                self._membership_stream_cache.entity_has_changed,
+                event.state_key,
+                event.internal_metadata.stream_ordering,
+            )
+            txn.call_after(
+                self.get_invited_rooms_for_user.invalidate, (event.state_key,)
+            )
+
+            # We update the local_invites table only if the event is "current",
+            # i.e., its something that has just happened. If the event is an
+            # outlier it is only current if its an "out of band membership",
+            # like a remote invite or a rejection of a remote invite.
+            is_new_state = not backfilled and (
+                not event.internal_metadata.is_outlier()
+                or event.internal_metadata.is_out_of_band_membership()
+            )
+            is_mine = self.hs.is_mine_id(event.state_key)
+            if is_new_state and is_mine:
+                if event.membership == Membership.INVITE:
+                    self._simple_insert_txn(
+                        txn,
+                        table="local_invites",
+                        values={
+                            "event_id": event.event_id,
+                            "invitee": event.state_key,
+                            "inviter": event.sender,
+                            "room_id": event.room_id,
+                            "stream_id": event.internal_metadata.stream_ordering,
+                        },
+                    )
+                else:
+                    sql = (
+                        "UPDATE local_invites SET stream_id = ?, replaced_by = ? WHERE"
+                        " room_id = ? AND invitee = ? AND locally_rejected is NULL"
+                        " AND replaced_by is NULL"
+                    )
+
+                    txn.execute(
+                        sql,
+                        (
+                            event.internal_metadata.stream_ordering,
+                            event.event_id,
+                            event.room_id,
+                            event.state_key,
+                        ),
+                    )
+
+    @defer.inlineCallbacks
+    def locally_reject_invite(self, user_id, room_id):
+        sql = (
+            "UPDATE local_invites SET stream_id = ?, locally_rejected = ? WHERE"
+            " room_id = ? AND invitee = ? AND locally_rejected is NULL"
+            " AND replaced_by is NULL"
+        )
+
+        def f(txn, stream_ordering):
+            txn.execute(sql, (stream_ordering, True, room_id, user_id))
+
+        with self._stream_id_gen.get_next() as stream_ordering:
+            yield self.runInteraction("locally_reject_invite", f, stream_ordering)
+
+    def forget(self, user_id, room_id):
+        """Indicate that user_id wishes to discard history for room_id."""
+
+        def f(txn):
+            sql = (
+                "UPDATE"
+                "  room_memberships"
+                " SET"
+                "  forgotten = 1"
+                " WHERE"
+                "  user_id = ?"
+                " AND"
+                "  room_id = ?"
+            )
+            txn.execute(sql, (user_id, room_id))
+
+            self._invalidate_cache_and_stream(txn, self.did_forget, (user_id, room_id))
+            self._invalidate_cache_and_stream(
+                txn, self.get_forgotten_rooms_for_user, (user_id,)
+            )
+
+        return self.runInteraction("forget_membership", f)
+
+
 class _JoinedHostsCache(object):
     """Cache for joined hosts in a room that is optimised to handle updates
     via state deltas.

From cc2e19ad4b4fb55306f060354f74d1750e4b6001 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Mon, 7 Oct 2019 17:37:55 +0100
Subject: [PATCH 205/276] fix changelog

---
 changelog.d/6175.misc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/changelog.d/6175.misc b/changelog.d/6175.misc
index 5bb24f02f..30b3e5608 100644
--- a/changelog.d/6175.misc
+++ b/changelog.d/6175.misc
@@ -1 +1 @@
-Update `user_filters` table to have a unique index, and non-null columns. Thanks to @pik for contributing this
+Update `user_filters` table to have a unique index, and non-null columns. Thanks to @pik for contributing this.
\ No newline at end of file

From 66ebea17235d9d3988d56cd1355656bbb508b3be Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Thu, 3 Oct 2019 18:23:05 +0100
Subject: [PATCH 206/276] Lint

---
 synapse/storage/media_repository.py | 1 -
 synapse/storage/search.py           | 1 -
 synapse/storage/state.py            | 4 +++-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/synapse/storage/media_repository.py b/synapse/storage/media_repository.py
index 2eb2740d0..84b5f3ad5 100644
--- a/synapse/storage/media_repository.py
+++ b/synapse/storage/media_repository.py
@@ -16,7 +16,6 @@ from synapse.storage.background_updates import BackgroundUpdateStore
 
 
 class MediaRepositoryBackgroundUpdateStore(BackgroundUpdateStore):
-
     def __init__(self, db_conn, hs):
         super(MediaRepositoryBackgroundUpdateStore, self).__init__(db_conn, hs)
 
diff --git a/synapse/storage/search.py b/synapse/storage/search.py
index 9a41e7800..6ba4190f1 100644
--- a/synapse/storage/search.py
+++ b/synapse/storage/search.py
@@ -337,7 +337,6 @@ class SearchBackgroundUpdateStore(BackgroundUpdateStore):
 
 
 class SearchStore(SearchBackgroundUpdateStore):
-
     def __init__(self, db_conn, hs):
         super(SearchStore, self).__init__(db_conn, hs)
 
diff --git a/synapse/storage/state.py b/synapse/storage/state.py
index 71b533c00..a941a5ae3 100644
--- a/synapse/storage/state.py
+++ b/synapse/storage/state.py
@@ -1288,7 +1288,9 @@ class StateGroupWorkerStore(
         return self.runInteraction("store_state_group", _store_state_group_txn)
 
 
-class StateBackgroundUpdateStore(StateGroupBackgroundUpdateStore, BackgroundUpdateStore):
+class StateBackgroundUpdateStore(
+    StateGroupBackgroundUpdateStore, BackgroundUpdateStore
+):
 
     STATE_GROUP_DEDUPLICATION_UPDATE_NAME = "state_group_state_deduplication"
     STATE_GROUP_INDEX_UPDATE_NAME = "state_group_state_type_index"

From 21b5d8b1076354c7c6ee8849491f3fc886cc8189 Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Mon, 7 Oct 2019 18:00:31 +0100
Subject: [PATCH 207/276] Changelog

---
 changelog.d/6178.bugfix | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/6178.bugfix

diff --git a/changelog.d/6178.bugfix b/changelog.d/6178.bugfix
new file mode 100644
index 000000000..cd288c2a4
--- /dev/null
+++ b/changelog.d/6178.bugfix
@@ -0,0 +1 @@
+Make the `synapse_port_db` script create the right indexes on a new PostgreSQL database.

From b94a401852a5b6d87455285ea050c4e0731dd6ab Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Tue, 8 Oct 2019 09:35:37 +0100
Subject: [PATCH 208/276] Fix /federation/v1/state for recent room versions
 (#6170)

* Fix /federation/v1/state for recent room versions

Turns out this endpoint was completely broken for v3 rooms. Hopefully this
re-signing code is irrelevant nowadays anyway.
---
 changelog.d/6170.bugfix                 |  1 +
 synapse/federation/federation_server.py | 13 -------------
 2 files changed, 1 insertion(+), 13 deletions(-)
 create mode 100644 changelog.d/6170.bugfix

diff --git a/changelog.d/6170.bugfix b/changelog.d/6170.bugfix
new file mode 100644
index 000000000..52f7ea233
--- /dev/null
+++ b/changelog.d/6170.bugfix
@@ -0,0 +1 @@
+Fix /federation/v1/state endpoint for recent room versions.
diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py
index da06ab379..21e52c969 100644
--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@@ -36,7 +36,6 @@ from synapse.api.errors import (
     UnsupportedRoomVersionError,
 )
 from synapse.api.room_versions import KNOWN_ROOM_VERSIONS
-from synapse.crypto.event_signing import compute_event_signature
 from synapse.events import room_version_to_event_format
 from synapse.federation.federation_base import FederationBase, event_from_pdu_json
 from synapse.federation.persistence import TransactionActions
@@ -322,18 +321,6 @@ class FederationServer(FederationBase):
         pdus = yield self.handler.get_state_for_pdu(room_id, event_id)
         auth_chain = yield self.store.get_auth_chain([pdu.event_id for pdu in pdus])
 
-        for event in auth_chain:
-            # We sign these again because there was a bug where we
-            # incorrectly signed things the first time round
-            if self.hs.is_mine_id(event.event_id):
-                event.signatures.update(
-                    compute_event_signature(
-                        event.get_pdu_json(),
-                        self.hs.hostname,
-                        self.hs.config.signing_key[0],
-                    )
-                )
-
         return {
             "pdus": [pdu.get_pdu_json() for pdu in pdus],
             "auth_chain": [pdu.get_pdu_json() for pdu in auth_chain],

From ea7d938bca2d5fa0d6a54412ecdf036c5a3fc3a7 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Tue, 8 Oct 2019 13:51:25 +0100
Subject: [PATCH 209/276] Remove unused public room list timeout param (#6179)

* Remove unused public room list timeout param

* Add changelog
---
 changelog.d/6179.misc         |  1 +
 synapse/handlers/room_list.py | 13 +------------
 2 files changed, 2 insertions(+), 12 deletions(-)
 create mode 100644 changelog.d/6179.misc

diff --git a/changelog.d/6179.misc b/changelog.d/6179.misc
new file mode 100644
index 000000000..01c4e71ea
--- /dev/null
+++ b/changelog.d/6179.misc
@@ -0,0 +1 @@
+Remove unused `timeout` parameter from `_get_public_room_list`.
\ No newline at end of file
diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py
index cfed344d4..c615206df 100644
--- a/synapse/handlers/room_list.py
+++ b/synapse/handlers/room_list.py
@@ -88,16 +88,8 @@ class RoomListHandler(BaseHandler):
             # appservice specific lists.
             logger.info("Bypassing cache as search request.")
 
-            # XXX: Quick hack to stop room directory queries taking too long.
-            # Timeout request after 60s. Probably want a more fundamental
-            # solution at some point
-            timeout = self.clock.time() + 60
             return self._get_public_room_list(
-                limit,
-                since_token,
-                search_filter,
-                network_tuple=network_tuple,
-                timeout=timeout,
+                limit, since_token, search_filter, network_tuple=network_tuple
             )
 
         key = (limit, since_token, network_tuple)
@@ -118,7 +110,6 @@ class RoomListHandler(BaseHandler):
         search_filter=None,
         network_tuple=EMPTY_THIRD_PARTY_ID,
         from_federation=False,
-        timeout=None,
     ):
         """Generate a public room list.
         Args:
@@ -131,8 +122,6 @@ class RoomListHandler(BaseHandler):
                 Setting to None returns all public rooms across all lists.
             from_federation (bool): Whether this request originated from a
                 federating server or a client. Used for room filtering.
-            timeout (int|None): Amount of seconds to wait for a response before
-                timing out. TODO
         """
 
         # Pagination tokens work by storing the room ID sent in the last batch,

From 474abf1eb6852ca488fbf86d3da0622a457efef1 Mon Sep 17 00:00:00 2001
From: Anshul Angaria <angaria.anshul38@gmail.com>
Date: Tue, 8 Oct 2019 18:25:16 +0530
Subject: [PATCH 210/276] add M_TOO_LARGE error code for uploading a too large
 file (#6151)

Fixes #6109
---
 changelog.d/6109.bugfix                  | 1 +
 synapse/rest/media/v1/upload_resource.py | 8 ++++++--
 2 files changed, 7 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/6109.bugfix

diff --git a/changelog.d/6109.bugfix b/changelog.d/6109.bugfix
new file mode 100644
index 000000000..da7ac1be4
--- /dev/null
+++ b/changelog.d/6109.bugfix
@@ -0,0 +1 @@
+Fix bug when uploading a large file: Synapse responds with `M_UNKNOWN` while it should be `M_TOO_LARGE` according to spec. Contributed by Anshul Angaria.
diff --git a/synapse/rest/media/v1/upload_resource.py b/synapse/rest/media/v1/upload_resource.py
index 5d76bbdf6..83d005812 100644
--- a/synapse/rest/media/v1/upload_resource.py
+++ b/synapse/rest/media/v1/upload_resource.py
@@ -17,7 +17,7 @@ import logging
 
 from twisted.web.server import NOT_DONE_YET
 
-from synapse.api.errors import SynapseError
+from synapse.api.errors import Codes, SynapseError
 from synapse.http.server import (
     DirectServeResource,
     respond_with_json,
@@ -56,7 +56,11 @@ class UploadResource(DirectServeResource):
         if content_length is None:
             raise SynapseError(msg="Request must specify a Content-Length", code=400)
         if int(content_length) > self.max_upload_size:
-            raise SynapseError(msg="Upload request body is too large", code=413)
+            raise SynapseError(
+                msg="Upload request body is too large",
+                code=413,
+                errcode=Codes.TOO_LARGE,
+            )
 
         upload_name = parse_string(request, b"filename", encoding=None)
         if upload_name:

From 8f1b385accbf8be15c35b6f06b18eb6d998544e4 Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Tue, 8 Oct 2019 14:36:33 +0100
Subject: [PATCH 211/276] Don't end up with 4 classes in registration

---
 synapse/storage/registration.py | 102 ++++++++++++++++----------------
 1 file changed, 50 insertions(+), 52 deletions(-)

diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py
index 1f6c93b73..524b5eeab 100644
--- a/synapse/storage/registration.py
+++ b/synapse/storage/registration.py
@@ -37,57 +37,7 @@ THIRTY_MINUTES_IN_MS = 30 * 60 * 1000
 logger = logging.getLogger(__name__)
 
 
-class RegistrationDeactivationStore(SQLBaseStore):
-    @cachedInlineCallbacks()
-    def get_user_deactivated_status(self, user_id):
-        """Retrieve the value for the `deactivated` property for the provided user.
-
-        Args:
-            user_id (str): The ID of the user to retrieve the status for.
-
-        Returns:
-            defer.Deferred(bool): The requested value.
-        """
-
-        res = yield self._simple_select_one_onecol(
-            table="users",
-            keyvalues={"name": user_id},
-            retcol="deactivated",
-            desc="get_user_deactivated_status",
-        )
-
-        # Convert the integer into a boolean.
-        return res == 1
-
-    @defer.inlineCallbacks
-    def set_user_deactivated_status(self, user_id, deactivated):
-        """Set the `deactivated` property for the provided user to the provided value.
-
-        Args:
-            user_id (str): The ID of the user to set the status for.
-            deactivated (bool): The value to set for `deactivated`.
-        """
-
-        yield self.runInteraction(
-            "set_user_deactivated_status",
-            self.set_user_deactivated_status_txn,
-            user_id,
-            deactivated,
-        )
-
-    def set_user_deactivated_status_txn(self, txn, user_id, deactivated):
-        self._simple_update_one_txn(
-            txn=txn,
-            table="users",
-            keyvalues={"name": user_id},
-            updatevalues={"deactivated": 1 if deactivated else 0},
-        )
-        self._invalidate_cache_and_stream(
-            txn, self.get_user_deactivated_status, (user_id,)
-        )
-
-
-class RegistrationWorkerStore(RegistrationDeactivationStore):
+class RegistrationWorkerStore(SQLBaseStore):
     def __init__(self, db_conn, hs):
         super(RegistrationWorkerStore, self).__init__(db_conn, hs)
 
@@ -723,6 +673,27 @@ class RegistrationWorkerStore(RegistrationDeactivationStore):
             desc="get_id_servers_user_bound",
         )
 
+    @cachedInlineCallbacks()
+    def get_user_deactivated_status(self, user_id):
+        """Retrieve the value for the `deactivated` property for the provided user.
+
+        Args:
+            user_id (str): The ID of the user to retrieve the status for.
+
+        Returns:
+            defer.Deferred(bool): The requested value.
+        """
+
+        res = yield self._simple_select_one_onecol(
+            table="users",
+            keyvalues={"name": user_id},
+            retcol="deactivated",
+            desc="get_user_deactivated_status",
+        )
+
+        # Convert the integer into a boolean.
+        return res == 1
+
     def get_threepid_validation_session(
         self, medium, client_secret, address=None, sid=None, validated=True
     ):
@@ -817,7 +788,7 @@ class RegistrationWorkerStore(RegistrationDeactivationStore):
 
 
 class RegistrationBackgroundUpdateStore(
-    RegistrationDeactivationStore, background_updates.BackgroundUpdateStore
+    RegistrationWorkerStore, background_updates.BackgroundUpdateStore
 ):
     def __init__(self, db_conn, hs):
         super(RegistrationBackgroundUpdateStore, self).__init__(db_conn, hs)
@@ -1499,3 +1470,30 @@ class RegistrationStore(RegistrationWorkerStore, RegistrationBackgroundUpdateSto
             cull_expired_threepid_validation_tokens_txn,
             self.clock.time_msec(),
         )
+
+    @defer.inlineCallbacks
+    def set_user_deactivated_status(self, user_id, deactivated):
+        """Set the `deactivated` property for the provided user to the provided value.
+
+        Args:
+            user_id (str): The ID of the user to set the status for.
+            deactivated (bool): The value to set for `deactivated`.
+        """
+
+        yield self.runInteraction(
+            "set_user_deactivated_status",
+            self.set_user_deactivated_status_txn,
+            user_id,
+            deactivated,
+        )
+
+    def set_user_deactivated_status_txn(self, txn, user_id, deactivated):
+        self._simple_update_one_txn(
+            txn=txn,
+            table="users",
+            keyvalues={"name": user_id},
+            updatevalues={"deactivated": 1 if deactivated else 0},
+        )
+        self._invalidate_cache_and_stream(
+            txn, self.get_user_deactivated_status, (user_id,)
+        )

From b1c0a4ceb3c2c5ca51f0b32efcd58d8493fd9b99 Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Tue, 8 Oct 2019 14:38:14 +0100
Subject: [PATCH 212/276] Cleanup client_ips

---
 synapse/storage/client_ips.py | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py
index 1d89b50f5..067820a5d 100644
--- a/synapse/storage/client_ips.py
+++ b/synapse/storage/client_ips.py
@@ -35,15 +35,8 @@ LAST_SEEN_GRANULARITY = 120 * 1000
 
 class ClientIpBackgroundUpdateStore(background_updates.BackgroundUpdateStore):
     def __init__(self, db_conn, hs):
-
-        self.client_ip_last_seen = Cache(
-            name="client_ip_last_seen", keylen=4, max_entries=50000 * CACHE_SIZE_FACTOR
-        )
-
         super(ClientIpBackgroundUpdateStore, self).__init__(db_conn, hs)
 
-        self.user_ips_max_age = hs.config.user_ips_max_age
-
         self.register_background_index_update(
             "user_ips_device_index",
             index_name="user_ips_device_id",

From c69324ffb588f72786c37b864d510abd279e47a2 Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Tue, 8 Oct 2019 14:48:33 +0100
Subject: [PATCH 213/276] Fix RegistrationStore

---
 synapse/storage/registration.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py
index 524b5eeab..6c5b29288 100644
--- a/synapse/storage/registration.py
+++ b/synapse/storage/registration.py
@@ -915,7 +915,7 @@ class RegistrationBackgroundUpdateStore(
         return 1
 
 
-class RegistrationStore(RegistrationWorkerStore, RegistrationBackgroundUpdateStore):
+class RegistrationStore(RegistrationBackgroundUpdateStore):
     def __init__(self, db_conn, hs):
         super(RegistrationStore, self).__init__(db_conn, hs)
 

From ced4784592ab6b9080ec1d6c7aa2664a42b3a38e Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Tue, 8 Oct 2019 14:31:43 +0100
Subject: [PATCH 214/276] Fix inserting bytes as text

---
 synapse/storage/events.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/synapse/storage/events.py b/synapse/storage/events.py
index 2e485c864..bb6ff0595 100644
--- a/synapse/storage/events.py
+++ b/synapse/storage/events.py
@@ -23,7 +23,7 @@ from functools import wraps
 from six import iteritems, text_type
 from six.moves import range
 
-from canonicaljson import encode_canonical_json, json
+from canonicaljson import json
 from prometheus_client import Counter, Histogram
 
 from twisted.internet import defer
@@ -1632,9 +1632,7 @@ class EventsStore(
                 and original_event.internal_metadata.is_redacted()
             ):
                 # Redaction was allowed
-                pruned_json = encode_canonical_json(
-                    prune_event_dict(original_event.get_dict())
-                )
+                pruned_json = encode_json(prune_event_dict(original_event.get_dict()))
             else:
                 # Redaction wasn't allowed
                 pruned_json = None

From 6b72508d15b2a074fb35e68cec45700f10ea09f2 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Tue, 8 Oct 2019 15:03:28 +0100
Subject: [PATCH 215/276] Newsfile

---
 changelog.d/6185.bugfix | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/6185.bugfix

diff --git a/changelog.d/6185.bugfix b/changelog.d/6185.bugfix
new file mode 100644
index 000000000..199ec6903
--- /dev/null
+++ b/changelog.d/6185.bugfix
@@ -0,0 +1 @@
+Fix bug where we were updating censored events as bytes rather than text, occaisonally causing invalid JSON being inserted breaking APIs that attempted to fetch such events.

From e7631d84e62e05b0dd0087b1b32b1f5f3ac521c5 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Tue, 8 Oct 2019 15:09:05 +0100
Subject: [PATCH 216/276] Fix existing hex encoded json values in DB

---
 .../redaction_censor3_fix_update.sql.postgres | 26 +++++++++++++++++++
 1 file changed, 26 insertions(+)
 create mode 100644 synapse/storage/schema/delta/56/redaction_censor3_fix_update.sql.postgres

diff --git a/synapse/storage/schema/delta/56/redaction_censor3_fix_update.sql.postgres b/synapse/storage/schema/delta/56/redaction_censor3_fix_update.sql.postgres
new file mode 100644
index 000000000..f7bcc5e2f
--- /dev/null
+++ b/synapse/storage/schema/delta/56/redaction_censor3_fix_update.sql.postgres
@@ -0,0 +1,26 @@
+/* Copyright 2019 The Matrix.org Foundation C.I.C.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+-- There was a bug where we may have updated censored redactions as bytes,
+-- which can (somehow) cause json to be inserted hex encoded. This goes and
+-- undoes any such hex encoded JSON.
+UPDATE event_json SET json = convert_from(json::bytea, 'utf8')
+WHERE event_id IN (
+  SELECT event_json.event_id
+  FROM event_json
+  INNER JOIN redactions ON (event_json.event_id = redacts)
+  WHERE have_censored AND json NOT LIKE '{%'
+);

From 1d3858371e9577faf3382d1feee97154e5085cd4 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Tue, 8 Oct 2019 16:21:17 +0100
Subject: [PATCH 217/276] Disable bytes usage with postgres

More often than not passing bytes to `txn.execute` is a bug (where we
meant to pass a string) that just happens to work if `BYTEA_OUTPUT` is
set to `ESCAPE`. However, this is a bit of a footgun so we want to
instead error when this happens, and force using `bytearray` if we
actually want to use bytes.
---
 synapse/storage/engines/postgres.py | 7 +++++++
 synapse/storage/filtering.py        | 4 ++--
 synapse/storage/pusher.py           | 2 +-
 3 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/synapse/storage/engines/postgres.py b/synapse/storage/engines/postgres.py
index 601617b21..d670286fa 100644
--- a/synapse/storage/engines/postgres.py
+++ b/synapse/storage/engines/postgres.py
@@ -22,6 +22,13 @@ class PostgresEngine(object):
     def __init__(self, database_module, database_config):
         self.module = database_module
         self.module.extensions.register_type(self.module.extensions.UNICODE)
+
+        # Disables passing `bytes` to txn.execute, c.f. #6186. If you do
+        # actually want to use bytes then wrap it in `bytearray`.
+        def _disable_bytes_adapter(_):
+            raise Exception("Passing bytes to DB is disabled.")
+
+        self.module.extensions.register_adapter(bytes, _disable_bytes_adapter)
         self.synchronous_commit = database_config.get("synchronous_commit", True)
         self._version = None  # unknown as yet
 
diff --git a/synapse/storage/filtering.py b/synapse/storage/filtering.py
index 23b48f6ce..7c2a7da83 100644
--- a/synapse/storage/filtering.py
+++ b/synapse/storage/filtering.py
@@ -51,7 +51,7 @@ class FilteringStore(SQLBaseStore):
                 "SELECT filter_id FROM user_filters "
                 "WHERE user_id = ? AND filter_json = ?"
             )
-            txn.execute(sql, (user_localpart, def_json))
+            txn.execute(sql, (user_localpart, bytearray(def_json)))
             filter_id_response = txn.fetchone()
             if filter_id_response is not None:
                 return filter_id_response[0]
@@ -68,7 +68,7 @@ class FilteringStore(SQLBaseStore):
                 "INSERT INTO user_filters (user_id, filter_id, filter_json)"
                 "VALUES(?, ?, ?)"
             )
-            txn.execute(sql, (user_localpart, filter_id, def_json))
+            txn.execute(sql, (user_localpart, filter_id, bytearray(def_json)))
 
             return filter_id
 
diff --git a/synapse/storage/pusher.py b/synapse/storage/pusher.py
index 3e0e834a6..b12e80440 100644
--- a/synapse/storage/pusher.py
+++ b/synapse/storage/pusher.py
@@ -241,7 +241,7 @@ class PusherStore(PusherWorkerStore):
                     "device_display_name": device_display_name,
                     "ts": pushkey_ts,
                     "lang": lang,
-                    "data": encode_canonical_json(data),
+                    "data": bytearray(encode_canonical_json(data)),
                     "last_stream_ordering": last_stream_ordering,
                     "profile_tag": profile_tag,
                     "id": stream_id,

From de26678724cd5c19dcc77c0d55fd89320cee38d4 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 9 Oct 2019 15:13:02 +0100
Subject: [PATCH 218/276] Update changelog.d/6185.bugfix

Co-Authored-By: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
---
 changelog.d/6185.bugfix | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/changelog.d/6185.bugfix b/changelog.d/6185.bugfix
index 199ec6903..9d1c669b8 100644
--- a/changelog.d/6185.bugfix
+++ b/changelog.d/6185.bugfix
@@ -1 +1 @@
-Fix bug where we were updating censored events as bytes rather than text, occaisonally causing invalid JSON being inserted breaking APIs that attempted to fetch such events.
+Fix bug where redacted events were sometimes incorrectly censored in the database, breaking APIs that attempted to fetch such events.

From 7f18b3d5262746d4095c747f6b80899445f0aa2d Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 9 Oct 2019 16:03:24 +0100
Subject: [PATCH 219/276] Do the update as a background update

---
 synapse/storage/events_bg_updates.py          | 43 +++++++++++++++++++
 .../redaction_censor3_fix_update.sql.postgres | 17 ++++----
 2 files changed, 51 insertions(+), 9 deletions(-)

diff --git a/synapse/storage/events_bg_updates.py b/synapse/storage/events_bg_updates.py
index 5717baf48..e77a7e28a 100644
--- a/synapse/storage/events_bg_updates.py
+++ b/synapse/storage/events_bg_updates.py
@@ -71,6 +71,19 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore):
             "redactions_received_ts", self._redactions_received_ts
         )
 
+        # This index gets deleted in `event_fix_redactions_bytes` update
+        self.register_background_index_update(
+            "event_fix_redactions_bytes_create_index",
+            index_name="redactions_censored_redacts",
+            table="redactions",
+            columns=["redacts"],
+            where_clause="have_censored",
+        )
+
+        self.register_background_update_handler(
+            "event_fix_redactions_bytes", self._event_fix_redactions_bytes
+        )
+
     @defer.inlineCallbacks
     def _background_reindex_fields_sender(self, progress, batch_size):
         target_min_stream_id = progress["target_min_stream_id_inclusive"]
@@ -458,3 +471,33 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore):
             yield self._end_background_update("redactions_received_ts")
 
         return count
+
+    @defer.inlineCallbacks
+    def _event_fix_redactions_bytes(self, progress, batch_size):
+        """Undoes hex encoded censored redacted event JSON.
+        """
+
+        def _event_fix_redactions_bytes_txn(txn):
+            # This update is quite fast due to new index.
+            txn.execute(
+                """
+                UPDATE event_json
+                SET
+                    json = convert_from(json::bytea, 'utf8')
+                FROM redactions
+                WHERE
+                    redactions.have_censored
+                    AND event_json.event_id = redactions.redacts
+                    AND json NOT LIKE '{%';
+                """
+            )
+
+            txn.execute("DROP INDEX redactions_censored_redacts")
+
+        yield self.runInteraction(
+            "_event_fix_redactions_bytes", _event_fix_redactions_bytes_txn
+        )
+
+        yield self._end_background_update("event_fix_redactions_bytes")
+
+        return 1
diff --git a/synapse/storage/schema/delta/56/redaction_censor3_fix_update.sql.postgres b/synapse/storage/schema/delta/56/redaction_censor3_fix_update.sql.postgres
index f7bcc5e2f..67471f3ef 100644
--- a/synapse/storage/schema/delta/56/redaction_censor3_fix_update.sql.postgres
+++ b/synapse/storage/schema/delta/56/redaction_censor3_fix_update.sql.postgres
@@ -15,12 +15,11 @@
 
 
 -- There was a bug where we may have updated censored redactions as bytes,
--- which can (somehow) cause json to be inserted hex encoded. This goes and
--- undoes any such hex encoded JSON.
-UPDATE event_json SET json = convert_from(json::bytea, 'utf8')
-WHERE event_id IN (
-  SELECT event_json.event_id
-  FROM event_json
-  INNER JOIN redactions ON (event_json.event_id = redacts)
-  WHERE have_censored AND json NOT LIKE '{%'
-);
+-- which can (somehow) cause json to be inserted hex encoded. These updates go
+-- and undo any such hex encoded JSON.
+
+INSERT into background_updates (update_name, progress_json)
+  VALUES ('event_fix_redactions_bytes_create_index', '{}');
+
+INSERT into background_updates (update_name, progress_json, depends_on)
+  VALUES ('event_fix_redactions_bytes', '{}', 'event_fix_redactions_bytes_create_index');

From c3b34dc32f85ed0b526dde1ed3d61316a8f461d8 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 9 Oct 2019 16:32:04 +0100
Subject: [PATCH 220/276] Newsfile

---
 changelog.d/6186.bugfix | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/6186.bugfix

diff --git a/changelog.d/6186.bugfix b/changelog.d/6186.bugfix
new file mode 100644
index 000000000..199ec6903
--- /dev/null
+++ b/changelog.d/6186.bugfix
@@ -0,0 +1 @@
+Fix bug where we were updating censored events as bytes rather than text, occaisonally causing invalid JSON being inserted breaking APIs that attempted to fetch such events.

From 4535a07f4af0854eed0cfd171e4032bdd3f39cbb Mon Sep 17 00:00:00 2001
From: Hubert Chathi <hubert@uhoreg.ca>
Date: Wed, 9 Oct 2019 17:54:03 -0400
Subject: [PATCH 221/276] make version optional in body of e2e backup version
 update

to agree with latest version of the MSC
---
 synapse/handlers/e2e_room_keys.py         |  4 +-
 synapse/rest/client/v2_alpha/room_keys.py |  2 +-
 tests/handlers/test_e2e_room_keys.py      | 47 +++++++++++++++--------
 3 files changed, 34 insertions(+), 19 deletions(-)

diff --git a/synapse/handlers/e2e_room_keys.py b/synapse/handlers/e2e_room_keys.py
index a9d80f708..0cea445f0 100644
--- a/synapse/handlers/e2e_room_keys.py
+++ b/synapse/handlers/e2e_room_keys.py
@@ -352,8 +352,8 @@ class E2eRoomKeysHandler(object):
             A deferred of an empty dict.
         """
         if "version" not in version_info:
-            raise SynapseError(400, "Missing version in body", Codes.MISSING_PARAM)
-        if version_info["version"] != version:
+            version_info["version"] = version
+        elif version_info["version"] != version:
             raise SynapseError(
                 400, "Version in body does not match", Codes.INVALID_PARAM
             )
diff --git a/synapse/rest/client/v2_alpha/room_keys.py b/synapse/rest/client/v2_alpha/room_keys.py
index df4f44cd3..d59678643 100644
--- a/synapse/rest/client/v2_alpha/room_keys.py
+++ b/synapse/rest/client/v2_alpha/room_keys.py
@@ -375,7 +375,7 @@ class RoomKeysVersionServlet(RestServlet):
                     "ed25519:something": "hijklmnop"
                 }
             },
-            "version": "42"
+            "version": "12345"
         }
 
         HTTP/1.1 200 OK
diff --git a/tests/handlers/test_e2e_room_keys.py b/tests/handlers/test_e2e_room_keys.py
index c4503c161..c700a2fad 100644
--- a/tests/handlers/test_e2e_room_keys.py
+++ b/tests/handlers/test_e2e_room_keys.py
@@ -187,9 +187,8 @@ class E2eRoomKeysHandlerTestCase(unittest.TestCase):
         self.assertEqual(res, 404)
 
     @defer.inlineCallbacks
-    def test_update_bad_version(self):
-        """Check that we get a 400 if the version in the body is missing or
-        doesn't match
+    def test_update_missing_version(self):
+        """Check that the update succeeds if the version is missing from the body
         """
         version = yield self.handler.create_version(
             self.local_user,
@@ -197,19 +196,35 @@ class E2eRoomKeysHandlerTestCase(unittest.TestCase):
         )
         self.assertEqual(version, "1")
 
-        res = None
-        try:
-            yield self.handler.update_version(
-                self.local_user,
-                version,
-                {
-                    "algorithm": "m.megolm_backup.v1",
-                    "auth_data": "revised_first_version_auth_data",
-                },
-            )
-        except errors.SynapseError as e:
-            res = e.code
-        self.assertEqual(res, 400)
+        yield self.handler.update_version(
+            self.local_user,
+            version,
+            {
+                "algorithm": "m.megolm_backup.v1",
+                "auth_data": "revised_first_version_auth_data",
+            },
+        )
+
+        # check we can retrieve it as the current version
+        res = yield self.handler.get_version_info(self.local_user)
+        self.assertDictEqual(
+            res,
+            {
+                "algorithm": "m.megolm_backup.v1",
+                "auth_data": "revised_first_version_auth_data",
+                "version": version,
+            },
+        )
+
+    @defer.inlineCallbacks
+    def test_update_bad_version(self):
+        """Check that we get a 400 if the version in the body doesn't match
+        """
+        version = yield self.handler.create_version(
+            self.local_user,
+            {"algorithm": "m.megolm_backup.v1", "auth_data": "first_version_auth_data"},
+        )
+        self.assertEqual(version, "1")
 
         res = None
         try:

From b46cc856ec9f8ac8c96199a5291dfa71cd37ee86 Mon Sep 17 00:00:00 2001
From: Hubert Chathi <hubert@uhoreg.ca>
Date: Wed, 9 Oct 2019 18:03:40 -0400
Subject: [PATCH 222/276] add changelog

---
 changelog.d/6189.misc | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/6189.misc

diff --git a/changelog.d/6189.misc b/changelog.d/6189.misc
new file mode 100644
index 000000000..a66eb384e
--- /dev/null
+++ b/changelog.d/6189.misc
@@ -0,0 +1 @@
+Make `version` optional in body of `PUT /room_keys/version/{version}`, since it's redundant.

From f743108a94658eb1dbaf168d39874272f756a386 Mon Sep 17 00:00:00 2001
From: Amber Brown <hawkowl@atleastfornow.net>
Date: Thu, 10 Oct 2019 09:39:35 +0100
Subject: [PATCH 223/276] Refactor HomeserverConfig so it can be typechecked
 (#6137)

---
 changelog.d/6137.misc                     |   1 +
 mypy.ini                                  |  16 +-
 synapse/config/_base.py                   | 191 +++++++++++++++++-----
 synapse/config/_base.pyi                  | 135 +++++++++++++++
 synapse/config/api.py                     |   2 +
 synapse/config/appservice.py              |   2 +
 synapse/config/captcha.py                 |   2 +
 synapse/config/cas.py                     |   2 +
 synapse/config/consent_config.py          |   3 +
 synapse/config/database.py                |   2 +
 synapse/config/emailconfig.py             |   2 +
 synapse/config/groups.py                  |   2 +
 synapse/config/homeserver.py              |  68 ++++----
 synapse/config/jwt_config.py              |   2 +
 synapse/config/key.py                     |   2 +
 synapse/config/logger.py                  |   2 +
 synapse/config/metrics.py                 |   2 +
 synapse/config/password.py                |   2 +
 synapse/config/password_auth_providers.py |   2 +
 synapse/config/push.py                    |   2 +
 synapse/config/ratelimiting.py            |   2 +
 synapse/config/registration.py            |   4 +
 synapse/config/repository.py              |   2 +
 synapse/config/room_directory.py          |   2 +
 synapse/config/saml2_config.py            |   2 +
 synapse/config/server.py                  |   2 +
 synapse/config/server_notices_config.py   |   2 +
 synapse/config/spam_checker.py            |   2 +
 synapse/config/stats.py                   |   2 +
 synapse/config/third_party_event_rules.py |   2 +
 synapse/config/tls.py                     |   9 +-
 synapse/config/tracer.py                  |   2 +
 synapse/config/user_directory.py          |   2 +
 synapse/config/voip.py                    |   2 +
 synapse/config/workers.py                 |   2 +
 tests/config/test_tls.py                  |  25 ++-
 tox.ini                                   |   3 +-
 37 files changed, 415 insertions(+), 94 deletions(-)
 create mode 100644 changelog.d/6137.misc
 create mode 100644 synapse/config/_base.pyi

diff --git a/changelog.d/6137.misc b/changelog.d/6137.misc
new file mode 100644
index 000000000..92a02e71c
--- /dev/null
+++ b/changelog.d/6137.misc
@@ -0,0 +1 @@
+Refactor configuration loading to allow better typechecking.
diff --git a/mypy.ini b/mypy.ini
index 8788574ee..ffadaddc0 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -4,10 +4,6 @@ plugins=mypy_zope:plugin
 follow_imports=skip
 mypy_path=stubs
 
-[mypy-synapse.config.homeserver]
-# this is a mess because of the metaclass shenanigans
-ignore_errors = True
-
 [mypy-zope]
 ignore_missing_imports = True
 
@@ -52,3 +48,15 @@ ignore_missing_imports = True
 
 [mypy-signedjson.*]
 ignore_missing_imports = True
+
+[mypy-prometheus_client.*]
+ignore_missing_imports = True
+
+[mypy-service_identity.*]
+ignore_missing_imports = True
+
+[mypy-daemonize]
+ignore_missing_imports = True
+
+[mypy-sentry_sdk]
+ignore_missing_imports = True
diff --git a/synapse/config/_base.py b/synapse/config/_base.py
index 31f653097..08619404b 100644
--- a/synapse/config/_base.py
+++ b/synapse/config/_base.py
@@ -18,7 +18,9 @@
 import argparse
 import errno
 import os
+from collections import OrderedDict
 from textwrap import dedent
+from typing import Any, MutableMapping, Optional
 
 from six import integer_types
 
@@ -51,7 +53,56 @@ Missing mandatory `server_name` config option.
 """
 
 
+def path_exists(file_path):
+    """Check if a file exists
+
+    Unlike os.path.exists, this throws an exception if there is an error
+    checking if the file exists (for example, if there is a perms error on
+    the parent dir).
+
+    Returns:
+        bool: True if the file exists; False if not.
+    """
+    try:
+        os.stat(file_path)
+        return True
+    except OSError as e:
+        if e.errno != errno.ENOENT:
+            raise e
+        return False
+
+
 class Config(object):
+    """
+    A configuration section, containing configuration keys and values.
+
+    Attributes:
+        section (str): The section title of this config object, such as
+            "tls" or "logging". This is used to refer to it on the root
+            config (for example, `config.tls.some_option`). Must be
+            defined in subclasses.
+    """
+
+    section = None
+
+    def __init__(self, root_config=None):
+        self.root = root_config
+
+    def __getattr__(self, item: str) -> Any:
+        """
+        Try and fetch a configuration option that does not exist on this class.
+
+        This is so that existing configs that rely on `self.value`, where value
+        is actually from a different config section, continue to work.
+        """
+        if item in ["generate_config_section", "read_config"]:
+            raise AttributeError(item)
+
+        if self.root is None:
+            raise AttributeError(item)
+        else:
+            return self.root._get_unclassed_config(self.section, item)
+
     @staticmethod
     def parse_size(value):
         if isinstance(value, integer_types):
@@ -88,22 +139,7 @@ class Config(object):
 
     @classmethod
     def path_exists(cls, file_path):
-        """Check if a file exists
-
-        Unlike os.path.exists, this throws an exception if there is an error
-        checking if the file exists (for example, if there is a perms error on
-        the parent dir).
-
-        Returns:
-            bool: True if the file exists; False if not.
-        """
-        try:
-            os.stat(file_path)
-            return True
-        except OSError as e:
-            if e.errno != errno.ENOENT:
-                raise e
-            return False
+        return path_exists(file_path)
 
     @classmethod
     def check_file(cls, file_path, config_name):
@@ -136,42 +172,106 @@ class Config(object):
         with open(file_path) as file_stream:
             return file_stream.read()
 
-    def invoke_all(self, name, *args, **kargs):
-        """Invoke all instance methods with the given name and arguments in the
-        class's MRO.
+
+class RootConfig(object):
+    """
+    Holder of an application's configuration.
+
+    What configuration this object holds is defined by `config_classes`, a list
+    of Config classes that will be instantiated and given the contents of a
+    configuration file to read. They can then be accessed on this class by their
+    section name, which each Config class must define through its `section`
+    attribute; a class that leaves it unset is rejected with a ValueError.
+    """
+
+    config_classes = []
+
+    def __init__(self):
+        self._configs = OrderedDict()
+
+        for config_class in self.config_classes:
+            if config_class.section is None:
+                raise ValueError("%r requires a section name" % (config_class,))
+
+            try:
+                conf = config_class(self)
+            except Exception as e:
+                raise Exception("Failed making %s: %r" % (config_class.section, e))
+            self._configs[config_class.section] = conf
+
+    def __getattr__(self, item: str) -> Any:
+        """
+        Redirect lookups on this object either to config objects, or values on
+        config objects, so that `config.tls.blah` works, as well as legacy uses
+        of things like `config.server_name`. It will first look up the config
+        section name, and then values on those config classes.
+        """
+        if item in self._configs.keys():
+            return self._configs[item]
+
+        return self._get_unclassed_config(None, item)
+
+    def _get_unclassed_config(self, asking_section: Optional[str], item: str):
+        """
+        Fetch a config value from one of the instantiated config classes that
+        has not been fetched directly.
 
         Args:
-            name (str): Name of function to invoke
+            asking_section: If this check is coming from a Config child, which
+                one? This section will not be asked if it has the value.
+            item: The configuration value key.
+
+        Raises:
+            AttributeError if no config classes have the config key. The body
+                will contain what sections were checked.
+        """
+        for key, val in self._configs.items():
+            if key == asking_section:
+                continue
+
+            if item in dir(val):
+                return getattr(val, item)
+
+        raise AttributeError(item, "not found in %s" % (list(self._configs.keys()),))
+
+    def invoke_all(self, func_name: str, *args, **kwargs) -> MutableMapping[str, Any]:
+        """
+        Invoke a function on all instantiated config objects this RootConfig is
+        configured to use.
+
+        Args:
+            func_name: Name of function to invoke
             *args
             **kwargs
-
         Returns:
-            list: The list of the return values from each method called
+            ordered dictionary of config section name and the result of the
+            function from it.
         """
-        results = []
-        for cls in type(self).mro():
-            if name in cls.__dict__:
-                results.append(getattr(cls, name)(self, *args, **kargs))
-        return results
+        res = OrderedDict()
+
+        for name, config in self._configs.items():
+            if hasattr(config, func_name):
+                res[name] = getattr(config, func_name)(*args, **kwargs)
+
+        return res
 
     @classmethod
-    def invoke_all_static(cls, name, *args, **kargs):
-        """Invoke all static methods with the given name and arguments in the
-        class's MRO.
+    def invoke_all_static(cls, func_name: str, *args, **kwargs):
+        """
+        Invoke a static function on config objects this RootConfig is
+        configured to use.
 
         Args:
-            name (str): Name of function to invoke
+            func_name: Name of function to invoke
             *args
             **kwargs
-
         Returns:
-            list: The list of the return values from each method called
+            None. The functions are invoked for their side effects; any values
+            they return are discarded.
         """
-        results = []
-        for c in cls.mro():
-            if name in c.__dict__:
-                results.append(getattr(c, name)(*args, **kargs))
-        return results
+        for config in cls.config_classes:
+            if hasattr(config, func_name):
+                getattr(config, func_name)(*args, **kwargs)
 
     def generate_config(
         self,
@@ -187,7 +287,8 @@ class Config(object):
         tls_private_key_path=None,
         acme_domain=None,
     ):
-        """Build a default configuration file
+        """
+        Build a default configuration file
 
         This is used when the user explicitly asks us to generate a config file
         (eg with --generate_config).
@@ -242,6 +343,7 @@ class Config(object):
         Returns:
             str: the yaml config file
         """
+
         return "\n\n".join(
             dedent(conf)
             for conf in self.invoke_all(
@@ -257,7 +359,7 @@ class Config(object):
                 tls_certificate_path=tls_certificate_path,
                 tls_private_key_path=tls_private_key_path,
                 acme_domain=acme_domain,
-            )
+            ).values()
         )
 
     @classmethod
@@ -444,7 +546,7 @@ class Config(object):
                 )
 
             (config_path,) = config_files
-            if not cls.path_exists(config_path):
+            if not path_exists(config_path):
                 print("Generating config file %s" % (config_path,))
 
                 if config_args.data_directory:
@@ -469,7 +571,7 @@ class Config(object):
                     open_private_ports=config_args.open_private_ports,
                 )
 
-                if not cls.path_exists(config_dir_path):
+                if not path_exists(config_dir_path):
                     os.makedirs(config_dir_path)
                 with open(config_path, "w") as config_file:
                     config_file.write("# vim:ft=yaml\n\n")
@@ -518,7 +620,7 @@ class Config(object):
 
         return obj
 
-    def parse_config_dict(self, config_dict, config_dir_path, data_dir_path):
+    def parse_config_dict(self, config_dict, config_dir_path=None, data_dir_path=None):
         """Read the information from the config dict into this Config object.
 
         Args:
@@ -607,3 +709,6 @@ def find_config_files(search_paths):
             else:
                 config_files.append(config_path)
     return config_files
+
+
+__all__ = ["Config", "RootConfig"]
diff --git a/synapse/config/_base.pyi b/synapse/config/_base.pyi
new file mode 100644
index 000000000..86bc965ee
--- /dev/null
+++ b/synapse/config/_base.pyi
@@ -0,0 +1,135 @@
+from typing import Any, List, Optional
+
+from synapse.config import (
+    api,
+    appservice,
+    captcha,
+    cas,
+    consent_config,
+    database,
+    emailconfig,
+    groups,
+    jwt_config,
+    key,
+    logger,
+    metrics,
+    password,
+    password_auth_providers,
+    push,
+    ratelimiting,
+    registration,
+    repository,
+    room_directory,
+    saml2_config,
+    server,
+    server_notices_config,
+    spam_checker,
+    stats,
+    third_party_event_rules,
+    tls,
+    tracer,
+    user_directory,
+    voip,
+    workers,
+)
+
+class ConfigError(Exception): ...
+
+MISSING_REPORT_STATS_CONFIG_INSTRUCTIONS: str
+MISSING_REPORT_STATS_SPIEL: str
+MISSING_SERVER_NAME: str
+
+def path_exists(file_path: str): ...
+
+class RootConfig:  # attribute names are each Config class's `section` value
+    server: server.ServerConfig
+    tls: tls.TlsConfig
+    database: database.DatabaseConfig
+    logging: logger.LoggingConfig
+    ratelimiting: ratelimiting.RatelimitConfig
+    media: repository.ContentRepositoryConfig
+    captcha: captcha.CaptchaConfig
+    voip: voip.VoipConfig
+    registration: registration.RegistrationConfig
+    metrics: metrics.MetricsConfig
+    api: api.ApiConfig
+    appservice: appservice.AppServiceConfig
+    key: key.KeyConfig
+    saml2: saml2_config.SAML2Config
+    cas: cas.CasConfig
+    jwt: jwt_config.JWTConfig
+    password: password.PasswordConfig
+    email: emailconfig.EmailConfig
+    worker: workers.WorkerConfig
+    authproviders: password_auth_providers.PasswordAuthProviderConfig
+    push: push.PushConfig
+    spamchecker: spam_checker.SpamCheckerConfig
+    groups: groups.GroupsConfig
+    userdirectory: user_directory.UserDirectoryConfig
+    consent: consent_config.ConsentConfig
+    stats: stats.StatsConfig
+    servernotices: server_notices_config.ServerNoticesConfig
+    roomdirectory: room_directory.RoomDirectoryConfig
+    thirdpartyrules: third_party_event_rules.ThirdPartyRulesConfig
+    tracing: tracer.TracerConfig
+
+    config_classes: List = ...
+    def __init__(self) -> None: ...
+    def invoke_all(self, func_name: str, *args: Any, **kwargs: Any): ...
+    @classmethod
+    def invoke_all_static(cls, func_name: str, *args: Any, **kwargs: Any) -> None: ...
+    def __getattr__(self, item: str): ...
+    def parse_config_dict(
+        self,
+        config_dict: Any,
+        config_dir_path: Optional[Any] = ...,
+        data_dir_path: Optional[Any] = ...,
+    ) -> None: ...
+    read_config: Any = ...
+    def generate_config(
+        self,
+        config_dir_path: str,
+        data_dir_path: str,
+        server_name: str,
+        generate_secrets: bool = ...,
+        report_stats: Optional[str] = ...,
+        open_private_ports: bool = ...,
+        listeners: Optional[Any] = ...,
+        database_conf: Optional[Any] = ...,
+        tls_certificate_path: Optional[str] = ...,
+        tls_private_key_path: Optional[str] = ...,
+        acme_domain: Optional[str] = ...,
+    ): ...
+    @classmethod
+    def load_or_generate_config(cls, description: Any, argv: Any): ...
+    @classmethod
+    def load_config(cls, description: Any, argv: Any): ...
+    @classmethod
+    def add_arguments_to_parser(cls, config_parser: Any) -> None: ...
+    @classmethod
+    def load_config_with_parser(cls, parser: Any, argv: Any): ...
+    def generate_missing_files(
+        self, config_dict: dict, config_dir_path: str
+    ) -> None: ...
+
+class Config:
+    root: RootConfig
+    def __init__(self, root_config: Optional[RootConfig] = ...) -> None: ...
+    def __getattr__(self, item: str) -> Any: ...  # falls back to other sections via root
+    @staticmethod
+    def parse_size(value: Any): ...
+    @staticmethod
+    def parse_duration(value: Any): ...
+    @staticmethod
+    def abspath(file_path: Optional[str]): ...
+    @classmethod
+    def path_exists(cls, file_path: str): ...
+    @classmethod
+    def check_file(cls, file_path: str, config_name: str): ...
+    @classmethod
+    def ensure_directory(cls, dir_path: str): ...
+    @classmethod
+    def read_file(cls, file_path: str, config_name: str): ...
+
+def read_config_files(config_files: List[str]): ...
+def find_config_files(search_paths: List[str]): ...
diff --git a/synapse/config/api.py b/synapse/config/api.py
index dddea79a8..74cd53a8e 100644
--- a/synapse/config/api.py
+++ b/synapse/config/api.py
@@ -18,6 +18,8 @@ from ._base import Config
 
 
 class ApiConfig(Config):
+    section = "api"
+
     def read_config(self, config, **kwargs):
         self.room_invite_state_types = config.get(
             "room_invite_state_types",
diff --git a/synapse/config/appservice.py b/synapse/config/appservice.py
index 28d36b1bc..9b4682222 100644
--- a/synapse/config/appservice.py
+++ b/synapse/config/appservice.py
@@ -30,6 +30,8 @@ logger = logging.getLogger(__name__)
 
 
 class AppServiceConfig(Config):
+    section = "appservice"
+
     def read_config(self, config, **kwargs):
         self.app_service_config_files = config.get("app_service_config_files", [])
         self.notify_appservices = config.get("notify_appservices", True)
diff --git a/synapse/config/captcha.py b/synapse/config/captcha.py
index 8dac8152c..44bd5c679 100644
--- a/synapse/config/captcha.py
+++ b/synapse/config/captcha.py
@@ -16,6 +16,8 @@ from ._base import Config
 
 
 class CaptchaConfig(Config):
+    section = "captcha"
+
     def read_config(self, config, **kwargs):
         self.recaptcha_private_key = config.get("recaptcha_private_key")
         self.recaptcha_public_key = config.get("recaptcha_public_key")
diff --git a/synapse/config/cas.py b/synapse/config/cas.py
index ebe34d933..b916c3aa6 100644
--- a/synapse/config/cas.py
+++ b/synapse/config/cas.py
@@ -22,6 +22,8 @@ class CasConfig(Config):
     cas_server_url: URL of CAS server
     """
 
+    section = "cas"
+
     def read_config(self, config, **kwargs):
         cas_config = config.get("cas_config", None)
         if cas_config:
diff --git a/synapse/config/consent_config.py b/synapse/config/consent_config.py
index 48976e17b..62c4c44d6 100644
--- a/synapse/config/consent_config.py
+++ b/synapse/config/consent_config.py
@@ -73,6 +73,9 @@ DEFAULT_CONFIG = """\
 
 
 class ConsentConfig(Config):
+
+    section = "consent"
+
     def __init__(self, *args):
         super(ConsentConfig, self).__init__(*args)
 
diff --git a/synapse/config/database.py b/synapse/config/database.py
index 118aafbd4..0e2509f0b 100644
--- a/synapse/config/database.py
+++ b/synapse/config/database.py
@@ -21,6 +21,8 @@ from ._base import Config
 
 
 class DatabaseConfig(Config):
+    section = "database"
+
     def read_config(self, config, **kwargs):
         self.event_cache_size = self.parse_size(config.get("event_cache_size", "10K"))
 
diff --git a/synapse/config/emailconfig.py b/synapse/config/emailconfig.py
index d9b43de66..658897a77 100644
--- a/synapse/config/emailconfig.py
+++ b/synapse/config/emailconfig.py
@@ -28,6 +28,8 @@ from ._base import Config, ConfigError
 
 
 class EmailConfig(Config):
+    section = "email"
+
     def read_config(self, config, **kwargs):
         # TODO: We should separate better the email configuration from the notification
         # and account validity config.
diff --git a/synapse/config/groups.py b/synapse/config/groups.py
index 2a522b5f4..d6862d9a6 100644
--- a/synapse/config/groups.py
+++ b/synapse/config/groups.py
@@ -17,6 +17,8 @@ from ._base import Config
 
 
 class GroupsConfig(Config):
+    section = "groups"
+
     def read_config(self, config, **kwargs):
         self.enable_group_creation = config.get("enable_group_creation", False)
         self.group_creation_prefix = config.get("group_creation_prefix", "")
diff --git a/synapse/config/homeserver.py b/synapse/config/homeserver.py
index 72acad4f1..6e348671c 100644
--- a/synapse/config/homeserver.py
+++ b/synapse/config/homeserver.py
@@ -14,6 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from ._base import RootConfig
 from .api import ApiConfig
 from .appservice import AppServiceConfig
 from .captcha import CaptchaConfig
@@ -46,36 +47,37 @@ from .voip import VoipConfig
 from .workers import WorkerConfig
 
 
-class HomeServerConfig(
-    ServerConfig,
-    TlsConfig,
-    DatabaseConfig,
-    LoggingConfig,
-    RatelimitConfig,
-    ContentRepositoryConfig,
-    CaptchaConfig,
-    VoipConfig,
-    RegistrationConfig,
-    MetricsConfig,
-    ApiConfig,
-    AppServiceConfig,
-    KeyConfig,
-    SAML2Config,
-    CasConfig,
-    JWTConfig,
-    PasswordConfig,
-    EmailConfig,
-    WorkerConfig,
-    PasswordAuthProviderConfig,
-    PushConfig,
-    SpamCheckerConfig,
-    GroupsConfig,
-    UserDirectoryConfig,
-    ConsentConfig,
-    StatsConfig,
-    ServerNoticesConfig,
-    RoomDirectoryConfig,
-    ThirdPartyRulesConfig,
-    TracerConfig,
-):
-    pass
+class HomeServerConfig(RootConfig):
+
+    config_classes = [
+        ServerConfig,
+        TlsConfig,
+        DatabaseConfig,
+        LoggingConfig,
+        RatelimitConfig,
+        ContentRepositoryConfig,
+        CaptchaConfig,
+        VoipConfig,
+        RegistrationConfig,
+        MetricsConfig,
+        ApiConfig,
+        AppServiceConfig,
+        KeyConfig,
+        SAML2Config,
+        CasConfig,
+        JWTConfig,
+        PasswordConfig,
+        EmailConfig,
+        WorkerConfig,
+        PasswordAuthProviderConfig,
+        PushConfig,
+        SpamCheckerConfig,
+        GroupsConfig,
+        UserDirectoryConfig,
+        ConsentConfig,
+        StatsConfig,
+        ServerNoticesConfig,
+        RoomDirectoryConfig,
+        ThirdPartyRulesConfig,
+        TracerConfig,
+    ]
diff --git a/synapse/config/jwt_config.py b/synapse/config/jwt_config.py
index 36d87cef0..a56872698 100644
--- a/synapse/config/jwt_config.py
+++ b/synapse/config/jwt_config.py
@@ -23,6 +23,8 @@ MISSING_JWT = """Missing jwt library. This is required for jwt login.
 
 
 class JWTConfig(Config):
+    section = "jwt"
+
     def read_config(self, config, **kwargs):
         jwt_config = config.get("jwt_config", None)
         if jwt_config:
diff --git a/synapse/config/key.py b/synapse/config/key.py
index f039f96e9..ec5d430af 100644
--- a/synapse/config/key.py
+++ b/synapse/config/key.py
@@ -92,6 +92,8 @@ class TrustedKeyServer(object):
 
 
 class KeyConfig(Config):
+    section = "key"
+
     def read_config(self, config, config_dir_path, **kwargs):
         # the signing key can be specified inline or in a separate file
         if "signing_key" in config:
diff --git a/synapse/config/logger.py b/synapse/config/logger.py
index 767ecfdf0..d609ec111 100644
--- a/synapse/config/logger.py
+++ b/synapse/config/logger.py
@@ -84,6 +84,8 @@ root:
 
 
 class LoggingConfig(Config):
+    section = "logging"
+
     def read_config(self, config, **kwargs):
         self.log_config = self.abspath(config.get("log_config"))
         self.no_redirect_stdio = config.get("no_redirect_stdio", False)
diff --git a/synapse/config/metrics.py b/synapse/config/metrics.py
index ec35a6b86..282a43bdd 100644
--- a/synapse/config/metrics.py
+++ b/synapse/config/metrics.py
@@ -34,6 +34,8 @@ class MetricsFlags(object):
 
 
 class MetricsConfig(Config):
+    section = "metrics"
+
     def read_config(self, config, **kwargs):
         self.enable_metrics = config.get("enable_metrics", False)
         self.report_stats = config.get("report_stats", None)
diff --git a/synapse/config/password.py b/synapse/config/password.py
index d5b5953f2..2a634ac75 100644
--- a/synapse/config/password.py
+++ b/synapse/config/password.py
@@ -20,6 +20,8 @@ class PasswordConfig(Config):
     """Password login configuration
     """
 
+    section = "password"
+
     def read_config(self, config, **kwargs):
         password_config = config.get("password_config", {})
         if password_config is None:
diff --git a/synapse/config/password_auth_providers.py b/synapse/config/password_auth_providers.py
index c50e24439..9746bbc68 100644
--- a/synapse/config/password_auth_providers.py
+++ b/synapse/config/password_auth_providers.py
@@ -23,6 +23,8 @@ LDAP_PROVIDER = "ldap_auth_provider.LdapAuthProvider"
 
 
 class PasswordAuthProviderConfig(Config):
+    section = "authproviders"
+
     def read_config(self, config, **kwargs):
         self.password_providers = []  # type: List[Any]
         providers = []
diff --git a/synapse/config/push.py b/synapse/config/push.py
index 1b932722a..091095864 100644
--- a/synapse/config/push.py
+++ b/synapse/config/push.py
@@ -18,6 +18,8 @@ from ._base import Config
 
 
 class PushConfig(Config):
+    section = "push"
+
     def read_config(self, config, **kwargs):
         push_config = config.get("push", {})
         self.push_include_content = push_config.get("include_content", True)
diff --git a/synapse/config/ratelimiting.py b/synapse/config/ratelimiting.py
index 587e2862b..947f653e0 100644
--- a/synapse/config/ratelimiting.py
+++ b/synapse/config/ratelimiting.py
@@ -36,6 +36,8 @@ class FederationRateLimitConfig(object):
 
 
 class RatelimitConfig(Config):
+    section = "ratelimiting"
+
     def read_config(self, config, **kwargs):
 
         # Load the new-style messages config if it exists. Otherwise fall back
diff --git a/synapse/config/registration.py b/synapse/config/registration.py
index bef89e2bf..b3e3e6dda 100644
--- a/synapse/config/registration.py
+++ b/synapse/config/registration.py
@@ -24,6 +24,8 @@ from synapse.util.stringutils import random_string_with_symbols
 
 
 class AccountValidityConfig(Config):
+    section = "accountvalidity"
+
     def __init__(self, config, synapse_config):
         self.enabled = config.get("enabled", False)
         self.renew_by_email_enabled = "renew_at" in config
@@ -77,6 +79,8 @@ class AccountValidityConfig(Config):
 
 
 class RegistrationConfig(Config):
+    section = "registration"
+
     def read_config(self, config, **kwargs):
         self.enable_registration = bool(
             strtobool(str(config.get("enable_registration", False)))
diff --git a/synapse/config/repository.py b/synapse/config/repository.py
index 14740891f..d0205e14b 100644
--- a/synapse/config/repository.py
+++ b/synapse/config/repository.py
@@ -78,6 +78,8 @@ def parse_thumbnail_requirements(thumbnail_sizes):
 
 
 class ContentRepositoryConfig(Config):
+    section = "media"
+
     def read_config(self, config, **kwargs):
 
         # Only enable the media repo if either the media repo is enabled or the
diff --git a/synapse/config/room_directory.py b/synapse/config/room_directory.py
index a92693017..7c9f05bde 100644
--- a/synapse/config/room_directory.py
+++ b/synapse/config/room_directory.py
@@ -19,6 +19,8 @@ from ._base import Config, ConfigError
 
 
 class RoomDirectoryConfig(Config):
+    section = "roomdirectory"
+
     def read_config(self, config, **kwargs):
         self.enable_room_list_search = config.get("enable_room_list_search", True)
 
diff --git a/synapse/config/saml2_config.py b/synapse/config/saml2_config.py
index ab34b41ca..c407e1368 100644
--- a/synapse/config/saml2_config.py
+++ b/synapse/config/saml2_config.py
@@ -55,6 +55,8 @@ def _dict_merge(merge_dict, into_dict):
 
 
 class SAML2Config(Config):
+    section = "saml2"
+
     def read_config(self, config, **kwargs):
         self.saml2_enabled = False
 
diff --git a/synapse/config/server.py b/synapse/config/server.py
index 709bd387e..afc4d6a4a 100644
--- a/synapse/config/server.py
+++ b/synapse/config/server.py
@@ -58,6 +58,8 @@ on how to configure the new listener.
 
 
 class ServerConfig(Config):
+    section = "server"
+
     def read_config(self, config, **kwargs):
         self.server_name = config["server_name"]
         self.server_context = config.get("server_context", None)
diff --git a/synapse/config/server_notices_config.py b/synapse/config/server_notices_config.py
index 6d4285ef9..6ea2ea886 100644
--- a/synapse/config/server_notices_config.py
+++ b/synapse/config/server_notices_config.py
@@ -59,6 +59,8 @@ class ServerNoticesConfig(Config):
             None if server notices are not enabled.
     """
 
+    section = "servernotices"
+
     def __init__(self, *args):
         super(ServerNoticesConfig, self).__init__(*args)
         self.server_notices_mxid = None
diff --git a/synapse/config/spam_checker.py b/synapse/config/spam_checker.py
index e40797ab5..36e0ddab5 100644
--- a/synapse/config/spam_checker.py
+++ b/synapse/config/spam_checker.py
@@ -19,6 +19,8 @@ from ._base import Config
 
 
 class SpamCheckerConfig(Config):
+    section = "spamchecker"
+
     def read_config(self, config, **kwargs):
         self.spam_checker = None
 
diff --git a/synapse/config/stats.py b/synapse/config/stats.py
index b18ddbd1f..62485189e 100644
--- a/synapse/config/stats.py
+++ b/synapse/config/stats.py
@@ -25,6 +25,8 @@ class StatsConfig(Config):
     Configuration for the behaviour of synapse's stats engine
     """
 
+    section = "stats"
+
     def read_config(self, config, **kwargs):
         self.stats_enabled = True
         self.stats_bucket_size = 86400 * 1000
diff --git a/synapse/config/third_party_event_rules.py b/synapse/config/third_party_event_rules.py
index b3431441b..10a99c792 100644
--- a/synapse/config/third_party_event_rules.py
+++ b/synapse/config/third_party_event_rules.py
@@ -19,6 +19,8 @@ from ._base import Config
 
 
 class ThirdPartyRulesConfig(Config):
+    section = "thirdpartyrules"
+
     def read_config(self, config, **kwargs):
         self.third_party_event_rules = None
 
diff --git a/synapse/config/tls.py b/synapse/config/tls.py
index fc47ba3e9..f06341eb6 100644
--- a/synapse/config/tls.py
+++ b/synapse/config/tls.py
@@ -18,6 +18,7 @@ import os
 import warnings
 from datetime import datetime
 from hashlib import sha256
+from typing import List
 
 import six
 
@@ -33,7 +34,9 @@ logger = logging.getLogger(__name__)
 
 
 class TlsConfig(Config):
-    def read_config(self, config, config_dir_path, **kwargs):
+    section = "tls"
+
+    def read_config(self, config: dict, config_dir_path: str, **kwargs):
 
         acme_config = config.get("acme", None)
         if acme_config is None:
@@ -57,7 +60,7 @@ class TlsConfig(Config):
         self.tls_certificate_file = self.abspath(config.get("tls_certificate_path"))
         self.tls_private_key_file = self.abspath(config.get("tls_private_key_path"))
 
-        if self.has_tls_listener():
+        if self.root.server.has_tls_listener():
             if not self.tls_certificate_file:
                 raise ConfigError(
                     "tls_certificate_path must be specified if TLS-enabled listeners are "
@@ -108,7 +111,7 @@ class TlsConfig(Config):
         )
 
         # Support globs (*) in whitelist values
-        self.federation_certificate_verification_whitelist = []
+        self.federation_certificate_verification_whitelist = []  # type: List[str]
         for entry in fed_whitelist_entries:
             try:
                 entry_regex = glob_to_regex(entry.encode("ascii").decode("ascii"))
diff --git a/synapse/config/tracer.py b/synapse/config/tracer.py
index 85d99a316..8be134611 100644
--- a/synapse/config/tracer.py
+++ b/synapse/config/tracer.py
@@ -19,6 +19,8 @@ from ._base import Config, ConfigError
 
 
 class TracerConfig(Config):
+    section = "tracing"
+
     def read_config(self, config, **kwargs):
         opentracing_config = config.get("opentracing")
         if opentracing_config is None:
diff --git a/synapse/config/user_directory.py b/synapse/config/user_directory.py
index f6313e17d..c8d19c5d6 100644
--- a/synapse/config/user_directory.py
+++ b/synapse/config/user_directory.py
@@ -21,6 +21,8 @@ class UserDirectoryConfig(Config):
     Configuration for the behaviour of the /user_directory API
     """
 
+    section = "userdirectory"
+
     def read_config(self, config, **kwargs):
         self.user_directory_search_enabled = True
         self.user_directory_search_all_users = False
diff --git a/synapse/config/voip.py b/synapse/config/voip.py
index 2ca0e1cf7..a68a3068a 100644
--- a/synapse/config/voip.py
+++ b/synapse/config/voip.py
@@ -16,6 +16,8 @@ from ._base import Config
 
 
 class VoipConfig(Config):
+    section = "voip"
+
     def read_config(self, config, **kwargs):
         self.turn_uris = config.get("turn_uris", [])
         self.turn_shared_secret = config.get("turn_shared_secret")
diff --git a/synapse/config/workers.py b/synapse/config/workers.py
index 1ec499862..fef72ed97 100644
--- a/synapse/config/workers.py
+++ b/synapse/config/workers.py
@@ -21,6 +21,8 @@ class WorkerConfig(Config):
     They have their own pid_file and listener configuration. They use the
     replication_url to talk to the main synapse process."""
 
+    section = "worker"
+
     def read_config(self, config, **kwargs):
         self.worker_app = config.get("worker_app")
 
diff --git a/tests/config/test_tls.py b/tests/config/test_tls.py
index b02780772..1be6ff563 100644
--- a/tests/config/test_tls.py
+++ b/tests/config/test_tls.py
@@ -21,17 +21,24 @@ import yaml
 
 from OpenSSL import SSL
 
+from synapse.config._base import Config, RootConfig
 from synapse.config.tls import ConfigError, TlsConfig
 from synapse.crypto.context_factory import ClientTLSOptionsFactory
 
 from tests.unittest import TestCase
 
 
-class TestConfig(TlsConfig):
+class FakeServer(Config):
+    section = "server"
+
     def has_tls_listener(self):
         return False
 
 
+class TestConfig(RootConfig):
+    config_classes = [FakeServer, TlsConfig]
+
+
 class TLSConfigTests(TestCase):
     def test_warn_self_signed(self):
         """
@@ -202,13 +209,13 @@ s4niecZKPBizL6aucT59CsunNmmb5Glq8rlAcU+1ZTZZzGYqVYhF6axB9Qg=
         conf = TestConfig()
         conf.read_config(
             yaml.safe_load(
-                TestConfig().generate_config_section(
+                TestConfig().generate_config(
                     "/config_dir_path",
                     "my_super_secure_server",
                     "/data_dir_path",
-                    "/tls_cert_path",
-                    "tls_private_key",
-                    None,  # This is the acme_domain
+                    tls_certificate_path="/tls_cert_path",
+                    tls_private_key_path="tls_private_key",
+                    acme_domain=None,  # This is the acme_domain
                 )
             ),
             "/config_dir_path",
@@ -223,13 +230,13 @@ s4niecZKPBizL6aucT59CsunNmmb5Glq8rlAcU+1ZTZZzGYqVYhF6axB9Qg=
         conf = TestConfig()
         conf.read_config(
             yaml.safe_load(
-                TestConfig().generate_config_section(
+                TestConfig().generate_config(
                     "/config_dir_path",
                     "my_super_secure_server",
                     "/data_dir_path",
-                    "/tls_cert_path",
-                    "tls_private_key",
-                    "my_supe_secure_server",  # This is the acme_domain
+                    tls_certificate_path="/tls_cert_path",
+                    tls_private_key_path="tls_private_key",
+                    acme_domain="my_supe_secure_server",  # This is the acme_domain
                 )
             ),
             "/config_dir_path",
diff --git a/tox.ini b/tox.ini
index 1bce10a4c..367cc2ccf 100644
--- a/tox.ini
+++ b/tox.ini
@@ -163,10 +163,9 @@ deps =
     {[base]deps}
     mypy
     mypy-zope
-    typeshed
 env =
     MYPYPATH = stubs/
 extras = all
-commands = mypy --show-traceback \
+commands = mypy --show-traceback --check-untyped-defs --show-error-codes --follow-imports=normal \
             synapse/logging/ \
             synapse/config/

From da815c1f695ceca56643d7814c96f7a3cfa3c70a Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Thu, 10 Oct 2019 10:06:45 +0100
Subject: [PATCH 224/276] Move tag/push rules room upgrade checking earlier
 (#6155)

It turns out that _local_membership_update doesn't run when you join a new, remote room. It only runs if you're joining a room that your server already knows about. This would explain #4703 and #5295 and why the transfer would work in testing and some rooms, but not others. This would especially hit single-user homeservers.

The check has been moved to right after the room has been joined, and works much more reliably (though it may still be a slightly awkward place for it).
---
 changelog.d/6155.bugfix         |  1 +
 synapse/handlers/room_member.py | 62 +++++++++++++++++++++++----------
 2 files changed, 45 insertions(+), 18 deletions(-)
 create mode 100644 changelog.d/6155.bugfix

diff --git a/changelog.d/6155.bugfix b/changelog.d/6155.bugfix
new file mode 100644
index 000000000..e32c0dce0
--- /dev/null
+++ b/changelog.d/6155.bugfix
@@ -0,0 +1 @@
+Fix transferring notifications and tags when joining an upgraded room that is new to your server.
\ No newline at end of file
diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py
index 95a244d86..380e2fad5 100644
--- a/synapse/handlers/room_member.py
+++ b/synapse/handlers/room_member.py
@@ -203,23 +203,11 @@ class RoomMemberHandler(object):
                 prev_member_event = yield self.store.get_event(prev_member_event_id)
                 newly_joined = prev_member_event.membership != Membership.JOIN
             if newly_joined:
+                # Copy over user state if we're joining an upgraded room
+                yield self.copy_user_state_if_room_upgrade(
+                    room_id, requester.user.to_string()
+                )
                 yield self._user_joined_room(target, room_id)
-
-            # Copy over direct message status and room tags if this is a join
-            # on an upgraded room
-
-            # Check if this is an upgraded room
-            predecessor = yield self.store.get_room_predecessor(room_id)
-
-            if predecessor:
-                # It is an upgraded room. Copy over old tags
-                self.copy_room_tags_and_direct_to_room(
-                    predecessor["room_id"], room_id, user_id
-                )
-                # Copy over push rules
-                yield self.store.copy_push_rules_from_room_to_room_for_user(
-                    predecessor["room_id"], room_id, user_id
-                )
         elif event.membership == Membership.LEAVE:
             if prev_member_event_id:
                 prev_member_event = yield self.store.get_event(prev_member_event_id)
@@ -463,10 +451,16 @@ class RoomMemberHandler(object):
                 if requester.is_guest:
                     content["kind"] = "guest"
 
-                ret = yield self._remote_join(
+                remote_join_response = yield self._remote_join(
                     requester, remote_room_hosts, room_id, target, content
                 )
-                return ret
+
+                # Copy over user state if this is a join on an remote upgraded room
+                yield self.copy_user_state_if_room_upgrade(
+                    room_id, requester.user.to_string()
+                )
+
+                return remote_join_response
 
         elif effective_membership_state == Membership.LEAVE:
             if not is_host_in_room:
@@ -503,6 +497,38 @@ class RoomMemberHandler(object):
         )
         return res
 
+    @defer.inlineCallbacks
+    def copy_user_state_if_room_upgrade(self, new_room_id, user_id):
+        """Copy user-specific information when they join a new room if that new room is the
+        result of a room upgrade
+
+        Args:
+            new_room_id (str): The ID of the room the user is joining
+            user_id (str): The ID of the user
+
+        Returns:
+            Deferred
+        """
+        # Check if the new room is an upgraded room
+        predecessor = yield self.store.get_room_predecessor(new_room_id)
+        if not predecessor:
+            return
+
+        logger.debug(
+            "Found predecessor for %s: %s. Copying over room tags and push " "rules",
+            new_room_id,
+            predecessor,
+        )
+
+        # It is an upgraded room. Copy over old tags
+        yield self.copy_room_tags_and_direct_to_room(
+            predecessor["room_id"], new_room_id, user_id
+        )
+        # Copy over push rules
+        yield self.store.copy_push_rules_from_room_to_room_for_user(
+            predecessor["room_id"], new_room_id, user_id
+        )
+
     @defer.inlineCallbacks
     def send_membership_event(self, requester, event, context, ratelimit=True):
         """

From 1d6dd1c2944c22147258dda8ccf2777c68b38fba Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 10 Oct 2019 10:53:06 +0100
Subject: [PATCH 225/276] Move patch_inline_callbacks into synapse/

---
 synapse/__init__.py                               | 2 +-
 {tests => synapse/util}/patch_inline_callbacks.py | 2 +-
 tests/__init__.py                                 | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)
 rename {tests => synapse/util}/patch_inline_callbacks.py (98%)

diff --git a/synapse/__init__.py b/synapse/__init__.py
index 1055f54e0..bf102244a 100644
--- a/synapse/__init__.py
+++ b/synapse/__init__.py
@@ -20,7 +20,7 @@
 import os
 import sys
 
-from tests.patch_inline_callbacks import do_patch
+from synapse.util.patch_inline_callbacks import do_patch
 
 # Check that we're not running on an unsupported Python version.
 if sys.version_info < (3, 5):
diff --git a/tests/patch_inline_callbacks.py b/synapse/util/patch_inline_callbacks.py
similarity index 98%
rename from tests/patch_inline_callbacks.py
rename to synapse/util/patch_inline_callbacks.py
index a35a1d330..4fb49b0b2 100644
--- a/tests/patch_inline_callbacks.py
+++ b/synapse/util/patch_inline_callbacks.py
@@ -105,7 +105,7 @@ def do_patch():
 
 
 def _check_yield_points(f, changes, start_context):
-    """Wraps a generator that is about to passed to defer.inlineCallbacks
+    """Wraps a generator that is about to be passed to defer.inlineCallbacks
     checking that after every yield the log contexts are correct.
     """
 
diff --git a/tests/__init__.py b/tests/__init__.py
index f7fc502f0..ed805db1c 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -16,9 +16,9 @@
 
 from twisted.trial import util
 
-import tests.patch_inline_callbacks
+from synapse.util.patch_inline_callbacks import do_patch
 
 # attempt to do the patch before we load any synapse code
-tests.patch_inline_callbacks.do_patch()
+do_patch()
 
 util.DEFAULT_TIMEOUT_DURATION = 20

From 3e4272961a4cb659513bccd981cbd42f4e506362 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 10 Oct 2019 10:58:32 +0100
Subject: [PATCH 226/276] Test for sentinel commit

---
 synapse/util/patch_inline_callbacks.py | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/synapse/util/patch_inline_callbacks.py b/synapse/util/patch_inline_callbacks.py
index 4fb49b0b2..4a45824f5 100644
--- a/synapse/util/patch_inline_callbacks.py
+++ b/synapse/util/patch_inline_callbacks.py
@@ -146,13 +146,30 @@ def _check_yield_points(f, changes, start_context):
                     # raise Exception(err)
                 return getattr(e, "value", None)
 
+            frame = gen.gi_frame
+
+            if isinstance(d, defer.Deferred):
+                # This happens if we yield on a deferred that doesn't follow
+                # the log context rules without wrappin in a `make_deferred_yieldable`
+                if LoggingContext.current_context() != LoggingContext.Sentinel:
+                    err = (
+                        "%s yielded with context %s rather than Sentinel,"
+                        " yielded on line %d in %s"
+                        % (
+                            frame.f_code.co_name,
+                            start_context,
+                            LoggingContext.current_context(),
+                            frame.f_lineno,
+                            frame.f_code.co_filename,
+                        )
+                    )
+                    changes.append(err)
+
             try:
                 result = yield d
             except Exception as e:
                 result = Failure(e)
 
-            frame = gen.gi_frame
-
             if LoggingContext.current_context() != expected_context:
                 # This happens because the context is lost sometime *after* the
                 # previous yield and *after* the current yield. E.g. the

From ec0596f2ab4502c9a6183813a7e5dc2a5bfedd48 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 10 Oct 2019 10:59:07 +0100
Subject: [PATCH 227/276] Log correct context

---
 synapse/util/patch_inline_callbacks.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/synapse/util/patch_inline_callbacks.py b/synapse/util/patch_inline_callbacks.py
index 4a45824f5..5ef7190b1 100644
--- a/synapse/util/patch_inline_callbacks.py
+++ b/synapse/util/patch_inline_callbacks.py
@@ -117,7 +117,7 @@ def _check_yield_points(f, changes, start_context):
 
         gen = f(*args, **kwargs)
 
-        last_yield_line_no = 1
+        last_yield_line_no = gen.gi_frame.f_lineno
         result = None
         while True:
             try:
@@ -136,7 +136,7 @@ def _check_yield_points(f, changes, start_context):
                         " in %s between %d and end of func"
                         % (
                             f.__qualname__,
-                            start_context,
+                            expected_context,
                             LoggingContext.current_context(),
                             f.__code__.co_filename,
                             last_yield_line_no,
@@ -148,22 +148,22 @@ def _check_yield_points(f, changes, start_context):
 
             frame = gen.gi_frame
 
-            if isinstance(d, defer.Deferred):
+            if isinstance(d, defer.Deferred) and not d.called:
                 # This happens if we yield on a deferred that doesn't follow
                 # the log context rules without wrappin in a `make_deferred_yieldable`
-                if LoggingContext.current_context() != LoggingContext.Sentinel:
+                if LoggingContext.current_context() is not LoggingContext.sentinel:
                     err = (
-                        "%s yielded with context %s rather than Sentinel,"
+                        "%s yielded with context %s rather than sentinel,"
                         " yielded on line %d in %s"
                         % (
                             frame.f_code.co_name,
-                            start_context,
                             LoggingContext.current_context(),
                             frame.f_lineno,
                             frame.f_code.co_filename,
                         )
                     )
                     changes.append(err)
+                    # raise Exception(err)
 
             try:
                 result = yield d

From 128d5948c4e0066f1263b347198f4754e72010c8 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 10 Oct 2019 11:16:26 +0100
Subject: [PATCH 228/276] Fix packaging

---
 synapse/__init__.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/synapse/__init__.py b/synapse/__init__.py
index bf102244a..56df3f5ac 100644
--- a/synapse/__init__.py
+++ b/synapse/__init__.py
@@ -20,8 +20,6 @@
 import os
 import sys
 
-from synapse.util.patch_inline_callbacks import do_patch
-
 # Check that we're not running on an unsupported Python version.
 if sys.version_info < (3, 5):
     print("Synapse requires Python 3.5 or above.")
@@ -41,4 +39,8 @@ except ImportError:
 __version__ = "1.4.0"
 
 if bool(os.environ.get("SYNAPSE_TEST_PATCH_LOG_CONTEXTS", False)):
+    # We import here so that we don't have to install a bunch of deps when
+    # running the packaging tox test.
+    from synapse.util.patch_inline_callbacks import do_patch
+
     do_patch()

From f4571a0497791285ff044d293a6287ed90575b71 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 10 Oct 2019 11:17:00 +0100
Subject: [PATCH 229/276] Newsfile

---
 changelog.d/6127.misc | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/6127.misc

diff --git a/changelog.d/6127.misc b/changelog.d/6127.misc
new file mode 100644
index 000000000..7bfbcfc25
--- /dev/null
+++ b/changelog.d/6127.misc
@@ -0,0 +1 @@
+Add env var to turn on tracking of log context changes.

From 562b4e51dd0e7d4a6f776502b9ac357ed3428445 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Thu, 10 Oct 2019 11:28:23 +0100
Subject: [PATCH 230/276] Rewrite the user_filter migration again (#6184)

You can't plausibly ALTER TABLE in SQLite, so we create the new table with the
right schema to start with.
---
 changelog.d/6184.misc                         |  1 +
 .../delta/56/unique_user_filter_index.py      | 58 ++++++++++---------
 2 files changed, 33 insertions(+), 26 deletions(-)
 create mode 100644 changelog.d/6184.misc

diff --git a/changelog.d/6184.misc b/changelog.d/6184.misc
new file mode 100644
index 000000000..30b3e5608
--- /dev/null
+++ b/changelog.d/6184.misc
@@ -0,0 +1 @@
+Update `user_filters` table to have a unique index, and non-null columns. Thanks to @pik for contributing this.
\ No newline at end of file
diff --git a/synapse/storage/schema/delta/56/unique_user_filter_index.py b/synapse/storage/schema/delta/56/unique_user_filter_index.py
index 60031f23c..1de8b5496 100644
--- a/synapse/storage/schema/delta/56/unique_user_filter_index.py
+++ b/synapse/storage/schema/delta/56/unique_user_filter_index.py
@@ -5,42 +5,48 @@ from synapse.storage.engines import PostgresEngine
 logger = logging.getLogger(__name__)
 
 
+"""
+This migration updates the user_filters table as follows:
+
+ - drops any (user_id, filter_id) duplicates
+ - makes the columns NON-NULLable
+ - turns the index into a UNIQUE index
+"""
+
+
 def run_upgrade(cur, database_engine, *args, **kwargs):
+    pass
+
+
+def run_create(cur, database_engine, *args, **kwargs):
     if isinstance(database_engine, PostgresEngine):
         select_clause = """
-        CREATE TEMPORARY TABLE user_filters_migration AS
             SELECT DISTINCT ON (user_id, filter_id) user_id, filter_id, filter_json
-            FROM user_filters;
+            FROM user_filters
         """
     else:
         select_clause = """
-        CREATE TEMPORARY TABLE user_filters_migration AS
-            SELECT * FROM user_filters GROUP BY user_id, filter_id;
+            SELECT * FROM user_filters GROUP BY user_id, filter_id
         """
-    sql = (
-        """
-        BEGIN;
-            %s
-            DROP INDEX user_filters_by_user_id_filter_id;
-            DELETE FROM user_filters;
-            ALTER TABLE user_filters
-               ALTER COLUMN user_id SET NOT NULL,
-               ALTER COLUMN filter_id SET NOT NULL,
-               ALTER COLUMN filter_json SET NOT NULL;
-            INSERT INTO user_filters(user_id, filter_id, filter_json)
-                SELECT * FROM user_filters_migration;
-            DROP TABLE user_filters_migration;
-            CREATE UNIQUE INDEX user_filters_by_user_id_filter_id_unique
-                ON user_filters(user_id, filter_id);
-        END;
-    """
-        % select_clause
+    sql = """
+            DROP TABLE IF EXISTS user_filters_migration;
+            DROP INDEX IF EXISTS user_filters_unique;
+            CREATE TABLE user_filters_migration (
+                user_id TEXT NOT NULL,
+                filter_id BIGINT NOT NULL,
+                filter_json BYTEA NOT NULL
+            );
+            INSERT INTO user_filters_migration (user_id, filter_id, filter_json)
+                %s;
+            CREATE UNIQUE INDEX user_filters_unique ON user_filters_migration
+                (user_id, filter_id);
+            DROP TABLE user_filters;
+            ALTER TABLE user_filters_migration RENAME TO user_filters;
+        """ % (
+        select_clause,
     )
+
     if isinstance(database_engine, PostgresEngine):
         cur.execute(sql)
     else:
         cur.executescript(sql)
-
-
-def run_create(cur, database_engine, *args, **kwargs):
-    pass

From a139420a3cfda6a4a4ee4750611b31dd71fc33f3 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Thu, 10 Oct 2019 11:29:01 +0100
Subject: [PATCH 231/276] Fix races in room stats (and other) updates. (#6187)

Hopefully this will fix the occasional failures we were seeing in the room directory.

The problem was that events are not necessarily persisted (and `current_state_delta_stream` updated) in the same order as their stream_id. So for instance current_state_delta 9 might be persisted *before* current_state_delta 8. Then, when the room stats saw stream_id 9, it assumed it had done everything up to 9, and never came back to do stream_id 8.

We can solve this easily by only processing up to the stream_id where we know all events have been persisted.
---
 changelog.d/6187.bugfix            |  1 +
 synapse/handlers/presence.py       | 16 +++++++++----
 synapse/handlers/stats.py          | 12 ++++++----
 synapse/handlers/user_directory.py | 17 +++++++++----
 synapse/storage/state_deltas.py    | 38 +++++++++++++++++++++++-------
 tests/handlers/test_typing.py      |  2 +-
 tests/rest/admin/test_admin.py     |  2 +-
 7 files changed, 63 insertions(+), 25 deletions(-)
 create mode 100644 changelog.d/6187.bugfix

diff --git a/changelog.d/6187.bugfix b/changelog.d/6187.bugfix
new file mode 100644
index 000000000..6142c5b98
--- /dev/null
+++ b/changelog.d/6187.bugfix
@@ -0,0 +1 @@
+Fix occasional missed updates in the room and user directories.
\ No newline at end of file
diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py
index 053cf66b2..2a5f1a007 100644
--- a/synapse/handlers/presence.py
+++ b/synapse/handlers/presence.py
@@ -803,17 +803,25 @@ class PresenceHandler(object):
         # Loop round handling deltas until we're up to date
         while True:
             with Measure(self.clock, "presence_delta"):
-                deltas = yield self.store.get_current_state_deltas(self._event_pos)
-                if not deltas:
+                room_max_stream_ordering = self.store.get_room_max_stream_ordering()
+                if self._event_pos == room_max_stream_ordering:
                     return
 
+                logger.debug(
+                    "Processing presence stats %s->%s",
+                    self._event_pos,
+                    room_max_stream_ordering,
+                )
+                max_pos, deltas = yield self.store.get_current_state_deltas(
+                    self._event_pos, room_max_stream_ordering
+                )
                 yield self._handle_state_delta(deltas)
 
-                self._event_pos = deltas[-1]["stream_id"]
+                self._event_pos = max_pos
 
                 # Expose current event processing position to prometheus
                 synapse.metrics.event_processing_positions.labels("presence").set(
-                    self._event_pos
+                    max_pos
                 )
 
     @defer.inlineCallbacks
diff --git a/synapse/handlers/stats.py b/synapse/handlers/stats.py
index c62b11311..466daf920 100644
--- a/synapse/handlers/stats.py
+++ b/synapse/handlers/stats.py
@@ -87,21 +87,23 @@ class StatsHandler(StateDeltasHandler):
             # Be sure to read the max stream_ordering *before* checking if there are any outstanding
             # deltas, since there is otherwise a chance that we could miss updates which arrive
             # after we check the deltas.
-            room_max_stream_ordering = yield self.store.get_room_max_stream_ordering()
+            room_max_stream_ordering = self.store.get_room_max_stream_ordering()
             if self.pos == room_max_stream_ordering:
                 break
 
-            deltas = yield self.store.get_current_state_deltas(self.pos)
+            logger.debug(
+                "Processing room stats %s->%s", self.pos, room_max_stream_ordering
+            )
+            max_pos, deltas = yield self.store.get_current_state_deltas(
+                self.pos, room_max_stream_ordering
+            )
 
             if deltas:
                 logger.debug("Handling %d state deltas", len(deltas))
                 room_deltas, user_deltas = yield self._handle_deltas(deltas)
-
-                max_pos = deltas[-1]["stream_id"]
             else:
                 room_deltas = {}
                 user_deltas = {}
-                max_pos = room_max_stream_ordering
 
             # Then count deltas for total_events and total_event_bytes.
             room_count, user_count = yield self.store.get_changes_room_total_events_and_bytes(
diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py
index e53669e40..624f05ab5 100644
--- a/synapse/handlers/user_directory.py
+++ b/synapse/handlers/user_directory.py
@@ -138,21 +138,28 @@ class UserDirectoryHandler(StateDeltasHandler):
         # Loop round handling deltas until we're up to date
         while True:
             with Measure(self.clock, "user_dir_delta"):
-                deltas = yield self.store.get_current_state_deltas(self.pos)
-                if not deltas:
+                room_max_stream_ordering = self.store.get_room_max_stream_ordering()
+                if self.pos == room_max_stream_ordering:
                     return
 
+                logger.debug(
+                    "Processing user stats %s->%s", self.pos, room_max_stream_ordering
+                )
+                max_pos, deltas = yield self.store.get_current_state_deltas(
+                    self.pos, room_max_stream_ordering
+                )
+
                 logger.info("Handling %d state deltas", len(deltas))
                 yield self._handle_deltas(deltas)
 
-                self.pos = deltas[-1]["stream_id"]
+                self.pos = max_pos
 
                 # Expose current event processing position to prometheus
                 synapse.metrics.event_processing_positions.labels("user_dir").set(
-                    self.pos
+                    max_pos
                 )
 
-                yield self.store.update_user_directory_stream_pos(self.pos)
+                yield self.store.update_user_directory_stream_pos(max_pos)
 
     @defer.inlineCallbacks
     def _handle_deltas(self, deltas):
diff --git a/synapse/storage/state_deltas.py b/synapse/storage/state_deltas.py
index 5fdb44210..28f33ec18 100644
--- a/synapse/storage/state_deltas.py
+++ b/synapse/storage/state_deltas.py
@@ -21,7 +21,7 @@ logger = logging.getLogger(__name__)
 
 
 class StateDeltasStore(SQLBaseStore):
-    def get_current_state_deltas(self, prev_stream_id):
+    def get_current_state_deltas(self, prev_stream_id: int, max_stream_id: int):
         """Fetch a list of room state changes since the given stream id
 
         Each entry in the result contains the following fields:
@@ -36,15 +36,27 @@ class StateDeltasStore(SQLBaseStore):
 
         Args:
             prev_stream_id (int): point to get changes since (exclusive)
+            max_stream_id (int): the point that we know has been correctly persisted
+               - ie, an upper limit to return changes from.
 
         Returns:
-            Deferred[list[dict]]: results
+            Deferred[tuple[int, list[dict]]: A tuple consisting of:
+               - the stream id which these results go up to
+               - list of current_state_delta_stream rows. If it is empty, we are
+                 up to date.
         """
         prev_stream_id = int(prev_stream_id)
+
+        # check we're not going backwards
+        assert prev_stream_id <= max_stream_id
+
         if not self._curr_state_delta_stream_cache.has_any_entity_changed(
             prev_stream_id
         ):
-            return []
+            # if the CSDs haven't changed between prev_stream_id and now, we
+            # know for certain that they haven't changed between prev_stream_id and
+            # max_stream_id.
+            return max_stream_id, []
 
         def get_current_state_deltas_txn(txn):
             # First we calculate the max stream id that will give us less than
@@ -54,21 +66,29 @@ class StateDeltasStore(SQLBaseStore):
             sql = """
                 SELECT stream_id, count(*)
                 FROM current_state_delta_stream
-                WHERE stream_id > ?
+                WHERE stream_id > ? AND stream_id <= ?
                 GROUP BY stream_id
                 ORDER BY stream_id ASC
                 LIMIT 100
             """
-            txn.execute(sql, (prev_stream_id,))
+            txn.execute(sql, (prev_stream_id, max_stream_id))
 
             total = 0
-            max_stream_id = prev_stream_id
-            for max_stream_id, count in txn:
+
+            for stream_id, count in txn:
                 total += count
                 if total > 100:
                     # We arbitarily limit to 100 entries to ensure we don't
                     # select toooo many.
+                    logger.debug(
+                        "Clipping current_state_delta_stream rows to stream_id %i",
+                        stream_id,
+                    )
+                    clipped_stream_id = stream_id
                     break
+            else:
+                # if there's no problem, we may as well go right up to the max_stream_id
+                clipped_stream_id = max_stream_id
 
             # Now actually get the deltas
             sql = """
@@ -77,8 +97,8 @@ class StateDeltasStore(SQLBaseStore):
                 WHERE ? < stream_id AND stream_id <= ?
                 ORDER BY stream_id ASC
             """
-            txn.execute(sql, (prev_stream_id, max_stream_id))
-            return self.cursor_to_dict(txn)
+            txn.execute(sql, (prev_stream_id, clipped_stream_id))
+            return clipped_stream_id, self.cursor_to_dict(txn)
 
         return self.runInteraction(
             "get_current_state_deltas", get_current_state_deltas_txn
diff --git a/tests/handlers/test_typing.py b/tests/handlers/test_typing.py
index 1f2ef5d01..67f101305 100644
--- a/tests/handlers/test_typing.py
+++ b/tests/handlers/test_typing.py
@@ -139,7 +139,7 @@ class TypingNotificationsTestCase(unittest.HomeserverTestCase):
             defer.succeed(1)
         )
 
-        self.datastore.get_current_state_deltas.return_value = None
+        self.datastore.get_current_state_deltas.return_value = (0, None)
 
         self.datastore.get_to_device_stream_token = lambda: 0
         self.datastore.get_new_device_msgs_for_remote = lambda *args, **kargs: ([], 0)
diff --git a/tests/rest/admin/test_admin.py b/tests/rest/admin/test_admin.py
index 5877bb213..d3a4f717f 100644
--- a/tests/rest/admin/test_admin.py
+++ b/tests/rest/admin/test_admin.py
@@ -62,7 +62,7 @@ class UserRegisterTestCase(unittest.HomeserverTestCase):
         self.device_handler.check_device_registered = Mock(return_value="FAKE")
 
         self.datastore = Mock(return_value=Mock())
-        self.datastore.get_current_state_deltas = Mock(return_value=[])
+        self.datastore.get_current_state_deltas = Mock(return_value=(0, []))
 
         self.secrets = Mock()
 

From 791a8c559bf4ea984637c047fad7d6097e34ce99 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 10 Oct 2019 11:53:57 +0100
Subject: [PATCH 232/276] Add comments

---
 synapse/util/patch_inline_callbacks.py | 30 +++++++++++++++++++++-----
 1 file changed, 25 insertions(+), 5 deletions(-)

diff --git a/synapse/util/patch_inline_callbacks.py b/synapse/util/patch_inline_callbacks.py
index 5ef7190b1..b518dae25 100644
--- a/synapse/util/patch_inline_callbacks.py
+++ b/synapse/util/patch_inline_callbacks.py
@@ -107,6 +107,19 @@ def do_patch():
 def _check_yield_points(f, changes, start_context):
     """Wraps a generator that is about to be passed to defer.inlineCallbacks
     checking that after every yield the log contexts are correct.
+
+    Its perfectly valid for log contexts to change within a function, e.g. due
+    to new Measure blocks, so such changes are added to the given `changes`
+    list instead of triggering an exception.
+
+    Args:
+        f: generator function to wrap
+        changes (list[str]): A list of strings detailing how the contexts
+            changed within a function.
+        start_context (LoggingContext): The initial context we're expecting
+
+    Returns:
+        function
     """
 
     from synapse.logging.context import LoggingContext
@@ -131,6 +144,10 @@ def _check_yield_points(f, changes, start_context):
                     # This happens when the context is lost sometime *after* the
                     # final yield and returning. E.g. we forgot to yield on a
                     # function that returns a deferred.
+                    #
+                    # We don't raise here as its perfectly valid for contexts to
+                    # change in a function, as long as it sets the correct context
+                    # on resolving (which is checked separately).
                     err = (
                         "Function %r returned and changed context from %s to %s,"
                         " in %s between %d and end of func"
@@ -143,14 +160,14 @@ def _check_yield_points(f, changes, start_context):
                         )
                     )
                     changes.append(err)
-                    # raise Exception(err)
                 return getattr(e, "value", None)
 
             frame = gen.gi_frame
 
             if isinstance(d, defer.Deferred) and not d.called:
                 # This happens if we yield on a deferred that doesn't follow
-                # the log context rules without wrappin in a `make_deferred_yieldable`
+                # the log context rules without wrappin in a `make_deferred_yieldable`.
+                # We raise here as this should never happen.
                 if LoggingContext.current_context() is not LoggingContext.sentinel:
                     err = (
                         "%s yielded with context %s rather than sentinel,"
@@ -162,8 +179,7 @@ def _check_yield_points(f, changes, start_context):
                             frame.f_code.co_filename,
                         )
                     )
-                    changes.append(err)
-                    # raise Exception(err)
+                    raise Exception(err)
 
             try:
                 result = yield d
@@ -171,10 +187,15 @@ def _check_yield_points(f, changes, start_context):
                 result = Failure(e)
 
             if LoggingContext.current_context() != expected_context:
+
                 # This happens because the context is lost sometime *after* the
                 # previous yield and *after* the current yield. E.g. the
                 # deferred we waited on didn't follow the rules, or we forgot to
                 # yield on a function between the two yield points.
+                #
+                # We don't raise here as its perfectly valid for contexts to
+                # change in a function, as long as it sets the correct context
+                # on resolving (which is checked separately).
                 err = (
                     "%s changed context from %s to %s, happened between lines %d and %d in %s"
                     % (
@@ -187,7 +208,6 @@ def _check_yield_points(f, changes, start_context):
                     )
                 )
                 changes.append(err)
-                # raise Exception(err)
 
                 expected_context = LoggingContext.current_context()
 

From 0aee4900131bf97dde0f0ff5d1f7133147ff5bc7 Mon Sep 17 00:00:00 2001
From: James <james@hebden.net.au>
Date: Thu, 10 Oct 2019 21:59:06 +1100
Subject: [PATCH 233/276] Add snapcraft packaging information (#6084)

---
 .gitignore            |  1 +
 changelog.d/6084.misc |  1 +
 snap/snapcraft.yaml   | 22 ++++++++++++++++++++++
 3 files changed, 24 insertions(+)
 create mode 100644 changelog.d/6084.misc
 create mode 100644 snap/snapcraft.yaml

diff --git a/.gitignore b/.gitignore
index 747b8714d..af36c00cf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,6 +7,7 @@
 *.egg-info
 *.lock
 *.pyc
+*.snap
 *.tac
 _trial_temp/
 _trial_temp*/
diff --git a/changelog.d/6084.misc b/changelog.d/6084.misc
new file mode 100644
index 000000000..3c3370165
--- /dev/null
+++ b/changelog.d/6084.misc
@@ -0,0 +1 @@
+Add snapcraft packaging information. Contributed by @devec0.
diff --git a/snap/snapcraft.yaml b/snap/snapcraft.yaml
new file mode 100644
index 000000000..1f7df71db
--- /dev/null
+++ b/snap/snapcraft.yaml
@@ -0,0 +1,22 @@
+name: matrix-synapse
+base: core18
+version: git 
+summary: Reference Matrix homeserver
+description: |
+  Synapse is the reference Matrix homeserver.
+  Matrix is a federated and decentralised instant messaging and VoIP system.
+
+grade: stable 
+confinement: strict 
+
+apps:
+  matrix-synapse: 
+    command: synctl --no-daemonize start $SNAP_COMMON/homeserver.yaml
+    stop-command: synctl -c $SNAP_COMMON stop
+    plugs: [network-bind, network]
+    daemon: simple 
+parts:
+  matrix-synapse:
+    source: .
+    plugin: python
+    python-version: python3

From 941edad58355a829b49b0a43d382bbb0bf9ba021 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 10 Oct 2019 12:15:17 +0100
Subject: [PATCH 234/276] Appease mypy

---
 synapse/util/patch_inline_callbacks.py | 31 +++++++++++++++-----------
 1 file changed, 18 insertions(+), 13 deletions(-)

diff --git a/synapse/util/patch_inline_callbacks.py b/synapse/util/patch_inline_callbacks.py
index b518dae25..64a2c891c 100644
--- a/synapse/util/patch_inline_callbacks.py
+++ b/synapse/util/patch_inline_callbacks.py
@@ -18,11 +18,17 @@ from __future__ import print_function
 import functools
 import sys
 
+from typing import List, Callable, Any
+
 from twisted.internet import defer
 from twisted.internet.defer import Deferred
 from twisted.python.failure import Failure
 
 
+# Tracks if we've already patched inlineCallbacks
+_already_patched = False
+
+
 def do_patch():
     """
     Patch defer.inlineCallbacks so that it checks the state of the logcontext on exit
@@ -30,16 +36,18 @@ def do_patch():
 
     from synapse.logging.context import LoggingContext
 
+    global _already_patched
+
     orig_inline_callbacks = defer.inlineCallbacks
-    if hasattr(orig_inline_callbacks, "patched_by_synapse"):
+    if _already_patched:
         return
 
     def new_inline_callbacks(f):
         @functools.wraps(f)
         def wrapped(*args, **kwargs):
             start_context = LoggingContext.current_context()
-            changes = []
-            orig = orig_inline_callbacks(_check_yield_points(f, changes, start_context))
+            changes: List[str] = []
+            orig = orig_inline_callbacks(_check_yield_points(f, changes))
 
             try:
                 res = orig(*args, **kwargs)
@@ -101,10 +109,10 @@ def do_patch():
         return wrapped
 
     defer.inlineCallbacks = new_inline_callbacks
-    new_inline_callbacks.patched_by_synapse = True
+    _already_patched = True
 
 
-def _check_yield_points(f, changes, start_context):
+def _check_yield_points(f: Callable, changes: List[str]):
     """Wraps a generator that is about to be passed to defer.inlineCallbacks
     checking that after every yield the log contexts are correct.
 
@@ -114,9 +122,8 @@ def _check_yield_points(f, changes, start_context):
 
     Args:
         f: generator function to wrap
-        changes (list[str]): A list of strings detailing how the contexts
+        changes: A list of strings detailing how the contexts
             changed within a function.
-        start_context (LoggingContext): The initial context we're expecting
 
     Returns:
         function
@@ -126,13 +133,13 @@ def _check_yield_points(f, changes, start_context):
 
     @functools.wraps(f)
     def check_yield_points_inner(*args, **kwargs):
-        expected_context = start_context
-
         gen = f(*args, **kwargs)
 
         last_yield_line_no = gen.gi_frame.f_lineno
-        result = None
+        result: Any = None
         while True:
+            expected_context = LoggingContext.current_context()
+
             try:
                 isFailure = isinstance(result, Failure)
                 if isFailure:
@@ -200,7 +207,7 @@ def _check_yield_points(f, changes, start_context):
                     "%s changed context from %s to %s, happened between lines %d and %d in %s"
                     % (
                         frame.f_code.co_name,
-                        start_context,
+                        expected_context,
                         LoggingContext.current_context(),
                         last_yield_line_no,
                         frame.f_lineno,
@@ -209,8 +216,6 @@ def _check_yield_points(f, changes, start_context):
                 )
                 changes.append(err)
 
-                expected_context = LoggingContext.current_context()
-
             last_yield_line_no = frame.f_lineno
 
     return check_yield_points_inner

From f735aeec65a5117c71cf0f1e5f61cb900683533a Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 10 Oct 2019 12:20:29 +0100
Subject: [PATCH 235/276] sort

---
 synapse/util/patch_inline_callbacks.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/synapse/util/patch_inline_callbacks.py b/synapse/util/patch_inline_callbacks.py
index 64a2c891c..3b78451dc 100644
--- a/synapse/util/patch_inline_callbacks.py
+++ b/synapse/util/patch_inline_callbacks.py
@@ -17,14 +17,12 @@ from __future__ import print_function
 
 import functools
 import sys
-
-from typing import List, Callable, Any
+from typing import Any, Callable, List
 
 from twisted.internet import defer
 from twisted.internet.defer import Deferred
 from twisted.python.failure import Failure
 
-
 # Tracks if we've already patched inlineCallbacks
 _already_patched = False
 

From c349e3ebafbe044022b93ca5c04d8b2fcb640c0a Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 10 Oct 2019 12:29:38 +0100
Subject: [PATCH 236/276] Fix py3.5

---
 synapse/util/patch_inline_callbacks.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/util/patch_inline_callbacks.py b/synapse/util/patch_inline_callbacks.py
index 3b78451dc..812dc8883 100644
--- a/synapse/util/patch_inline_callbacks.py
+++ b/synapse/util/patch_inline_callbacks.py
@@ -44,7 +44,7 @@ def do_patch():
         @functools.wraps(f)
         def wrapped(*args, **kwargs):
             start_context = LoggingContext.current_context()
-            changes: List[str] = []
+            changes = []  # type: List[str]
             orig = orig_inline_callbacks(_check_yield_points(f, changes))
 
             try:

From 59e0ed83065874775be350e25bb9f87da67b87c2 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 10 Oct 2019 12:47:07 +0100
Subject: [PATCH 237/276] Fix py3.5

---
 synapse/util/patch_inline_callbacks.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/util/patch_inline_callbacks.py b/synapse/util/patch_inline_callbacks.py
index 812dc8883..66c3d4751 100644
--- a/synapse/util/patch_inline_callbacks.py
+++ b/synapse/util/patch_inline_callbacks.py
@@ -134,7 +134,7 @@ def _check_yield_points(f: Callable, changes: List[str]):
         gen = f(*args, **kwargs)
 
         last_yield_line_no = gen.gi_frame.f_lineno
-        result: Any = None
+        result = None  # type: Any
         while True:
             expected_context = LoggingContext.current_context()
 

From 2efd050c9db2e96fd96535dc9b1c6f54acbd163d Mon Sep 17 00:00:00 2001
From: krombel <krombel@krombel.de>
Date: Thu, 10 Oct 2019 13:59:55 +0200
Subject: [PATCH 238/276] send 404 as http-status when filter-id is unknown to
 the server (#2380)

This fixes the inconsistency of returning HTTP status 400 rather than 404
when the filter ID is not known by the server.
Clients such as matrix-js-sdk expect a 404 in this situation, so returning
400 led to unwanted behaviour.
---
 changelog.d/2380.bugfix                   |  1 +
 synapse/rest/client/v2_alpha/filter.py    | 12 ++++---
 synapse/rest/client/v2_alpha/sync.py      | 41 +++++++++++++----------
 tests/rest/client/v2_alpha/test_filter.py |  2 +-
 4 files changed, 33 insertions(+), 23 deletions(-)
 create mode 100644 changelog.d/2380.bugfix

diff --git a/changelog.d/2380.bugfix b/changelog.d/2380.bugfix
new file mode 100644
index 000000000..eae320603
--- /dev/null
+++ b/changelog.d/2380.bugfix
@@ -0,0 +1 @@
+Return an HTTP 404 instead of 400 when requesting a filter by ID that is unknown to the server. Thanks to @krombel for contributing this!
diff --git a/synapse/rest/client/v2_alpha/filter.py b/synapse/rest/client/v2_alpha/filter.py
index c6ddf24c8..17a8bc736 100644
--- a/synapse/rest/client/v2_alpha/filter.py
+++ b/synapse/rest/client/v2_alpha/filter.py
@@ -17,7 +17,7 @@ import logging
 
 from twisted.internet import defer
 
-from synapse.api.errors import AuthError, Codes, StoreError, SynapseError
+from synapse.api.errors import AuthError, NotFoundError, StoreError, SynapseError
 from synapse.http.servlet import RestServlet, parse_json_object_from_request
 from synapse.types import UserID
 
@@ -52,13 +52,15 @@ class GetFilterRestServlet(RestServlet):
             raise SynapseError(400, "Invalid filter_id")
 
         try:
-            filter = yield self.filtering.get_user_filter(
+            filter_collection = yield self.filtering.get_user_filter(
                 user_localpart=target_user.localpart, filter_id=filter_id
             )
+        except StoreError as e:
+            if e.code != 404:
+                raise
+            raise NotFoundError("No such filter")
 
-            return 200, filter.get_filter_json()
-        except (KeyError, StoreError):
-            raise SynapseError(400, "No such filter", errcode=Codes.NOT_FOUND)
+        return 200, filter_collection.get_filter_json()
 
 
 class CreateFilterRestServlet(RestServlet):
diff --git a/synapse/rest/client/v2_alpha/sync.py b/synapse/rest/client/v2_alpha/sync.py
index c98c5a380..a883c8add 100644
--- a/synapse/rest/client/v2_alpha/sync.py
+++ b/synapse/rest/client/v2_alpha/sync.py
@@ -21,7 +21,7 @@ from canonicaljson import json
 from twisted.internet import defer
 
 from synapse.api.constants import PresenceState
-from synapse.api.errors import SynapseError
+from synapse.api.errors import Codes, StoreError, SynapseError
 from synapse.api.filtering import DEFAULT_FILTER_COLLECTION, FilterCollection
 from synapse.events.utils import (
     format_event_for_client_v2_without_room_id,
@@ -119,25 +119,32 @@ class SyncRestServlet(RestServlet):
 
         request_key = (user, timeout, since, filter_id, full_state, device_id)
 
-        if filter_id:
-            if filter_id.startswith("{"):
-                try:
-                    filter_object = json.loads(filter_id)
-                    set_timeline_upper_limit(
-                        filter_object, self.hs.config.filter_timeline_limit
-                    )
-                except Exception:
-                    raise SynapseError(400, "Invalid filter JSON")
-                self.filtering.check_valid_filter(filter_object)
-                filter = FilterCollection(filter_object)
-            else:
-                filter = yield self.filtering.get_user_filter(user.localpart, filter_id)
+        if filter_id is None:
+            filter_collection = DEFAULT_FILTER_COLLECTION
+        elif filter_id.startswith("{"):
+            try:
+                filter_object = json.loads(filter_id)
+                set_timeline_upper_limit(
+                    filter_object, self.hs.config.filter_timeline_limit
+                )
+            except Exception:
+                raise SynapseError(400, "Invalid filter JSON")
+            self.filtering.check_valid_filter(filter_object)
+            filter_collection = FilterCollection(filter_object)
         else:
-            filter = DEFAULT_FILTER_COLLECTION
+            try:
+                filter_collection = yield self.filtering.get_user_filter(
+                    user.localpart, filter_id
+                )
+            except StoreError as err:
+                if err.code != 404:
+                    raise
+                # fix up the description and errcode to be more useful
+                raise SynapseError(400, "No such filter", errcode=Codes.INVALID_PARAM)
 
         sync_config = SyncConfig(
             user=user,
-            filter_collection=filter,
+            filter_collection=filter_collection,
             is_guest=requester.is_guest,
             request_key=request_key,
             device_id=device_id,
@@ -171,7 +178,7 @@ class SyncRestServlet(RestServlet):
 
         time_now = self.clock.time_msec()
         response_content = yield self.encode_response(
-            time_now, sync_result, requester.access_token_id, filter
+            time_now, sync_result, requester.access_token_id, filter_collection
         )
 
         return 200, response_content
diff --git a/tests/rest/client/v2_alpha/test_filter.py b/tests/rest/client/v2_alpha/test_filter.py
index f42a8efbf..e0e9e94fb 100644
--- a/tests/rest/client/v2_alpha/test_filter.py
+++ b/tests/rest/client/v2_alpha/test_filter.py
@@ -92,7 +92,7 @@ class FilterTestCase(unittest.HomeserverTestCase):
         )
         self.render(request)
 
-        self.assertEqual(channel.result["code"], b"400")
+        self.assertEqual(channel.result["code"], b"404")
         self.assertEquals(channel.json_body["errcode"], Codes.NOT_FOUND)
 
     # Currently invalid params do not have an appropriate errcode

From 9a84d74417a1c9fbcd6c57e7ef23e5590e04ef49 Mon Sep 17 00:00:00 2001
From: Michael Telatynski <7t3chguy@gmail.com>
Date: Thu, 10 Oct 2019 13:03:44 +0100
Subject: [PATCH 239/276] before fulfilling a group invite, check if user is
 already joined/invited (#3436)

Fixes vector-im/riot-web#5645
---
 changelog.d/3436.bugfix         |  1 +
 synapse/groups/groups_server.py | 20 ++++++++++++++++----
 2 files changed, 17 insertions(+), 4 deletions(-)
 create mode 100644 changelog.d/3436.bugfix

diff --git a/changelog.d/3436.bugfix b/changelog.d/3436.bugfix
new file mode 100644
index 000000000..15714a11e
--- /dev/null
+++ b/changelog.d/3436.bugfix
@@ -0,0 +1 @@
+Fix a problem where users could be invited twice to the same group.
diff --git a/synapse/groups/groups_server.py b/synapse/groups/groups_server.py
index d50e69143..8f10b6adb 100644
--- a/synapse/groups/groups_server.py
+++ b/synapse/groups/groups_server.py
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 # Copyright 2017 Vector Creations Ltd
 # Copyright 2018 New Vector Ltd
+# Copyright 2019 Michael Telatynski <7t3chguy@gmail.com>
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -20,16 +21,16 @@ from six import string_types
 
 from twisted.internet import defer
 
-from synapse.api.errors import SynapseError
+from synapse.api.errors import Codes, SynapseError
 from synapse.types import GroupID, RoomID, UserID, get_domain_from_id
 from synapse.util.async_helpers import concurrently_execute
 
 logger = logging.getLogger(__name__)
 
 
-# TODO: Allow users to "knock" or simpkly join depending on rules
+# TODO: Allow users to "knock" or simply join depending on rules
 # TODO: Federation admin APIs
-# TODO: is_priveged flag to users and is_public to users and rooms
+# TODO: is_privileged flag to users and is_public to users and rooms
 # TODO: Audit log for admins (profile updates, membership changes, users who tried
 #       to join but were rejected, etc)
 # TODO: Flairs
@@ -590,7 +591,18 @@ class GroupsServerHandler(object):
         )
 
         # TODO: Check if user knocked
-        # TODO: Check if user is already invited
+
+        invited_users = yield self.store.get_invited_users_in_group(group_id)
+        if user_id in invited_users:
+            raise SynapseError(
+                400, "User already invited to group", errcode=Codes.BAD_STATE
+            )
+
+        user_results = yield self.store.get_users_in_group(
+            group_id, include_private=True
+        )
+        if user_id in [user_result["user_id"] for user_result in user_results]:
+            raise SynapseError(400, "User already in group")
 
         content = {
             "profile": {"name": group["name"], "avatar_url": group["avatar_url"]},

From b5b03b7079a9baa34a25915d6a569e383e8307c3 Mon Sep 17 00:00:00 2001
From: werner291 <werner.kroneman@gmail.com>
Date: Thu, 10 Oct 2019 14:05:48 +0200
Subject: [PATCH 240/276] Add domain validation when creating room with list of
 invitees (#6121)

---
 changelog.d/4088.bugfix            | 1 +
 synapse/handlers/room.py           | 4 +++-
 tests/rest/client/v1/test_rooms.py | 9 +++++++++
 3 files changed, 13 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/4088.bugfix

diff --git a/changelog.d/4088.bugfix b/changelog.d/4088.bugfix
new file mode 100644
index 000000000..61722b622
--- /dev/null
+++ b/changelog.d/4088.bugfix
@@ -0,0 +1 @@
+Added domain validation when including a list of invitees upon room creation.
\ No newline at end of file
diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py
index 970be3c84..2816bd8f8 100644
--- a/synapse/handlers/room.py
+++ b/synapse/handlers/room.py
@@ -28,6 +28,7 @@ from twisted.internet import defer
 from synapse.api.constants import EventTypes, JoinRules, RoomCreationPreset
 from synapse.api.errors import AuthError, Codes, NotFoundError, StoreError, SynapseError
 from synapse.api.room_versions import KNOWN_ROOM_VERSIONS
+from synapse.http.endpoint import parse_and_validate_server_name
 from synapse.storage.state import StateFilter
 from synapse.types import RoomAlias, RoomID, RoomStreamToken, StreamToken, UserID
 from synapse.util import stringutils
@@ -554,7 +555,8 @@ class RoomCreationHandler(BaseHandler):
         invite_list = config.get("invite", [])
         for i in invite_list:
             try:
-                UserID.from_string(i)
+                uid = UserID.from_string(i)
+                parse_and_validate_server_name(uid.domain)
             except Exception:
                 raise SynapseError(400, "Invalid user_id: %s" % (i,))
 
diff --git a/tests/rest/client/v1/test_rooms.py b/tests/rest/client/v1/test_rooms.py
index fe741637f..2f2ca7461 100644
--- a/tests/rest/client/v1/test_rooms.py
+++ b/tests/rest/client/v1/test_rooms.py
@@ -484,6 +484,15 @@ class RoomsCreateTestCase(RoomBase):
         self.render(request)
         self.assertEquals(400, channel.code)
 
+    def test_post_room_invitees_invalid_mxid(self):
+        # POST with invalid invitee, see https://github.com/matrix-org/synapse/issues/4088
+        # Note the trailing space in the MXID here!
+        request, channel = self.make_request(
+            "POST", "/createRoom", b'{"invite":["@alice:example.com "]}'
+        )
+        self.render(request)
+        self.assertEquals(400, channel.code)
+
 
 class RoomTopicTestCase(RoomBase):
     """ Tests /rooms/$room_id/topic REST events. """

From b4fbf71187545748edf3ebd931b49350e5b1ca74 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 2 Oct 2019 19:06:12 +0100
Subject: [PATCH 241/276] Add helper funcs to use postgres ANY

This means that we can write queries with `col = ANY(?)`, which helps
postgres.
---
 synapse/storage/_base.py | 64 +++++++++++++++++++++++++++++++++++-----
 1 file changed, 56 insertions(+), 8 deletions(-)

diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py
index abe16334e..a94cbc27d 100644
--- a/synapse/storage/_base.py
+++ b/synapse/storage/_base.py
@@ -20,6 +20,7 @@ import random
 import sys
 import threading
 import time
+from typing import Iterable, List, Tuple
 
 from six import PY2, iteritems, iterkeys, itervalues
 from six.moves import builtins, intern, range
@@ -1162,19 +1163,20 @@ class SQLBaseStore(object):
         if not iterable:
             return []
 
-        sql = "SELECT %s FROM %s" % (", ".join(retcols), table)
-
         clauses = []
         values = []
-        clauses.append("%s IN (%s)" % (column, ",".join("?" for _ in iterable)))
-        values.extend(iterable)
+
+        add_in_list_sql_clause(txn.database_engine, column, iterable, clauses, values)
 
         for key, value in iteritems(keyvalues):
             clauses.append("%s = ?" % (key,))
             values.append(value)
 
-        if clauses:
-            sql = "%s WHERE %s" % (sql, " AND ".join(clauses))
+        sql = "SELECT %s FROM %s WHERE %s" % (
+            ", ".join(retcols),
+            table,
+            " AND ".join(clauses),
+        )
 
         txn.execute(sql, values)
         return cls.cursor_to_dict(txn)
@@ -1325,8 +1327,8 @@ class SQLBaseStore(object):
 
         clauses = []
         values = []
-        clauses.append("%s IN (%s)" % (column, ",".join("?" for _ in iterable)))
-        values.extend(iterable)
+
+        add_in_list_sql_clause(txn.database_engine, column, iterable, clauses, values)
 
         for key, value in iteritems(keyvalues):
             clauses.append("%s = ?" % (key,))
@@ -1693,3 +1695,49 @@ def db_to_json(db_content):
     except Exception:
         logging.warning("Tried to decode '%r' as JSON and failed", db_content)
         raise
+
+
+def add_in_list_sql_clause(
+    database_engine, column: str, iterable: Iterable, clauses: List[str], args: List
+):
+    """Adds an SQL clause to the given list of clauses/args that checks the
+    given column is in the iterable. c.f. `make_in_list_sql_clause`
+
+    Args:
+        database_engine
+        column: Name of the column
+        iterable: The values to check the column against.
+        clauses: A list to add the expanded clause to
+        args: A list of arguments that we append the args to.
+    """
+
+    clause, new_args = make_in_list_sql_clause(database_engine, column, iterable)
+    clauses.append(clause)
+    args.extend(new_args)
+
+
+def make_in_list_sql_clause(
+    database_engine, column: str, iterable: Iterable
+) -> Tuple[str, Iterable]:
+    """Returns an SQL clause that checks the given column is in the iterable.
+
+    On SQLite this expands to `column IN (?, ?, ...)`, whereas on Postgres
+    it expands to `column = ANY(?)`. While both DBs support the `IN` form,
+    using the `ANY` form on postgres means that it views queries with
+    different length iterables as the same, helping the query stats.
+
+    Args:
+        database_engine
+        column: Name of the column
+        iterable: The values to check the column against.
+
+    Returns:
+        A tuple of SQL query and the args
+    """
+
+    if isinstance(database_engine, PostgresEngine):
+        # This should hopefully be faster, but also makes postgres query
+        # stats easier to understand.
+        return "%s = ANY(?)" % (column,), [list(iterable)]
+    else:
+        return "%s IN (%s)" % (column, ",".join("?" for _ in iterable)), iterable

From b161786c1455f219d58549b514f2551f25eae33a Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 2 Oct 2019 19:07:07 +0100
Subject: [PATCH 242/276] Replace IN usage with helper funcs

---
 synapse/storage/deviceinbox.py        | 14 +++----
 synapse/storage/devices.py            | 14 +++++--
 synapse/storage/event_federation.py   |  9 +++--
 synapse/storage/events.py             | 44 +++++++++++++---------
 synapse/storage/events_bg_updates.py  | 12 +++---
 synapse/storage/events_worker.py      | 28 ++++++++------
 synapse/storage/presence.py           | 14 +++----
 synapse/storage/receipts.py           | 53 +++++++++++++++------------
 synapse/storage/roommember.py         | 18 ++++++---
 synapse/storage/search.py             | 11 ++++--
 synapse/storage/user_erasure_store.py | 16 ++++----
 11 files changed, 137 insertions(+), 96 deletions(-)

diff --git a/synapse/storage/deviceinbox.py b/synapse/storage/deviceinbox.py
index 70bc2bb2c..f04aad074 100644
--- a/synapse/storage/deviceinbox.py
+++ b/synapse/storage/deviceinbox.py
@@ -20,7 +20,7 @@ from canonicaljson import json
 from twisted.internet import defer
 
 from synapse.logging.opentracing import log_kv, set_tag, trace
-from synapse.storage._base import SQLBaseStore
+from synapse.storage._base import SQLBaseStore, make_in_list_sql_clause
 from synapse.storage.background_updates import BackgroundUpdateStore
 from synapse.util.caches.expiringcache import ExpiringCache
 
@@ -378,15 +378,15 @@ class DeviceInboxStore(DeviceInboxWorkerStore, DeviceInboxBackgroundUpdateStore)
             else:
                 if not devices:
                     continue
-                sql = (
-                    "SELECT device_id FROM devices"
-                    " WHERE user_id = ? AND device_id IN ("
-                    + ",".join("?" * len(devices))
-                    + ")"
+
+                clause, args = make_in_list_sql_clause(
+                    txn.database_engine, "device_id", devices
                 )
+                sql = "SELECT device_id FROM devices WHERE user_id = ? AND " + clause
+
                 # TODO: Maybe this needs to be done in batches if there are
                 # too many local devices for a given user.
-                txn.execute(sql, [user_id] + devices)
+                txn.execute(sql, [user_id] + list(args))
                 for row in txn:
                     # Only insert into the local inbox if the device exists on
                     # this server
diff --git a/synapse/storage/devices.py b/synapse/storage/devices.py
index 111bfb3d6..ac5239e50 100644
--- a/synapse/storage/devices.py
+++ b/synapse/storage/devices.py
@@ -28,7 +28,12 @@ from synapse.logging.opentracing import (
     whitelisted_homeserver,
 )
 from synapse.metrics.background_process_metrics import run_as_background_process
-from synapse.storage._base import Cache, SQLBaseStore, db_to_json
+from synapse.storage._base import (
+    Cache,
+    SQLBaseStore,
+    db_to_json,
+    make_in_list_sql_clause,
+)
 from synapse.storage.background_updates import BackgroundUpdateStore
 from synapse.util import batch_iter
 from synapse.util.caches.descriptors import cached, cachedInlineCallbacks, cachedList
@@ -448,11 +453,14 @@ class DeviceWorkerStore(SQLBaseStore):
             sql = """
                 SELECT DISTINCT user_id FROM device_lists_stream
                 WHERE stream_id > ?
-                AND user_id IN (%s)
+                AND
             """
 
             for chunk in batch_iter(to_check, 100):
-                txn.execute(sql % (",".join("?" for _ in chunk),), (from_key,) + chunk)
+                clause, args = make_in_list_sql_clause(
+                    txn.database_engine, "user_id", chunk
+                )
+                txn.execute(sql + clause, (from_key,) + tuple(args))
                 changes.update(user_id for user_id, in txn)
 
             return changes
diff --git a/synapse/storage/event_federation.py b/synapse/storage/event_federation.py
index f5e8c3926..47cc10d32 100644
--- a/synapse/storage/event_federation.py
+++ b/synapse/storage/event_federation.py
@@ -25,7 +25,7 @@ from twisted.internet import defer
 
 from synapse.api.errors import StoreError
 from synapse.metrics.background_process_metrics import run_as_background_process
-from synapse.storage._base import SQLBaseStore
+from synapse.storage._base import SQLBaseStore, make_in_list_sql_clause
 from synapse.storage.events_worker import EventsWorkerStore
 from synapse.storage.signatures import SignatureWorkerStore
 from synapse.util.caches.descriptors import cached
@@ -68,7 +68,7 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas
         else:
             results = set()
 
-        base_sql = "SELECT auth_id FROM event_auth WHERE event_id IN (%s)"
+        base_sql = "SELECT auth_id FROM event_auth WHERE "
 
         front = set(event_ids)
         while front:
@@ -76,7 +76,10 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas
             front_list = list(front)
             chunks = [front_list[x : x + 100] for x in range(0, len(front), 100)]
             for chunk in chunks:
-                txn.execute(base_sql % (",".join(["?"] * len(chunk)),), chunk)
+                clause, args = make_in_list_sql_clause(
+                    txn.database_engine, "event_id", chunk
+                )
+                txn.execute(base_sql + clause, list(args))
                 new_front.update([r[0] for r in txn])
 
             new_front -= results
diff --git a/synapse/storage/events.py b/synapse/storage/events.py
index bb6ff0595..ee49ef235 100644
--- a/synapse/storage/events.py
+++ b/synapse/storage/events.py
@@ -39,6 +39,7 @@ from synapse.logging.utils import log_function
 from synapse.metrics import BucketCollector
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.state import StateResolutionStore
+from synapse.storage._base import make_in_list_sql_clause
 from synapse.storage.background_updates import BackgroundUpdateStore
 from synapse.storage.event_federation import EventFederationStore
 from synapse.storage.events_worker import EventsWorkerStore
@@ -641,14 +642,16 @@ class EventsStore(
                 LEFT JOIN rejections USING (event_id)
                 LEFT JOIN event_json USING (event_id)
             WHERE
-                prev_event_id IN (%s)
-                AND NOT events.outlier
+                NOT events.outlier
                 AND rejections.event_id IS NULL
-            """ % (
-                ",".join("?" for _ in batch),
+                AND
+            """
+
+            clause, args = make_in_list_sql_clause(
+                self.database_engine, "prev_event_id", batch
             )
 
-            txn.execute(sql, batch)
+            txn.execute(sql + clause, args)
             results.extend(r[0] for r in txn if not json.loads(r[1]).get("soft_failed"))
 
         for chunk in batch_iter(event_ids, 100):
@@ -695,13 +698,15 @@ class EventsStore(
                     LEFT JOIN rejections USING (event_id)
                     LEFT JOIN event_json USING (event_id)
                 WHERE
-                    event_id IN (%s)
-                    AND NOT events.outlier
-                """ % (
-                    ",".join("?" for _ in to_recursively_check),
+                    NOT events.outlier
+                    AND
+                """
+
+                clause, args = make_in_list_sql_clause(
+                    self.database_engine, "event_id", to_recursively_check
                 )
 
-                txn.execute(sql, to_recursively_check)
+                txn.execute(sql + clause, args)
                 to_recursively_check = []
 
                 for event_id, prev_event_id, metadata, rejected in txn:
@@ -1543,10 +1548,14 @@ class EventsStore(
                 " FROM events as e"
                 " LEFT JOIN rejections as rej USING (event_id)"
                 " LEFT JOIN redactions as r ON e.event_id = r.redacts"
-                " WHERE e.event_id IN (%s)"
-            ) % (",".join(["?"] * len(ev_map)),)
+                " WHERE "
+            )
 
-            txn.execute(sql, list(ev_map))
+            clause, args = make_in_list_sql_clause(
+                self.database_engine, "e.event_id", list(ev_map)
+            )
+
+            txn.execute(sql + clause, args)
             rows = self.cursor_to_dict(txn)
             for row in rows:
                 event = ev_map[row["event_id"]]
@@ -2249,11 +2258,12 @@ class EventsStore(
             sql = """
                 SELECT DISTINCT state_group FROM event_to_state_groups
                 LEFT JOIN events_to_purge AS ep USING (event_id)
-                WHERE state_group IN (%s) AND ep.event_id IS NULL
-            """ % (
-                ",".join("?" for _ in current_search),
+                WHERE ep.event_id IS NULL AND
+            """
+            clause, args = make_in_list_sql_clause(
+                txn.database_engine, "state_group", current_search
             )
-            txn.execute(sql, list(current_search))
+            txn.execute(sql + clause, list(args))
 
             referenced = set(sg for sg, in txn)
             referenced_groups |= referenced
diff --git a/synapse/storage/events_bg_updates.py b/synapse/storage/events_bg_updates.py
index 5717baf48..97728a6da 100644
--- a/synapse/storage/events_bg_updates.py
+++ b/synapse/storage/events_bg_updates.py
@@ -21,6 +21,7 @@ from canonicaljson import json
 
 from twisted.internet import defer
 
+from synapse.storage._base import make_in_list_sql_clause
 from synapse.storage.background_updates import BackgroundUpdateStore
 
 logger = logging.getLogger(__name__)
@@ -312,12 +313,13 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore):
                     INNER JOIN event_json USING (event_id)
                     LEFT JOIN rejections USING (event_id)
                     WHERE
-                        prev_event_id IN (%s)
-                        AND NOT events.outlier
-                """ % (
-                    ",".join("?" for _ in to_check),
+                        NOT events.outlier
+                        AND
+                """
+                clause, args = make_in_list_sql_clause(
+                    self.database_engine, "prev_event_id", to_check
                 )
-                txn.execute(sql, to_check)
+                txn.execute(sql + clause, list(args))
 
                 for prev_event_id, event_id, metadata, rejected in txn:
                     if event_id in graph:
diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py
index 57ce0304e..4c4b76bd9 100644
--- a/synapse/storage/events_worker.py
+++ b/synapse/storage/events_worker.py
@@ -31,12 +31,11 @@ from synapse.events.snapshot import EventContext  # noqa: F401
 from synapse.events.utils import prune_event
 from synapse.logging.context import LoggingContext, PreserveLoggingContext
 from synapse.metrics.background_process_metrics import run_as_background_process
+from synapse.storage._base import SQLBaseStore, make_in_list_sql_clause
 from synapse.types import get_domain_from_id
 from synapse.util import batch_iter
 from synapse.util.metrics import Measure
 
-from ._base import SQLBaseStore
-
 logger = logging.getLogger(__name__)
 
 
@@ -623,10 +622,14 @@ class EventsWorkerStore(SQLBaseStore):
                 " rej.reason "
                 " FROM event_json as e"
                 " LEFT JOIN rejections as rej USING (event_id)"
-                " WHERE e.event_id IN (%s)"
-            ) % (",".join(["?"] * len(evs)),)
+                " WHERE "
+            )
 
-            txn.execute(sql, evs)
+            clause, args = make_in_list_sql_clause(
+                txn.database_engine, "e.event_id", evs
+            )
+
+            txn.execute(sql + clause, args)
 
             for row in txn:
                 event_id = row[0]
@@ -640,11 +643,11 @@ class EventsWorkerStore(SQLBaseStore):
                 }
 
             # check for redactions
-            redactions_sql = (
-                "SELECT event_id, redacts FROM redactions WHERE redacts IN (%s)"
-            ) % (",".join(["?"] * len(evs)),)
+            redactions_sql = "SELECT event_id, redacts FROM redactions WHERE "
 
-            txn.execute(redactions_sql, evs)
+            clause, args = make_in_list_sql_clause(txn.database_engine, "redacts", evs)
+
+            txn.execute(redactions_sql + clause, args)
 
             for (redacter, redacted) in txn:
                 d = event_dict.get(redacted)
@@ -753,10 +756,11 @@ class EventsWorkerStore(SQLBaseStore):
         results = set()
 
         def have_seen_events_txn(txn, chunk):
-            sql = "SELECT event_id FROM events as e WHERE e.event_id IN (%s)" % (
-                ",".join("?" * len(chunk)),
+            sql = "SELECT event_id FROM events as e WHERE "
+            clause, args = make_in_list_sql_clause(
+                txn.database_engine, "e.event_id", chunk
             )
-            txn.execute(sql, chunk)
+            txn.execute(sql + clause, args)
             for (event_id,) in txn:
                 results.add(event_id)
 
diff --git a/synapse/storage/presence.py b/synapse/storage/presence.py
index 5db6f2d84..3a641f538 100644
--- a/synapse/storage/presence.py
+++ b/synapse/storage/presence.py
@@ -18,11 +18,10 @@ from collections import namedtuple
 from twisted.internet import defer
 
 from synapse.api.constants import PresenceState
+from synapse.storage._base import SQLBaseStore, make_in_list_sql_clause
 from synapse.util import batch_iter
 from synapse.util.caches.descriptors import cached, cachedList
 
-from ._base import SQLBaseStore
-
 
 class UserPresenceState(
     namedtuple(
@@ -119,14 +118,13 @@ class PresenceStore(SQLBaseStore):
         )
 
         # Delete old rows to stop database from getting really big
-        sql = (
-            "DELETE FROM presence_stream WHERE" " stream_id < ?" " AND user_id IN (%s)"
-        )
+        sql = "DELETE FROM presence_stream WHERE stream_id < ? AND "
 
         for states in batch_iter(presence_states, 50):
-            args = [stream_id]
-            args.extend(s.user_id for s in states)
-            txn.execute(sql % (",".join("?" for _ in states),), args)
+            clause, args = make_in_list_sql_clause(
+                self.database_engine, "user_id", [s.user_id for s in states]
+            )
+            txn.execute(sql + clause, [stream_id] + list(args))
 
     def get_all_presence_updates(self, last_id, current_id):
         if last_id == current_id:
diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py
index 290ddb30e..0c24430f2 100644
--- a/synapse/storage/receipts.py
+++ b/synapse/storage/receipts.py
@@ -21,12 +21,11 @@ from canonicaljson import json
 
 from twisted.internet import defer
 
+from synapse.storage._base import SQLBaseStore, make_in_list_sql_clause
+from synapse.storage.util.id_generators import StreamIdGenerator
 from synapse.util.caches.descriptors import cached, cachedInlineCallbacks, cachedList
 from synapse.util.caches.stream_change_cache import StreamChangeCache
 
-from ._base import SQLBaseStore
-from .util.id_generators import StreamIdGenerator
-
 logger = logging.getLogger(__name__)
 
 
@@ -217,24 +216,26 @@ class ReceiptsWorkerStore(SQLBaseStore):
 
         def f(txn):
             if from_key:
-                sql = (
-                    "SELECT * FROM receipts_linearized WHERE"
-                    " room_id IN (%s) AND stream_id > ? AND stream_id <= ?"
-                ) % (",".join(["?"] * len(room_ids)))
-                args = list(room_ids)
-                args.extend([from_key, to_key])
+                sql = """
+                    SELECT * FROM receipts_linearized WHERE
+                    stream_id > ? AND stream_id <= ? AND
+                """
+                clause, args = make_in_list_sql_clause(
+                    self.database_engine, "room_id", room_ids
+                )
 
-                txn.execute(sql, args)
+                txn.execute(sql + clause, [from_key, to_key] + list(args))
             else:
-                sql = (
-                    "SELECT * FROM receipts_linearized WHERE"
-                    " room_id IN (%s) AND stream_id <= ?"
-                ) % (",".join(["?"] * len(room_ids)))
+                sql = """
+                    SELECT * FROM receipts_linearized WHERE
+                    stream_id <= ? AND
+                """
 
-                args = list(room_ids)
-                args.append(to_key)
+                clause, args = make_in_list_sql_clause(
+                    self.database_engine, "room_id", room_ids
+                )
 
-                txn.execute(sql, args)
+                txn.execute(sql + clause, [to_key] + list(args))
 
             return self.cursor_to_dict(txn)
 
@@ -433,13 +434,19 @@ class ReceiptsStore(ReceiptsWorkerStore):
             # we need to points in graph -> linearized form.
             # TODO: Make this better.
             def graph_to_linear(txn):
-                query = (
-                    "SELECT event_id WHERE room_id = ? AND stream_ordering IN ("
-                    " SELECT max(stream_ordering) WHERE event_id IN (%s)"
-                    ")"
-                ) % (",".join(["?"] * len(event_ids)))
+                clause, args = make_in_list_sql_clause(
+                    self.database_engine, "event_id", event_ids
+                )
 
-                txn.execute(query, [room_id] + event_ids)
+                sql = """
+                    SELECT event_id WHERE room_id = ? AND stream_ordering IN (
+                        SELECT max(stream_ordering) WHERE %s
+                    )
+                """ % (
+                    clause,
+                )
+
+                txn.execute(sql, [room_id] + list(args))
                 rows = txn.fetchall()
                 if rows:
                     return rows[0][0]
diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py
index 4e606a838..ff6348782 100644
--- a/synapse/storage/roommember.py
+++ b/synapse/storage/roommember.py
@@ -26,7 +26,7 @@ from twisted.internet import defer
 from synapse.api.constants import EventTypes, Membership
 from synapse.metrics import LaterGauge
 from synapse.metrics.background_process_metrics import run_as_background_process
-from synapse.storage._base import LoggingTransaction
+from synapse.storage._base import LoggingTransaction, make_in_list_sql_clause
 from synapse.storage.background_updates import BackgroundUpdateStore
 from synapse.storage.engines import Sqlite3Engine
 from synapse.storage.events_worker import EventsWorkerStore
@@ -372,6 +372,9 @@ class RoomMemberWorkerStore(EventsWorkerStore):
         results = []
         if membership_list:
             if self._current_state_events_membership_up_to_date:
+                clause, args = make_in_list_sql_clause(
+                    self.database_engine, "c.membership", membership_list
+                )
                 sql = """
                     SELECT room_id, e.sender, c.membership, event_id, e.stream_ordering
                     FROM current_state_events AS c
@@ -379,11 +382,14 @@ class RoomMemberWorkerStore(EventsWorkerStore):
                     WHERE
                         c.type = 'm.room.member'
                         AND state_key = ?
-                        AND c.membership IN (%s)
+                        AND %s
                 """ % (
-                    ",".join("?" * len(membership_list))
+                    clause,
                 )
             else:
+                clause, args = make_in_list_sql_clause(
+                    self.database_engine, "m.membership", membership_list
+                )
                 sql = """
                     SELECT room_id, e.sender, m.membership, event_id, e.stream_ordering
                     FROM current_state_events AS c
@@ -392,12 +398,12 @@ class RoomMemberWorkerStore(EventsWorkerStore):
                     WHERE
                         c.type = 'm.room.member'
                         AND state_key = ?
-                        AND m.membership IN (%s)
+                        AND %s
                 """ % (
-                    ",".join("?" * len(membership_list))
+                    clause,
                 )
 
-            txn.execute(sql, (user_id, *membership_list))
+            txn.execute(sql, (user_id, *args))
             results = [RoomsForUser(**r) for r in self.cursor_to_dict(txn)]
 
         if do_invite:
diff --git a/synapse/storage/search.py b/synapse/storage/search.py
index 6ba4190f1..4be6e56df 100644
--- a/synapse/storage/search.py
+++ b/synapse/storage/search.py
@@ -24,6 +24,7 @@ from canonicaljson import json
 from twisted.internet import defer
 
 from synapse.api.errors import SynapseError
+from synapse.storage._base import add_in_list_sql_clause
 from synapse.storage.engines import PostgresEngine, Sqlite3Engine
 
 from .background_updates import BackgroundUpdateStore
@@ -385,8 +386,9 @@ class SearchStore(SearchBackgroundUpdateStore):
         # Make sure we don't explode because the person is in too many rooms.
         # We filter the results below regardless.
         if len(room_ids) < 500:
-            clauses.append("room_id IN (%s)" % (",".join(["?"] * len(room_ids)),))
-            args.extend(room_ids)
+            add_in_list_sql_clause(
+                self.database_engine, "room_id", room_ids, clauses, args
+            )
 
         local_clauses = []
         for key in keys:
@@ -492,8 +494,9 @@ class SearchStore(SearchBackgroundUpdateStore):
         # Make sure we don't explode because the person is in too many rooms.
         # We filter the results below regardless.
         if len(room_ids) < 500:
-            clauses.append("room_id IN (%s)" % (",".join(["?"] * len(room_ids)),))
-            args.extend(room_ids)
+            add_in_list_sql_clause(
+                self.database_engine, "room_id", room_ids, clauses, args
+            )
 
         local_clauses = []
         for key in keys:
diff --git a/synapse/storage/user_erasure_store.py b/synapse/storage/user_erasure_store.py
index 05cabc228..aa4f0da5f 100644
--- a/synapse/storage/user_erasure_store.py
+++ b/synapse/storage/user_erasure_store.py
@@ -56,15 +56,15 @@ class UserErasureWorkerStore(SQLBaseStore):
         # iterate it multiple times, and (b) avoiding duplicates.
         user_ids = tuple(set(user_ids))
 
-        def _get_erased_users(txn):
-            txn.execute(
-                "SELECT user_id FROM erased_users WHERE user_id IN (%s)"
-                % (",".join("?" * len(user_ids))),
-                user_ids,
-            )
-            return set(r[0] for r in txn)
+        rows = yield self._simple_select_many_batch(
+            table="erased_users",
+            column="user_id",
+            iterable=user_ids,
+            retcols=("user_id",),
+            desc="are_users_erased",
+        )
+        erased_users = set(row["user_id"] for row in rows)
 
-        erased_users = yield self.runInteraction("are_users_erased", _get_erased_users)
         res = dict((u, u in erased_users) for u in user_ids)
         return res
 

From 203ccdac5fa50888df3261c419c6b9fd670b21e5 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 2 Oct 2019 19:09:54 +0100
Subject: [PATCH 243/276] Newsfile

---
 changelog.d/6156.misc | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/6156.misc

diff --git a/changelog.d/6156.misc b/changelog.d/6156.misc
new file mode 100644
index 000000000..49525e941
--- /dev/null
+++ b/changelog.d/6156.misc
@@ -0,0 +1 @@
+Use Postgres ANY for selecting many values.

From fe1c1e6c28e09f88b30e0587161f9b1dbd6e8acf Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 10 Oct 2019 13:17:19 +0100
Subject: [PATCH 244/276] Fixup comments

Co-Authored-By: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
---
 synapse/util/patch_inline_callbacks.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/synapse/util/patch_inline_callbacks.py b/synapse/util/patch_inline_callbacks.py
index 66c3d4751..3925927f9 100644
--- a/synapse/util/patch_inline_callbacks.py
+++ b/synapse/util/patch_inline_callbacks.py
@@ -114,7 +114,7 @@ def _check_yield_points(f: Callable, changes: List[str]):
     """Wraps a generator that is about to be passed to defer.inlineCallbacks
     checking that after every yield the log contexts are correct.
 
-    Its perfectly valid for log contexts to change within a function, e.g. due
+    It's perfectly valid for log contexts to change within a function, e.g. due
     to new Measure blocks, so such changes are added to the given `changes`
     list instead of triggering an exception.
 
@@ -150,7 +150,7 @@ def _check_yield_points(f: Callable, changes: List[str]):
                     # final yield and returning. E.g. we forgot to yield on a
                     # function that returns a deferred.
                     #
-                    # We don't raise here as its perfectly valid for contexts to
+                    # We don't raise here as it's perfectly valid for contexts to
                     # change in a function, as long as it sets the correct context
                     # on resolving (which is checked separately).
                     err = (
@@ -171,7 +171,7 @@ def _check_yield_points(f: Callable, changes: List[str]):
 
             if isinstance(d, defer.Deferred) and not d.called:
                 # This happens if we yield on a deferred that doesn't follow
-                # the log context rules without wrappin in a `make_deferred_yieldable`.
+                # the log context rules without wrapping in a `make_deferred_yieldable`.
                 # We raise here as this should never happen.
                 if LoggingContext.current_context() is not LoggingContext.sentinel:
                     err = (

From 5373de6cced56c983098c82872cf17c311abdb96 Mon Sep 17 00:00:00 2001
From: Hubert Chathi <hubert@uhoreg.ca>
Date: Thu, 10 Oct 2019 08:54:07 -0400
Subject: [PATCH 245/276] change test name to be unique

---
 tests/handlers/test_e2e_room_keys.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/handlers/test_e2e_room_keys.py b/tests/handlers/test_e2e_room_keys.py
index c700a2fad..0bb96674a 100644
--- a/tests/handlers/test_e2e_room_keys.py
+++ b/tests/handlers/test_e2e_room_keys.py
@@ -187,7 +187,7 @@ class E2eRoomKeysHandlerTestCase(unittest.TestCase):
         self.assertEqual(res, 404)
 
     @defer.inlineCallbacks
-    def test_update_missing_version(self):
+    def test_update_omitted_version(self):
         """Check that the update succeeds if the version is missing from the body
         """
         version = yield self.handler.create_version(

From 430dc2c67b20bf4abff74f861d8dce78f880ec73 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Thu, 10 Oct 2019 14:05:30 +0100
Subject: [PATCH 246/276] Fix python packaging

... after it was broken by #6081
---
 MANIFEST.in           | 4 +---
 changelog.d/6191.misc | 1 +
 2 files changed, 2 insertions(+), 3 deletions(-)
 create mode 100644 changelog.d/6191.misc

diff --git a/MANIFEST.in b/MANIFEST.in
index 9c2902b8d..b22be58f3 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -47,7 +47,5 @@ prune debian
 prune demo/etc
 prune docker
 prune mypy.ini
+prune snap
 prune stubs
-
-exclude jenkins*
-recursive-exclude jenkins *.sh
diff --git a/changelog.d/6191.misc b/changelog.d/6191.misc
new file mode 100644
index 000000000..3c3370165
--- /dev/null
+++ b/changelog.d/6191.misc
@@ -0,0 +1 @@
+Add snapcraft packaging information. Contributed by @devec0.

From ca3e01e50d2caca6a55b7c7808f0e948b430363d Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 10 Oct 2019 14:52:29 +0100
Subject: [PATCH 247/276] Fix store_url_cache using bytes

---
 synapse/rest/media/v1/preview_url_resource.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py
index 7a56cd4b6..0c68c3aad 100644
--- a/synapse/rest/media/v1/preview_url_resource.py
+++ b/synapse/rest/media/v1/preview_url_resource.py
@@ -270,7 +270,7 @@ class PreviewUrlResource(DirectServeResource):
 
         logger.debug("Calculated OG for %s as %s" % (url, og))
 
-        jsonog = json.dumps(og).encode("utf8")
+        jsonog = json.dumps(og)
 
         # store OG in history-aware DB cache
         yield self.store.store_url_cache(
@@ -283,7 +283,7 @@ class PreviewUrlResource(DirectServeResource):
             media_info["created_ts"],
         )
 
-        return jsonog
+        return jsonog.encode("utf8")
 
     @defer.inlineCallbacks
     def _download_url(self, url, user):

From 3bc687508fa6c4cf82b5ddb22ce6f3674433d0ff Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 10 Oct 2019 15:35:46 +0100
Subject: [PATCH 248/276] Remove add_in_list_sql_clause

---
 synapse/storage/_base.py            | 35 +++++------------------------
 synapse/storage/engines/postgres.py |  6 +++++
 synapse/storage/engines/sqlite.py   |  6 +++++
 synapse/storage/search.py           | 12 +++++-----
 4 files changed, 25 insertions(+), 34 deletions(-)

diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py
index 085b8ae87..6176838aa 100644
--- a/synapse/storage/_base.py
+++ b/synapse/storage/_base.py
@@ -20,7 +20,7 @@ import random
 import sys
 import threading
 import time
-from typing import Iterable, List, Tuple
+from typing import Iterable, Tuple
 
 from six import PY2, iteritems, iterkeys, itervalues
 from six.moves import builtins, intern, range
@@ -1164,10 +1164,8 @@ class SQLBaseStore(object):
         if not iterable:
             return []
 
-        clauses = []
-        values = []
-
-        add_in_list_sql_clause(txn.database_engine, column, iterable, clauses, values)
+        clause, values = make_in_list_sql_clause(txn.database_engine, column, iterable)
+        clauses = [clause]
 
         for key, value in iteritems(keyvalues):
             clauses.append("%s = ?" % (key,))
@@ -1326,10 +1324,8 @@ class SQLBaseStore(object):
 
         sql = "DELETE FROM %s" % table
 
-        clauses = []
-        values = []
-
-        add_in_list_sql_clause(txn.database_engine, column, iterable, clauses, values)
+        clause, values = make_in_list_sql_clause(txn.database_engine, column, iterable)
+        clauses = [clause]
 
         for key, value in iteritems(keyvalues):
             clauses.append("%s = ?" % (key,))
@@ -1698,25 +1694,6 @@ def db_to_json(db_content):
         raise
 
 
-def add_in_list_sql_clause(
-    database_engine, column: str, iterable: Iterable, clauses: List[str], args: List
-):
-    """Adds an SQL clause to the given list of clauses/args that checks the
-    given column is in the iterable. c.f. `make_in_list_sql_clause`
-
-    Args:
-        database_engine
-        column: Name of the column
-        iterable: The values to check the column against.
-        clauses: A list to add the expanded clause to
-        args: A list of arguments that we append the args to.
-    """
-
-    clause, new_args = make_in_list_sql_clause(database_engine, column, iterable)
-    clauses.append(clause)
-    args.extend(new_args)
-
-
 def make_in_list_sql_clause(
     database_engine, column: str, iterable: Iterable
 ) -> Tuple[str, Iterable]:
@@ -1736,7 +1713,7 @@ def make_in_list_sql_clause(
         A tuple of SQL query and the args
     """
 
-    if isinstance(database_engine, PostgresEngine):
+    if database_engine.supports_using_any_list:
         # This should hopefully be faster, but also makes postgres query
         # stats easier to understand.
         return "%s = ANY(?)" % (column,), [list(iterable)]
diff --git a/synapse/storage/engines/postgres.py b/synapse/storage/engines/postgres.py
index 601617b21..f36600b4b 100644
--- a/synapse/storage/engines/postgres.py
+++ b/synapse/storage/engines/postgres.py
@@ -79,6 +79,12 @@ class PostgresEngine(object):
         """
         return True
 
+    @property
+    def supports_using_any_list(self):
+        """Do we support using `a = ANY(?)` and passing a list
+        """
+        return True
+
     def is_deadlock(self, error):
         if isinstance(error, self.module.DatabaseError):
             # https://www.postgresql.org/docs/current/static/errcodes-appendix.html
diff --git a/synapse/storage/engines/sqlite.py b/synapse/storage/engines/sqlite.py
index ac9210936..252625806 100644
--- a/synapse/storage/engines/sqlite.py
+++ b/synapse/storage/engines/sqlite.py
@@ -46,6 +46,12 @@ class Sqlite3Engine(object):
         """
         return self.module.sqlite_version_info >= (3, 15, 0)
 
+    @property
+    def supports_any_list(self):
+        """Do we support using `a = ANY(?)` and passing a list
+        """
+        return False
+
     def check_database(self, txn):
         pass
 
diff --git a/synapse/storage/search.py b/synapse/storage/search.py
index 4be6e56df..7695bf09f 100644
--- a/synapse/storage/search.py
+++ b/synapse/storage/search.py
@@ -24,7 +24,7 @@ from canonicaljson import json
 from twisted.internet import defer
 
 from synapse.api.errors import SynapseError
-from synapse.storage._base import add_in_list_sql_clause
+from synapse.storage._base import make_in_list_sql_clause
 from synapse.storage.engines import PostgresEngine, Sqlite3Engine
 
 from .background_updates import BackgroundUpdateStore
@@ -386,9 +386,10 @@ class SearchStore(SearchBackgroundUpdateStore):
         # Make sure we don't explode because the person is in too many rooms.
         # We filter the results below regardless.
         if len(room_ids) < 500:
-            add_in_list_sql_clause(
-                self.database_engine, "room_id", room_ids, clauses, args
+            clause, args = make_in_list_sql_clause(
+                self.database_engine, "room_id", room_ids
             )
+            clauses = [clause]
 
         local_clauses = []
         for key in keys:
@@ -494,9 +495,10 @@ class SearchStore(SearchBackgroundUpdateStore):
         # Make sure we don't explode because the person is in too many rooms.
         # We filter the results below regardless.
         if len(room_ids) < 500:
-            add_in_list_sql_clause(
-                self.database_engine, "room_id", room_ids, clauses, args
+            clause, args = make_in_list_sql_clause(
+                self.database_engine, "room_id", room_ids
             )
+            clauses = [clause]
 
         local_clauses = []
         for key in keys:

From bc244627ac759bbd4691a2a20ac16383b4c2348c Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 10 Oct 2019 15:37:53 +0100
Subject: [PATCH 249/276] Fix postgres unit tests

---
 tests/storage/test_event_federation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/storage/test_event_federation.py b/tests/storage/test_event_federation.py
index b58386994..2fe50377f 100644
--- a/tests/storage/test_event_federation.py
+++ b/tests/storage/test_event_federation.py
@@ -57,7 +57,7 @@ class EventFederationWorkerStoreTestCase(tests.unittest.TestCase):
                     "(event_id, algorithm, hash) "
                     "VALUES (?, 'sha256', ?)"
                 ),
-                (event_id, b"ffff"),
+                (event_id, bytearray(b"ffff")),
             )
 
         for i in range(0, 11):

From afb6d9d53b417ff3b651767ab88bf63606e7225e Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 10 Oct 2019 15:55:41 +0100
Subject: [PATCH 250/276] Fix SQLite

---
 synapse/storage/engines/sqlite.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/storage/engines/sqlite.py b/synapse/storage/engines/sqlite.py
index 252625806..ddad17dc5 100644
--- a/synapse/storage/engines/sqlite.py
+++ b/synapse/storage/engines/sqlite.py
@@ -47,7 +47,7 @@ class Sqlite3Engine(object):
         return self.module.sqlite_version_info >= (3, 15, 0)
 
     @property
-    def supports_any_list(self):
+    def supports_using_any_list(self):
         """Do we support using `a = ANY(?)` and passing a list
         """
         return False

From b54b1e759a9bc6517d5a31e3ea732cecb307d4c6 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 10 Oct 2019 16:19:40 +0100
Subject: [PATCH 251/276] Fix SQLite take 2

---
 synapse/storage/_base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py
index 6176838aa..f5906fcd5 100644
--- a/synapse/storage/_base.py
+++ b/synapse/storage/_base.py
@@ -1718,4 +1718,4 @@ def make_in_list_sql_clause(
         # stats easier to understand.
         return "%s = ANY(?)" % (column,), [list(iterable)]
     else:
-        return "%s IN (%s)" % (column, ",".join("?" for _ in iterable)), iterable
+        return "%s IN (%s)" % (column, ",".join("?" for _ in iterable)), list(iterable)

From 4908fb3b30ac007fda5993521448804067751a6d Mon Sep 17 00:00:00 2001
From: Hubert Chathi <hubert@uhoreg.ca>
Date: Thu, 10 Oct 2019 15:56:00 -0400
Subject: [PATCH 252/276] make storage layer in charge of interpreting the
 device key data

---
 synapse/handlers/e2e_keys.py          | 11 -----------
 synapse/storage/end_to_end_keys.py    | 11 +++++++++--
 tests/storage/test_end_to_end_keys.py | 12 ++++++------
 3 files changed, 15 insertions(+), 19 deletions(-)

diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py
index 056fb97ac..6708d983a 100644
--- a/synapse/handlers/e2e_keys.py
+++ b/synapse/handlers/e2e_keys.py
@@ -248,17 +248,6 @@ class E2eKeysHandler(object):
 
         results = yield self.store.get_e2e_device_keys(local_query)
 
-        # Build the result structure, un-jsonify the results, and add the
-        # "unsigned" section
-        for user_id, device_keys in results.items():
-            for device_id, device_info in device_keys.items():
-                r = dict(device_info["keys"])
-                r["unsigned"] = {}
-                display_name = device_info["device_display_name"]
-                if display_name is not None:
-                    r["unsigned"]["device_display_name"] = display_name
-                result_dict[user_id][device_id] = r
-
         log_kv(results)
         return result_dict
 
diff --git a/synapse/storage/end_to_end_keys.py b/synapse/storage/end_to_end_keys.py
index 33e3a8493..d802d7a48 100644
--- a/synapse/storage/end_to_end_keys.py
+++ b/synapse/storage/end_to_end_keys.py
@@ -40,7 +40,7 @@ class EndToEndKeyWorkerStore(SQLBaseStore):
                 This option only takes effect if include_all_devices is true.
         Returns:
             Dict mapping from user-id to dict mapping from device_id to
-            dict containing "key_json", "device_display_name".
+            key data.
         """
         set_tag("query_list", query_list)
         if not query_list:
@@ -54,9 +54,16 @@ class EndToEndKeyWorkerStore(SQLBaseStore):
             include_deleted_devices,
         )
 
+        # Build the result structure, un-jsonify the results, and add the
+        # "unsigned" section
         for user_id, device_keys in iteritems(results):
             for device_id, device_info in iteritems(device_keys):
-                device_info["keys"] = db_to_json(device_info.pop("key_json"))
+                r = db_to_json(device_info.pop("key_json"))
+                r["unsigned"] = {}
+                display_name = device_info["device_display_name"]
+                if display_name is not None:
+                    r["unsigned"]["device_display_name"] = display_name
+                results[user_id][device_id] = r
 
         return results
 
diff --git a/tests/storage/test_end_to_end_keys.py b/tests/storage/test_end_to_end_keys.py
index c8ece1528..398d54628 100644
--- a/tests/storage/test_end_to_end_keys.py
+++ b/tests/storage/test_end_to_end_keys.py
@@ -38,7 +38,7 @@ class EndToEndKeyStoreTestCase(tests.unittest.TestCase):
         self.assertIn("user", res)
         self.assertIn("device", res["user"])
         dev = res["user"]["device"]
-        self.assertDictContainsSubset({"keys": json, "device_display_name": None}, dev)
+        self.assertDictContainsSubset(json, dev)
 
     @defer.inlineCallbacks
     def test_reupload_key(self):
@@ -68,7 +68,7 @@ class EndToEndKeyStoreTestCase(tests.unittest.TestCase):
         self.assertIn("device", res["user"])
         dev = res["user"]["device"]
         self.assertDictContainsSubset(
-            {"keys": json, "device_display_name": "display_name"}, dev
+            {"key": "value", "unsigned": {"device_display_name": "display_name"}}, dev
         )
 
     @defer.inlineCallbacks
@@ -80,10 +80,10 @@ class EndToEndKeyStoreTestCase(tests.unittest.TestCase):
         yield self.store.store_device("user2", "device1", None)
         yield self.store.store_device("user2", "device2", None)
 
-        yield self.store.set_e2e_device_keys("user1", "device1", now, "json11")
-        yield self.store.set_e2e_device_keys("user1", "device2", now, "json12")
-        yield self.store.set_e2e_device_keys("user2", "device1", now, "json21")
-        yield self.store.set_e2e_device_keys("user2", "device2", now, "json22")
+        yield self.store.set_e2e_device_keys("user1", "device1", now, {"key": "json11"})
+        yield self.store.set_e2e_device_keys("user1", "device2", now, {"key": "json12"})
+        yield self.store.set_e2e_device_keys("user2", "device1", now, {"key": "json21"})
+        yield self.store.set_e2e_device_keys("user2", "device2", now, {"key": "json22"})
 
         res = yield self.store.get_e2e_device_keys(
             (("user1", "device1"), ("user2", "device2"))

From 2208891ace3b1d9db148d606fca2b3f784a4257a Mon Sep 17 00:00:00 2001
From: Hubert Chathi <hubert@uhoreg.ca>
Date: Thu, 10 Oct 2019 19:22:10 -0400
Subject: [PATCH 253/276] add changelog

---
 changelog.d/6193.misc | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/6193.misc

diff --git a/changelog.d/6193.misc b/changelog.d/6193.misc
new file mode 100644
index 000000000..8e3707f8f
--- /dev/null
+++ b/changelog.d/6193.misc
@@ -0,0 +1 @@
+Make storage layer responsible for adding device names to key, rather than the handler.

From 7a0dce92594d05179234095899c3d09a8a744cbb Mon Sep 17 00:00:00 2001
From: Hubert Chathi <hubert@uhoreg.ca>
Date: Thu, 10 Oct 2019 20:31:30 -0400
Subject: [PATCH 254/276] make sure we actually return something

---
 synapse/handlers/e2e_keys.py       | 5 +++++
 synapse/storage/end_to_end_keys.py | 6 ++++--
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py
index 6708d983a..0a84d0e2b 100644
--- a/synapse/handlers/e2e_keys.py
+++ b/synapse/handlers/e2e_keys.py
@@ -248,6 +248,11 @@ class E2eKeysHandler(object):
 
         results = yield self.store.get_e2e_device_keys(local_query)
 
+        # Build the result structure
+        for user_id, device_keys in results.items():
+            for device_id, device_info in device_keys.items():
+                result_dict[user_id][device_id] = device_info
+
         log_kv(results)
         return result_dict
 
diff --git a/synapse/storage/end_to_end_keys.py b/synapse/storage/end_to_end_keys.py
index d802d7a48..b00a391c8 100644
--- a/synapse/storage/end_to_end_keys.py
+++ b/synapse/storage/end_to_end_keys.py
@@ -56,16 +56,18 @@ class EndToEndKeyWorkerStore(SQLBaseStore):
 
         # Build the result structure, un-jsonify the results, and add the
         # "unsigned" section
+        rv = {}
         for user_id, device_keys in iteritems(results):
+            rv[user_id] = {}
             for device_id, device_info in iteritems(device_keys):
                 r = db_to_json(device_info.pop("key_json"))
                 r["unsigned"] = {}
                 display_name = device_info["device_display_name"]
                 if display_name is not None:
                     r["unsigned"]["device_display_name"] = display_name
-                results[user_id][device_id] = r
+                rv[user_id][device_id] = r
 
-        return results
+        return rv
 
     @trace
     def _get_e2e_device_keys_txn(

From a0d0ba7862e38588aa0d0ac29a720fdf06f1ab8d Mon Sep 17 00:00:00 2001
From: Neil Johnson <neil@matrix.org>
Date: Fri, 11 Oct 2019 09:38:26 +0100
Subject: [PATCH 255/276] Fix MAU reaping where reserved users are specified.
 (#6168)

---
 changelog.d/6168.bugfix                    |   1 +
 synapse/app/homeserver.py                  |   6 +-
 synapse/storage/monthly_active_users.py    | 101 +++++++++++++--------
 tests/storage/test_monthly_active_users.py |  58 ++++++++++--
 4 files changed, 115 insertions(+), 51 deletions(-)
 create mode 100644 changelog.d/6168.bugfix

diff --git a/changelog.d/6168.bugfix b/changelog.d/6168.bugfix
new file mode 100644
index 000000000..39e8e9d01
--- /dev/null
+++ b/changelog.d/6168.bugfix
@@ -0,0 +1 @@
+Fix monthly active user reaping where reserved users are specified.
diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py
index 774326dff..eb54f5685 100644
--- a/synapse/app/homeserver.py
+++ b/synapse/app/homeserver.py
@@ -605,13 +605,13 @@ def run(hs):
     @defer.inlineCallbacks
     def generate_monthly_active_users():
         current_mau_count = 0
-        reserved_count = 0
+        reserved_users = ()
         store = hs.get_datastore()
         if hs.config.limit_usage_by_mau or hs.config.mau_stats_only:
             current_mau_count = yield store.get_monthly_active_count()
-            reserved_count = yield store.get_registered_reserved_users_count()
+            reserved_users = yield store.get_registered_reserved_users()
         current_mau_gauge.set(float(current_mau_count))
-        registered_reserved_users_mau_gauge.set(float(reserved_count))
+        registered_reserved_users_mau_gauge.set(float(len(reserved_users)))
         max_mau_gauge.set(float(hs.config.max_mau_value))
 
     def start_generate_monthly_active_users():
diff --git a/synapse/storage/monthly_active_users.py b/synapse/storage/monthly_active_users.py
index 752e9788a..3803604be 100644
--- a/synapse/storage/monthly_active_users.py
+++ b/synapse/storage/monthly_active_users.py
@@ -32,7 +32,6 @@ class MonthlyActiveUsersStore(SQLBaseStore):
         super(MonthlyActiveUsersStore, self).__init__(None, hs)
         self._clock = hs.get_clock()
         self.hs = hs
-        self.reserved_users = ()
         # Do not add more reserved users than the total allowable number
         self._new_transaction(
             dbconn,
@@ -51,7 +50,6 @@ class MonthlyActiveUsersStore(SQLBaseStore):
             txn (cursor):
             threepids (list[dict]): List of threepid dicts to reserve
         """
-        reserved_user_list = []
 
         for tp in threepids:
             user_id = self.get_user_id_by_threepid_txn(txn, tp["medium"], tp["address"])
@@ -60,10 +58,8 @@ class MonthlyActiveUsersStore(SQLBaseStore):
                 is_support = self.is_support_user_txn(txn, user_id)
                 if not is_support:
                     self.upsert_monthly_active_user_txn(txn, user_id)
-                    reserved_user_list.append(user_id)
             else:
                 logger.warning("mau limit reserved threepid %s not found in db" % tp)
-        self.reserved_users = tuple(reserved_user_list)
 
     @defer.inlineCallbacks
     def reap_monthly_active_users(self):
@@ -74,8 +70,11 @@ class MonthlyActiveUsersStore(SQLBaseStore):
             Deferred[]
         """
 
-        def _reap_users(txn):
-            # Purge stale users
+        def _reap_users(txn, reserved_users):
+            """
+            Args:
+                reserved_users (list): reserved users to preserve
+            """
 
             thirty_days_ago = int(self._clock.time_msec()) - (1000 * 60 * 60 * 24 * 30)
             query_args = [thirty_days_ago]
@@ -83,20 +82,19 @@ class MonthlyActiveUsersStore(SQLBaseStore):
 
             # Need if/else since 'AND user_id NOT IN ({})' fails on Postgres
             # when len(reserved_users) == 0. Works fine on sqlite.
-            if len(self.reserved_users) > 0:
+            if len(reserved_users) > 0:
                 # questionmarks is a hack to overcome sqlite not supporting
                 # tuples in 'WHERE IN %s'
-                questionmarks = "?" * len(self.reserved_users)
+                question_marks = ",".join("?" * len(reserved_users))
 
-                query_args.extend(self.reserved_users)
-                sql = base_sql + """ AND user_id NOT IN ({})""".format(
-                    ",".join(questionmarks)
-                )
+                query_args.extend(reserved_users)
+                sql = base_sql + " AND user_id NOT IN ({})".format(question_marks)
             else:
                 sql = base_sql
 
             txn.execute(sql, query_args)
 
+            max_mau_value = self.hs.config.max_mau_value
             if self.hs.config.limit_usage_by_mau:
                 # If MAU user count still exceeds the MAU threshold, then delete on
                 # a least recently active basis.
@@ -106,31 +104,52 @@ class MonthlyActiveUsersStore(SQLBaseStore):
                 # While Postgres does not require 'LIMIT', but also does not support
                 # negative LIMIT values. So there is no way to write it that both can
                 # support
-                safe_guard = self.hs.config.max_mau_value - len(self.reserved_users)
-                # Must be greater than zero for postgres
-                safe_guard = safe_guard if safe_guard > 0 else 0
-                query_args = [safe_guard]
-
-                base_sql = """
-                    DELETE FROM monthly_active_users
-                    WHERE user_id NOT IN (
-                        SELECT user_id FROM monthly_active_users
-                        ORDER BY timestamp DESC
-                        LIMIT ?
+                if len(reserved_users) == 0:
+                    sql = """
+                        DELETE FROM monthly_active_users
+                        WHERE user_id NOT IN (
+                            SELECT user_id FROM monthly_active_users
+                            ORDER BY timestamp DESC
+                            LIMIT ?
                         )
-                    """
+                        """
+                    txn.execute(sql, (max_mau_value,))
                 # Need if/else since 'AND user_id NOT IN ({})' fails on Postgres
                 # when len(reserved_users) == 0. Works fine on sqlite.
-                if len(self.reserved_users) > 0:
-                    query_args.extend(self.reserved_users)
-                    sql = base_sql + """ AND user_id NOT IN ({})""".format(
-                        ",".join(questionmarks)
-                    )
                 else:
-                    sql = base_sql
-                txn.execute(sql, query_args)
+                    # Must be >= 0 for postgres
+                    num_of_non_reserved_users_to_remove = max(
+                        max_mau_value - len(reserved_users), 0
+                    )
 
-        yield self.runInteraction("reap_monthly_active_users", _reap_users)
+                    # It is important to filter reserved users twice to guard
+                    # against the case where the reserved user is present in the
+                    # SELECT, meaning that a legitimate mau is deleted.
+                    sql = """
+                        DELETE FROM monthly_active_users
+                        WHERE user_id NOT IN (
+                            SELECT user_id FROM monthly_active_users
+                            WHERE user_id NOT IN ({})
+                            ORDER BY timestamp DESC
+                            LIMIT ?
+                        )
+                        AND user_id NOT IN ({})
+                    """.format(
+                        question_marks, question_marks
+                    )
+
+                    query_args = [
+                        *reserved_users,
+                        num_of_non_reserved_users_to_remove,
+                        *reserved_users,
+                    ]
+
+                    txn.execute(sql, query_args)
+
+        reserved_users = yield self.get_registered_reserved_users()
+        yield self.runInteraction(
+            "reap_monthly_active_users", _reap_users, reserved_users
+        )
         # It seems poor to invalidate the whole cache, Postgres supports
         # 'Returning' which would allow me to invalidate only the
         # specific users, but sqlite has no way to do this and instead
@@ -159,21 +178,25 @@ class MonthlyActiveUsersStore(SQLBaseStore):
         return self.runInteraction("count_users", _count_users)
 
     @defer.inlineCallbacks
-    def get_registered_reserved_users_count(self):
-        """Of the reserved threepids defined in config, how many are associated
+    def get_registered_reserved_users(self):
+        """Of the reserved threepids defined in config, which are associated
         with registered users?
 
         Returns:
-            Defered[int]: Number of real reserved users
+            Deferred[list]: Real reserved users
         """
-        count = 0
-        for tp in self.hs.config.mau_limits_reserved_threepids:
+        users = []
+
+        for tp in self.hs.config.mau_limits_reserved_threepids[
+            : self.hs.config.max_mau_value
+        ]:
             user_id = yield self.hs.get_datastore().get_user_id_by_threepid(
                 tp["medium"], tp["address"]
             )
             if user_id:
-                count = count + 1
-        return count
+                users.append(user_id)
+
+        return users
 
     @defer.inlineCallbacks
     def upsert_monthly_active_user(self, user_id):
diff --git a/tests/storage/test_monthly_active_users.py b/tests/storage/test_monthly_active_users.py
index 1494650d1..90a63dc47 100644
--- a/tests/storage/test_monthly_active_users.py
+++ b/tests/storage/test_monthly_active_users.py
@@ -50,6 +50,7 @@ class MonthlyActiveUsersTestCase(unittest.HomeserverTestCase):
             {"medium": "email", "address": user2_email},
             {"medium": "email", "address": user3_email},
         ]
+        self.hs.config.mau_limits_reserved_threepids = threepids
         # -1 because user3 is a support user and does not count
         user_num = len(threepids) - 1
 
@@ -84,6 +85,7 @@ class MonthlyActiveUsersTestCase(unittest.HomeserverTestCase):
         self.hs.config.max_mau_value = 0
 
         self.reactor.advance(FORTY_DAYS)
+        self.hs.config.max_mau_value = 5
 
         self.store.reap_monthly_active_users()
         self.pump()
@@ -147,9 +149,7 @@ class MonthlyActiveUsersTestCase(unittest.HomeserverTestCase):
         self.store.reap_monthly_active_users()
         self.pump()
         count = self.store.get_monthly_active_count()
-        self.assertEquals(
-            self.get_success(count), initial_users - self.hs.config.max_mau_value
-        )
+        self.assertEquals(self.get_success(count), self.hs.config.max_mau_value)
 
         self.reactor.advance(FORTY_DAYS)
         self.store.reap_monthly_active_users()
@@ -158,6 +158,44 @@ class MonthlyActiveUsersTestCase(unittest.HomeserverTestCase):
         count = self.store.get_monthly_active_count()
         self.assertEquals(self.get_success(count), 0)
 
+    def test_reap_monthly_active_users_reserved_users(self):
+        """ Tests that reaping behaves correctly when reserved users are
+        present"""
+
+        self.hs.config.max_mau_value = 5
+        initial_users = 5
+        reserved_user_number = initial_users - 1
+        threepids = []
+        for i in range(initial_users):
+            user = "@user%d:server" % i
+            email = "user%d@example.com" % i
+            self.get_success(self.store.upsert_monthly_active_user(user))
+            threepids.append({"medium": "email", "address": email})
+            # Need to ensure that the most recent entries in the
+            # monthly_active_users table are reserved
+            now = int(self.hs.get_clock().time_msec())
+            if i != 0:
+                self.get_success(
+                    self.store.register_user(user_id=user, password_hash=None)
+                )
+                self.get_success(
+                    self.store.user_add_threepid(user, "email", email, now, now)
+                )
+
+        self.hs.config.mau_limits_reserved_threepids = threepids
+        self.store.runInteraction(
+            "initialise", self.store._initialise_reserved_users, threepids
+        )
+        count = self.store.get_monthly_active_count()
+        self.assertTrue(self.get_success(count), initial_users)
+
+        users = self.store.get_registered_reserved_users()
+        self.assertEquals(len(self.get_success(users)), reserved_user_number)
+
+        self.get_success(self.store.reap_monthly_active_users())
+        count = self.store.get_monthly_active_count()
+        self.assertEquals(self.get_success(count), self.hs.config.max_mau_value)
+
     def test_populate_monthly_users_is_guest(self):
         # Test that guest users are not added to mau list
         user_id = "@user_id:host"
@@ -192,12 +230,13 @@ class MonthlyActiveUsersTestCase(unittest.HomeserverTestCase):
 
     def test_get_reserved_real_user_account(self):
         # Test no reserved users, or reserved threepids
-        count = self.store.get_registered_reserved_users_count()
-        self.assertEquals(self.get_success(count), 0)
+        users = self.get_success(self.store.get_registered_reserved_users())
+        self.assertEquals(len(users), 0)
         # Test reserved users but no registered users
 
         user1 = "@user1:example.com"
         user2 = "@user2:example.com"
+
         user1_email = "user1@example.com"
         user2_email = "user2@example.com"
         threepids = [
@@ -210,8 +249,8 @@ class MonthlyActiveUsersTestCase(unittest.HomeserverTestCase):
         )
 
         self.pump()
-        count = self.store.get_registered_reserved_users_count()
-        self.assertEquals(self.get_success(count), 0)
+        users = self.get_success(self.store.get_registered_reserved_users())
+        self.assertEquals(len(users), 0)
 
         # Test reserved registed users
         self.store.register_user(user_id=user1, password_hash=None)
@@ -221,8 +260,9 @@ class MonthlyActiveUsersTestCase(unittest.HomeserverTestCase):
         now = int(self.hs.get_clock().time_msec())
         self.store.user_add_threepid(user1, "email", user1_email, now, now)
         self.store.user_add_threepid(user2, "email", user2_email, now, now)
-        count = self.store.get_registered_reserved_users_count()
-        self.assertEquals(self.get_success(count), len(threepids))
+
+        users = self.get_success(self.store.get_registered_reserved_users())
+        self.assertEquals(len(users), len(threepids))
 
     def test_support_user_not_add_to_mau_limits(self):
         support_user_id = "@support:test"

From f3ceaf432365bce77cff71116fb6c7d38e61c9ab Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Fri, 11 Oct 2019 11:22:36 +0100
Subject: [PATCH 256/276] Trace non-JSON APIs, /media, /key etc

---
 synapse/http/server.py         |  2 +-
 synapse/logging/opentracing.py | 14 ++++++++++----
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/synapse/http/server.py b/synapse/http/server.py
index cb9158fe1..2ccb210fd 100644
--- a/synapse/http/server.py
+++ b/synapse/http/server.py
@@ -388,7 +388,7 @@ class DirectServeResource(resource.Resource):
         if not callback:
             return super().render(request)
 
-        resp = callback(request)
+        resp = trace_servlet(self.__class__.__name__)(callback)(request)
 
         # If it's a coroutine, turn it into a Deferred
         if isinstance(resp, types.CoroutineType):
diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py
index cd1ff6a51..0638cec42 100644
--- a/synapse/logging/opentracing.py
+++ b/synapse/logging/opentracing.py
@@ -169,6 +169,7 @@ import contextlib
 import inspect
 import logging
 import re
+import types
 from functools import wraps
 from typing import Dict
 
@@ -778,8 +779,7 @@ def trace_servlet(servlet_name, extract_context=False):
             return func
 
         @wraps(func)
-        @defer.inlineCallbacks
-        def _trace_servlet_inner(request, *args, **kwargs):
+        async def _trace_servlet_inner(request, *args, **kwargs):
             request_tags = {
                 "request_id": request.get_request_id(),
                 tags.SPAN_KIND: tags.SPAN_KIND_RPC_SERVER,
@@ -796,8 +796,14 @@ def trace_servlet(servlet_name, extract_context=False):
                 scope = start_active_span(servlet_name, tags=request_tags)
 
             with scope:
-                result = yield defer.maybeDeferred(func, request, *args, **kwargs)
-                return result
+                result = func(request, *args, **kwargs)
+
+                if not isinstance(result, (types.CoroutineType, defer.Deferred)):
+                    # Some servlets aren't async and just return results
+                    # directly, so we handle that here.
+                    return result
+
+                return await result
 
         return _trace_servlet_inner
 

From de3a1764266536fdc4bf87b01ed873632213eb12 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Fri, 11 Oct 2019 11:24:08 +0100
Subject: [PATCH 257/276] Newsfile

---
 changelog.d/6195.bugfix | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/6195.bugfix

diff --git a/changelog.d/6195.bugfix b/changelog.d/6195.bugfix
new file mode 100644
index 000000000..d22935dbc
--- /dev/null
+++ b/changelog.d/6195.bugfix
@@ -0,0 +1 @@
+Fix tracing of non-JSON APIs, /media, /key etc.

From fca3a541e7e5845d61c519be7223a035374ed698 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Fri, 11 Oct 2019 12:05:27 +0100
Subject: [PATCH 258/276] Port rest/admin/__init__.py to async/await

---
 synapse/rest/admin/__init__.py | 127 ++++++++++++++-------------------
 1 file changed, 55 insertions(+), 72 deletions(-)

diff --git a/synapse/rest/admin/__init__.py b/synapse/rest/admin/__init__.py
index 81b6bd881..f7b948300 100644
--- a/synapse/rest/admin/__init__.py
+++ b/synapse/rest/admin/__init__.py
@@ -23,8 +23,6 @@ import re
 from six import text_type
 from six.moves import http_client
 
-from twisted.internet import defer
-
 import synapse
 from synapse.api.constants import Membership, UserTypes
 from synapse.api.errors import Codes, NotFoundError, SynapseError
@@ -59,15 +57,14 @@ class UsersRestServlet(RestServlet):
         self.auth = hs.get_auth()
         self.handlers = hs.get_handlers()
 
-    @defer.inlineCallbacks
-    def on_GET(self, request, user_id):
+    async def on_GET(self, request, user_id):
         target_user = UserID.from_string(user_id)
-        yield assert_requester_is_admin(self.auth, request)
+        await assert_requester_is_admin(self.auth, request)
 
         if not self.hs.is_mine(target_user):
             raise SynapseError(400, "Can only users a local user")
 
-        ret = yield self.handlers.admin_handler.get_users()
+        ret = await self.handlers.admin_handler.get_users()
 
         return 200, ret
 
@@ -122,8 +119,7 @@ class UserRegisterServlet(RestServlet):
         self.nonces[nonce] = int(self.reactor.seconds())
         return 200, {"nonce": nonce}
 
-    @defer.inlineCallbacks
-    def on_POST(self, request):
+    async def on_POST(self, request):
         self._clear_old_nonces()
 
         if not self.hs.config.registration_shared_secret:
@@ -204,14 +200,14 @@ class UserRegisterServlet(RestServlet):
 
         register = RegisterRestServlet(self.hs)
 
-        user_id = yield register.registration_handler.register_user(
+        user_id = await register.registration_handler.register_user(
             localpart=body["username"].lower(),
             password=body["password"],
             admin=bool(admin),
             user_type=user_type,
         )
 
-        result = yield register._create_registration_details(user_id, body)
+        result = await register._create_registration_details(user_id, body)
         return 200, result
 
 
@@ -223,19 +219,18 @@ class WhoisRestServlet(RestServlet):
         self.auth = hs.get_auth()
         self.handlers = hs.get_handlers()
 
-    @defer.inlineCallbacks
-    def on_GET(self, request, user_id):
+    async def on_GET(self, request, user_id):
         target_user = UserID.from_string(user_id)
-        requester = yield self.auth.get_user_by_req(request)
+        requester = await self.auth.get_user_by_req(request)
         auth_user = requester.user
 
         if target_user != auth_user:
-            yield assert_user_is_admin(self.auth, auth_user)
+            await assert_user_is_admin(self.auth, auth_user)
 
         if not self.hs.is_mine(target_user):
             raise SynapseError(400, "Can only whois a local user")
 
-        ret = yield self.handlers.admin_handler.get_whois(target_user)
+        ret = await self.handlers.admin_handler.get_whois(target_user)
 
         return 200, ret
 
@@ -255,9 +250,8 @@ class PurgeHistoryRestServlet(RestServlet):
         self.store = hs.get_datastore()
         self.auth = hs.get_auth()
 
-    @defer.inlineCallbacks
-    def on_POST(self, request, room_id, event_id):
-        yield assert_requester_is_admin(self.auth, request)
+    async def on_POST(self, request, room_id, event_id):
+        await assert_requester_is_admin(self.auth, request)
 
         body = parse_json_object_from_request(request, allow_empty_body=True)
 
@@ -270,12 +264,12 @@ class PurgeHistoryRestServlet(RestServlet):
             event_id = body.get("purge_up_to_event_id")
 
         if event_id is not None:
-            event = yield self.store.get_event(event_id)
+            event = await self.store.get_event(event_id)
 
             if event.room_id != room_id:
                 raise SynapseError(400, "Event is for wrong room.")
 
-            token = yield self.store.get_topological_token_for_event(event_id)
+            token = await self.store.get_topological_token_for_event(event_id)
 
             logger.info("[purge] purging up to token %s (event_id %s)", token, event_id)
         elif "purge_up_to_ts" in body:
@@ -285,12 +279,10 @@ class PurgeHistoryRestServlet(RestServlet):
                     400, "purge_up_to_ts must be an int", errcode=Codes.BAD_JSON
                 )
 
-            stream_ordering = (yield self.store.find_first_stream_ordering_after_ts(ts))
+            stream_ordering = await self.store.find_first_stream_ordering_after_ts(ts)
 
-            r = (
-                yield self.store.get_room_event_after_stream_ordering(
-                    room_id, stream_ordering
-                )
+            r = await self.store.get_room_event_after_stream_ordering(
+                room_id, stream_ordering
             )
             if not r:
                 logger.warn(
@@ -318,7 +310,7 @@ class PurgeHistoryRestServlet(RestServlet):
                 errcode=Codes.BAD_JSON,
             )
 
-        purge_id = yield self.pagination_handler.start_purge_history(
+        purge_id = await self.pagination_handler.start_purge_history(
             room_id, token, delete_local_events=delete_local_events
         )
 
@@ -339,9 +331,8 @@ class PurgeHistoryStatusRestServlet(RestServlet):
         self.pagination_handler = hs.get_pagination_handler()
         self.auth = hs.get_auth()
 
-    @defer.inlineCallbacks
-    def on_GET(self, request, purge_id):
-        yield assert_requester_is_admin(self.auth, request)
+    async def on_GET(self, request, purge_id):
+        await assert_requester_is_admin(self.auth, request)
 
         purge_status = self.pagination_handler.get_purge_status(purge_id)
         if purge_status is None:
@@ -357,9 +348,8 @@ class DeactivateAccountRestServlet(RestServlet):
         self._deactivate_account_handler = hs.get_deactivate_account_handler()
         self.auth = hs.get_auth()
 
-    @defer.inlineCallbacks
-    def on_POST(self, request, target_user_id):
-        yield assert_requester_is_admin(self.auth, request)
+    async def on_POST(self, request, target_user_id):
+        await assert_requester_is_admin(self.auth, request)
         body = parse_json_object_from_request(request, allow_empty_body=True)
         erase = body.get("erase", False)
         if not isinstance(erase, bool):
@@ -371,7 +361,7 @@ class DeactivateAccountRestServlet(RestServlet):
 
         UserID.from_string(target_user_id)
 
-        result = yield self._deactivate_account_handler.deactivate_account(
+        result = await self._deactivate_account_handler.deactivate_account(
             target_user_id, erase
         )
         if result:
@@ -405,10 +395,9 @@ class ShutdownRoomRestServlet(RestServlet):
         self.room_member_handler = hs.get_room_member_handler()
         self.auth = hs.get_auth()
 
-    @defer.inlineCallbacks
-    def on_POST(self, request, room_id):
-        requester = yield self.auth.get_user_by_req(request)
-        yield assert_user_is_admin(self.auth, requester.user)
+    async def on_POST(self, request, room_id):
+        requester = await self.auth.get_user_by_req(request)
+        await assert_user_is_admin(self.auth, requester.user)
 
         content = parse_json_object_from_request(request)
         assert_params_in_dict(content, ["new_room_user_id"])
@@ -419,7 +408,7 @@ class ShutdownRoomRestServlet(RestServlet):
         message = content.get("message", self.DEFAULT_MESSAGE)
         room_name = content.get("room_name", "Content Violation Notification")
 
-        info = yield self._room_creation_handler.create_room(
+        info = await self._room_creation_handler.create_room(
             room_creator_requester,
             config={
                 "preset": "public_chat",
@@ -438,9 +427,9 @@ class ShutdownRoomRestServlet(RestServlet):
 
         # This will work even if the room is already blocked, but that is
         # desirable in case the first attempt at blocking the room failed below.
-        yield self.store.block_room(room_id, requester_user_id)
+        await self.store.block_room(room_id, requester_user_id)
 
-        users = yield self.state.get_current_users_in_room(room_id)
+        users = await self.state.get_current_users_in_room(room_id)
         kicked_users = []
         failed_to_kick_users = []
         for user_id in users:
@@ -451,7 +440,7 @@ class ShutdownRoomRestServlet(RestServlet):
 
             try:
                 target_requester = create_requester(user_id)
-                yield self.room_member_handler.update_membership(
+                await self.room_member_handler.update_membership(
                     requester=target_requester,
                     target=target_requester.user,
                     room_id=room_id,
@@ -461,9 +450,9 @@ class ShutdownRoomRestServlet(RestServlet):
                     require_consent=False,
                 )
 
-                yield self.room_member_handler.forget(target_requester.user, room_id)
+                await self.room_member_handler.forget(target_requester.user, room_id)
 
-                yield self.room_member_handler.update_membership(
+                await self.room_member_handler.update_membership(
                     requester=target_requester,
                     target=target_requester.user,
                     room_id=new_room_id,
@@ -480,7 +469,7 @@ class ShutdownRoomRestServlet(RestServlet):
                 )
                 failed_to_kick_users.append(user_id)
 
-        yield self.event_creation_handler.create_and_send_nonmember_event(
+        await self.event_creation_handler.create_and_send_nonmember_event(
             room_creator_requester,
             {
                 "type": "m.room.message",
@@ -491,9 +480,9 @@ class ShutdownRoomRestServlet(RestServlet):
             ratelimit=False,
         )
 
-        aliases_for_room = yield self.store.get_aliases_for_room(room_id)
+        aliases_for_room = await self.store.get_aliases_for_room(room_id)
 
-        yield self.store.update_aliases_for_room(
+        await self.store.update_aliases_for_room(
             room_id, new_room_id, requester_user_id
         )
 
@@ -532,13 +521,12 @@ class ResetPasswordRestServlet(RestServlet):
         self.auth = hs.get_auth()
         self._set_password_handler = hs.get_set_password_handler()
 
-    @defer.inlineCallbacks
-    def on_POST(self, request, target_user_id):
+    async def on_POST(self, request, target_user_id):
         """Post request to allow an administrator reset password for a user.
         This needs user to have administrator access in Synapse.
         """
-        requester = yield self.auth.get_user_by_req(request)
-        yield assert_user_is_admin(self.auth, requester.user)
+        requester = await self.auth.get_user_by_req(request)
+        await assert_user_is_admin(self.auth, requester.user)
 
         UserID.from_string(target_user_id)
 
@@ -546,7 +534,7 @@ class ResetPasswordRestServlet(RestServlet):
         assert_params_in_dict(params, ["new_password"])
         new_password = params["new_password"]
 
-        yield self._set_password_handler.set_password(
+        await self._set_password_handler.set_password(
             target_user_id, new_password, requester
         )
         return 200, {}
@@ -572,12 +560,11 @@ class GetUsersPaginatedRestServlet(RestServlet):
         self.auth = hs.get_auth()
         self.handlers = hs.get_handlers()
 
-    @defer.inlineCallbacks
-    def on_GET(self, request, target_user_id):
+    async def on_GET(self, request, target_user_id):
         """Get request to get specific number of users from Synapse.
         This needs user to have administrator access in Synapse.
         """
-        yield assert_requester_is_admin(self.auth, request)
+        await assert_requester_is_admin(self.auth, request)
 
         target_user = UserID.from_string(target_user_id)
 
@@ -590,11 +577,10 @@ class GetUsersPaginatedRestServlet(RestServlet):
 
         logger.info("limit: %s, start: %s", limit, start)
 
-        ret = yield self.handlers.admin_handler.get_users_paginate(order, start, limit)
+        ret = await self.handlers.admin_handler.get_users_paginate(order, start, limit)
         return 200, ret
 
-    @defer.inlineCallbacks
-    def on_POST(self, request, target_user_id):
+    async def on_POST(self, request, target_user_id):
         """Post request to get specific number of users from Synapse..
         This needs user to have administrator access in Synapse.
         Example:
@@ -608,7 +594,7 @@ class GetUsersPaginatedRestServlet(RestServlet):
         Returns:
             200 OK with json object {list[dict[str, Any]], count} or empty object.
         """
-        yield assert_requester_is_admin(self.auth, request)
+        await assert_requester_is_admin(self.auth, request)
         UserID.from_string(target_user_id)
 
         order = "name"  # order by name in user table
@@ -618,7 +604,7 @@ class GetUsersPaginatedRestServlet(RestServlet):
         start = params["start"]
         logger.info("limit: %s, start: %s", limit, start)
 
-        ret = yield self.handlers.admin_handler.get_users_paginate(order, start, limit)
+        ret = await self.handlers.admin_handler.get_users_paginate(order, start, limit)
         return 200, ret
 
 
@@ -641,13 +627,12 @@ class SearchUsersRestServlet(RestServlet):
         self.auth = hs.get_auth()
         self.handlers = hs.get_handlers()
 
-    @defer.inlineCallbacks
-    def on_GET(self, request, target_user_id):
+    async def on_GET(self, request, target_user_id):
         """Get request to search user table for specific users according to
         search term.
         This needs user to have a administrator access in Synapse.
         """
-        yield assert_requester_is_admin(self.auth, request)
+        await assert_requester_is_admin(self.auth, request)
 
         target_user = UserID.from_string(target_user_id)
 
@@ -661,7 +646,7 @@ class SearchUsersRestServlet(RestServlet):
         term = parse_string(request, "term", required=True)
         logger.info("term: %s ", term)
 
-        ret = yield self.handlers.admin_handler.search_users(term)
+        ret = await self.handlers.admin_handler.search_users(term)
         return 200, ret
 
 
@@ -676,15 +661,14 @@ class DeleteGroupAdminRestServlet(RestServlet):
         self.is_mine_id = hs.is_mine_id
         self.auth = hs.get_auth()
 
-    @defer.inlineCallbacks
-    def on_POST(self, request, group_id):
-        requester = yield self.auth.get_user_by_req(request)
-        yield assert_user_is_admin(self.auth, requester.user)
+    async def on_POST(self, request, group_id):
+        requester = await self.auth.get_user_by_req(request)
+        await assert_user_is_admin(self.auth, requester.user)
 
         if not self.is_mine_id(group_id):
             raise SynapseError(400, "Can only delete local groups")
 
-        yield self.group_server.delete_group(group_id, requester.user.to_string())
+        await self.group_server.delete_group(group_id, requester.user.to_string())
         return 200, {}
 
 
@@ -700,16 +684,15 @@ class AccountValidityRenewServlet(RestServlet):
         self.account_activity_handler = hs.get_account_validity_handler()
         self.auth = hs.get_auth()
 
-    @defer.inlineCallbacks
-    def on_POST(self, request):
-        yield assert_requester_is_admin(self.auth, request)
+    async def on_POST(self, request):
+        await assert_requester_is_admin(self.auth, request)
 
         body = parse_json_object_from_request(request)
 
         if "user_id" not in body:
             raise SynapseError(400, "Missing property 'user_id' in the request body")
 
-        expiration_ts = yield self.account_activity_handler.renew_account_for_user(
+        expiration_ts = await self.account_activity_handler.renew_account_for_user(
             body["user_id"],
             body.get("expiration_ts"),
             not body.get("enable_renewal_emails", True),

From 281f887090d38f14df9b011ad1e5379a25be9c3e Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Fri, 11 Oct 2019 12:17:59 +0100
Subject: [PATCH 259/276] Port synapse/rest/admin/media.py to async/await

---
 synapse/rest/admin/media.py | 27 +++++++++++----------------
 1 file changed, 11 insertions(+), 16 deletions(-)

diff --git a/synapse/rest/admin/media.py b/synapse/rest/admin/media.py
index ed7086d09..fa833e54c 100644
--- a/synapse/rest/admin/media.py
+++ b/synapse/rest/admin/media.py
@@ -16,8 +16,6 @@
 
 import logging
 
-from twisted.internet import defer
-
 from synapse.api.errors import AuthError
 from synapse.http.servlet import RestServlet, parse_integer
 from synapse.rest.admin._base import (
@@ -40,12 +38,11 @@ class QuarantineMediaInRoom(RestServlet):
         self.store = hs.get_datastore()
         self.auth = hs.get_auth()
 
-    @defer.inlineCallbacks
-    def on_POST(self, request, room_id):
-        requester = yield self.auth.get_user_by_req(request)
-        yield assert_user_is_admin(self.auth, requester.user)
+    async def on_POST(self, request, room_id):
+        requester = await self.auth.get_user_by_req(request)
+        await assert_user_is_admin(self.auth, requester.user)
 
-        num_quarantined = yield self.store.quarantine_media_ids_in_room(
+        num_quarantined = await self.store.quarantine_media_ids_in_room(
             room_id, requester.user.to_string()
         )
 
@@ -62,14 +59,13 @@ class ListMediaInRoom(RestServlet):
         self.store = hs.get_datastore()
         self.auth = hs.get_auth()
 
-    @defer.inlineCallbacks
-    def on_GET(self, request, room_id):
-        requester = yield self.auth.get_user_by_req(request)
-        is_admin = yield self.auth.is_server_admin(requester.user)
+    async def on_GET(self, request, room_id):
+        requester = await self.auth.get_user_by_req(request)
+        is_admin = await self.auth.is_server_admin(requester.user)
         if not is_admin:
             raise AuthError(403, "You are not a server admin")
 
-        local_mxcs, remote_mxcs = yield self.store.get_media_mxcs_in_room(room_id)
+        local_mxcs, remote_mxcs = await self.store.get_media_mxcs_in_room(room_id)
 
         return 200, {"local": local_mxcs, "remote": remote_mxcs}
 
@@ -81,14 +77,13 @@ class PurgeMediaCacheRestServlet(RestServlet):
         self.media_repository = hs.get_media_repository()
         self.auth = hs.get_auth()
 
-    @defer.inlineCallbacks
-    def on_POST(self, request):
-        yield assert_requester_is_admin(self.auth, request)
+    async def on_POST(self, request):
+        await assert_requester_is_admin(self.auth, request)
 
         before_ts = parse_integer(request, "before_ts", required=True)
         logger.info("before_ts: %r", before_ts)
 
-        ret = yield self.media_repository.delete_old_remote_media(before_ts)
+        ret = await self.media_repository.delete_old_remote_media(before_ts)
 
         return 200, ret
 

From f95325e22a829fa94e0e4f3e6cc832c799324cf7 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Fri, 11 Oct 2019 12:20:24 +0100
Subject: [PATCH 260/276] Port synapse/rest/admin/server_notice_servlet.py to
 async/await

---
 synapse/rest/admin/server_notice_servlet.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/synapse/rest/admin/server_notice_servlet.py b/synapse/rest/admin/server_notice_servlet.py
index ae2cbe2e0..6e9a87412 100644
--- a/synapse/rest/admin/server_notice_servlet.py
+++ b/synapse/rest/admin/server_notice_servlet.py
@@ -14,8 +14,6 @@
 # limitations under the License.
 import re
 
-from twisted.internet import defer
-
 from synapse.api.constants import EventTypes
 from synapse.api.errors import SynapseError
 from synapse.http.servlet import (
@@ -69,9 +67,8 @@ class SendServerNoticeServlet(RestServlet):
             self.__class__.__name__,
         )
 
-    @defer.inlineCallbacks
-    def on_POST(self, request, txn_id=None):
-        yield assert_requester_is_admin(self.auth, request)
+    async def on_POST(self, request, txn_id=None):
+        await assert_requester_is_admin(self.auth, request)
         body = parse_json_object_from_request(request)
         assert_params_in_dict(body, ("user_id", "content"))
         event_type = body.get("type", EventTypes.Message)
@@ -85,7 +82,7 @@ class SendServerNoticeServlet(RestServlet):
         if not self.hs.is_mine_id(user_id):
             raise SynapseError(400, "Server notices can only be sent to local users")
 
-        event = yield self.snm.send_notice(
+        event = await self.snm.send_notice(
             user_id=body["user_id"],
             type=event_type,
             state_key=state_key,

From dfbb62c28de8e6484fcc5ffdc89a5d62ae40c983 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Fri, 11 Oct 2019 12:20:29 +0100
Subject: [PATCH 261/276] Port synapse/rest/admin/users.py to async/await

---
 synapse/rest/admin/users.py | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py
index 9720a3bab..d5d124a0d 100644
--- a/synapse/rest/admin/users.py
+++ b/synapse/rest/admin/users.py
@@ -14,8 +14,6 @@
 # limitations under the License.
 import re
 
-from twisted.internet import defer
-
 from synapse.api.errors import SynapseError
 from synapse.http.servlet import (
     RestServlet,
@@ -59,24 +57,22 @@ class UserAdminServlet(RestServlet):
         self.auth = hs.get_auth()
         self.handlers = hs.get_handlers()
 
-    @defer.inlineCallbacks
-    def on_GET(self, request, user_id):
-        yield assert_requester_is_admin(self.auth, request)
+    async def on_GET(self, request, user_id):
+        await assert_requester_is_admin(self.auth, request)
 
         target_user = UserID.from_string(user_id)
 
         if not self.hs.is_mine(target_user):
             raise SynapseError(400, "Only local users can be admins of this homeserver")
 
-        is_admin = yield self.handlers.admin_handler.get_user_server_admin(target_user)
+        is_admin = await self.handlers.admin_handler.get_user_server_admin(target_user)
         is_admin = bool(is_admin)
 
         return 200, {"admin": is_admin}
 
-    @defer.inlineCallbacks
-    def on_PUT(self, request, user_id):
-        requester = yield self.auth.get_user_by_req(request)
-        yield assert_user_is_admin(self.auth, requester.user)
+    async def on_PUT(self, request, user_id):
+        requester = await self.auth.get_user_by_req(request)
+        await assert_user_is_admin(self.auth, requester.user)
         auth_user = requester.user
 
         target_user = UserID.from_string(user_id)
@@ -93,7 +89,7 @@ class UserAdminServlet(RestServlet):
         if target_user == auth_user and not set_admin_to:
             raise SynapseError(400, "You may not demote yourself.")
 
-        yield self.handlers.admin_handler.set_user_server_admin(
+        await self.handlers.admin_handler.set_user_server_admin(
             target_user, set_admin_to
         )
 

From c3b0fbe9c3a71d000f2358122c45d33f4f9e55c1 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Fri, 11 Oct 2019 12:24:52 +0100
Subject: [PATCH 262/276] Newsfile

---
 changelog.d/6196.misc | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/6196.misc

diff --git a/changelog.d/6196.misc b/changelog.d/6196.misc
new file mode 100644
index 000000000..3897b1216
--- /dev/null
+++ b/changelog.d/6196.misc
@@ -0,0 +1 @@
+Port synapse.rest.admin module to use async/await.

From be9b55e0d2b758bd7d9be4273253ea115c5362a3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Val=C3=A9rian=20Rousset?= <tharvik@users.noreply.github.com>
Date: Fri, 11 Oct 2019 13:33:12 +0200
Subject: [PATCH 263/276] cas: support setting display name (#6114)

The CAS server can now return an attribute specifying the desired display name, instead of the username being used directly.
---
 changelog.d/6114.feature        | 1 +
 docs/sample_config.yaml         | 1 +
 synapse/config/cas.py           | 3 +++
 synapse/rest/client/v1/login.py | 4 +++-
 4 files changed, 8 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/6114.feature

diff --git a/changelog.d/6114.feature b/changelog.d/6114.feature
new file mode 100644
index 000000000..a34ab1214
--- /dev/null
+++ b/changelog.d/6114.feature
@@ -0,0 +1 @@
+CAS login now provides a default display name for users if a `displayname_attribute` is set in the configuration file.
diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml
index 43893399a..8226978ba 100644
--- a/docs/sample_config.yaml
+++ b/docs/sample_config.yaml
@@ -1220,6 +1220,7 @@ saml2_config:
 #   enabled: true
 #   server_url: "https://cas-server.com"
 #   service_url: "https://homeserver.domain.com:8448"
+#   #displayname_attribute: name
 #   #required_attributes:
 #   #    name: value
 
diff --git a/synapse/config/cas.py b/synapse/config/cas.py
index b916c3aa6..4526c1a67 100644
--- a/synapse/config/cas.py
+++ b/synapse/config/cas.py
@@ -30,11 +30,13 @@ class CasConfig(Config):
             self.cas_enabled = cas_config.get("enabled", True)
             self.cas_server_url = cas_config["server_url"]
             self.cas_service_url = cas_config["service_url"]
+            self.cas_displayname_attribute = cas_config.get("displayname_attribute")
             self.cas_required_attributes = cas_config.get("required_attributes", {})
         else:
             self.cas_enabled = False
             self.cas_server_url = None
             self.cas_service_url = None
+            self.cas_displayname_attribute = None
             self.cas_required_attributes = {}
 
     def generate_config_section(self, config_dir_path, server_name, **kwargs):
@@ -45,6 +47,7 @@ class CasConfig(Config):
         #   enabled: true
         #   server_url: "https://cas-server.com"
         #   service_url: "https://homeserver.domain.com:8448"
+        #   #displayname_attribute: name
         #   #required_attributes:
         #   #    name: value
         """
diff --git a/synapse/rest/client/v1/login.py b/synapse/rest/client/v1/login.py
index 9cddbc752..8414af08c 100644
--- a/synapse/rest/client/v1/login.py
+++ b/synapse/rest/client/v1/login.py
@@ -377,6 +377,7 @@ class CasTicketServlet(RestServlet):
         super(CasTicketServlet, self).__init__()
         self.cas_server_url = hs.config.cas_server_url
         self.cas_service_url = hs.config.cas_service_url
+        self.cas_displayname_attribute = hs.config.cas_displayname_attribute
         self.cas_required_attributes = hs.config.cas_required_attributes
         self._sso_auth_handler = SSOAuthHandler(hs)
         self._http_client = hs.get_simple_http_client()
@@ -400,6 +401,7 @@ class CasTicketServlet(RestServlet):
 
     def handle_cas_response(self, request, cas_response_body, client_redirect_url):
         user, attributes = self.parse_cas_response(cas_response_body)
+        displayname = attributes.pop(self.cas_displayname_attribute, None)
 
         for required_attribute, required_value in self.cas_required_attributes.items():
             # If required attribute was not in CAS Response - Forbidden
@@ -414,7 +416,7 @@ class CasTicketServlet(RestServlet):
                     raise LoginError(401, "Unauthorized", errcode=Codes.UNAUTHORIZED)
 
         return self._sso_auth_handler.on_successful_auth(
-            user, request, client_redirect_url
+            user, request, client_redirect_url, displayname
         )
 
     def parse_cas_response(self, cas_response_body):

From 3c2d6c708cd93df7fc945e10014049e9f9b36f46 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Fri, 11 Oct 2019 15:26:09 +0100
Subject: [PATCH 264/276] Add maybe_awaitable and fix __init__ bugs

---
 synapse/rest/admin/__init__.py |  7 +++++--
 synapse/util/async_helpers.py  | 29 +++++++++++++++++++++++++++++
 2 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/synapse/rest/admin/__init__.py b/synapse/rest/admin/__init__.py
index f7b948300..939418ee2 100644
--- a/synapse/rest/admin/__init__.py
+++ b/synapse/rest/admin/__init__.py
@@ -44,6 +44,7 @@ from synapse.rest.admin.purge_room_servlet import PurgeRoomServlet
 from synapse.rest.admin.server_notice_servlet import SendServerNoticeServlet
 from synapse.rest.admin.users import UserAdminServlet
 from synapse.types import UserID, create_requester
+from synapse.util.async_helpers import maybe_awaitable
 from synapse.util.versionstring import get_version_string
 
 logger = logging.getLogger(__name__)
@@ -310,7 +311,7 @@ class PurgeHistoryRestServlet(RestServlet):
                 errcode=Codes.BAD_JSON,
             )
 
-        purge_id = await self.pagination_handler.start_purge_history(
+        purge_id = self.pagination_handler.start_purge_history(
             room_id, token, delete_local_events=delete_local_events
         )
 
@@ -480,7 +481,9 @@ class ShutdownRoomRestServlet(RestServlet):
             ratelimit=False,
         )
 
-        aliases_for_room = await self.store.get_aliases_for_room(room_id)
+        aliases_for_room = await maybe_awaitable(
+            self.store.get_aliases_for_room(room_id)
+        )
 
         await self.store.update_aliases_for_room(
             room_id, new_room_id, requester_user_id
diff --git a/synapse/util/async_helpers.py b/synapse/util/async_helpers.py
index 0d3bdd88c..804dbca44 100644
--- a/synapse/util/async_helpers.py
+++ b/synapse/util/async_helpers.py
@@ -21,6 +21,8 @@ from typing import Dict, Sequence, Set, Union
 
 from six.moves import range
 
+import attr
+
 from twisted.internet import defer
 from twisted.internet.defer import CancelledError
 from twisted.python import failure
@@ -483,3 +485,30 @@ def timeout_deferred(deferred, timeout, reactor, on_timeout_cancel=None):
     deferred.addCallbacks(success_cb, failure_cb)
 
     return new_d
+
+
+@attr.s(slots=True, frozen=True)
+class DoneAwaitable(object):
+    """Simple awaitable that returns the provided value.
+    """
+
+    value = attr.ib()
+
+    def __await__(self):
+        return self
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        raise StopIteration(self.value)
+
+
+def maybe_awaitable(value):
+    """Convert a value to an awaitable if not already an awaitable.
+    """
+
+    if hasattr(value, "__await__"):
+        return value
+
+    return DoneAwaitable(value)

From 2e97a4c1978639c0beffd62417e51724ae2c77db Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Fri, 11 Oct 2019 15:29:26 +0100
Subject: [PATCH 265/276] Port synapse/rest/admin/_base.py to async/await

---
 synapse/rest/admin/_base.py | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/synapse/rest/admin/_base.py b/synapse/rest/admin/_base.py
index 5a9b08d3e..afd064720 100644
--- a/synapse/rest/admin/_base.py
+++ b/synapse/rest/admin/_base.py
@@ -15,8 +15,6 @@
 
 import re
 
-from twisted.internet import defer
-
 from synapse.api.errors import AuthError
 
 
@@ -42,8 +40,7 @@ def historical_admin_path_patterns(path_regex):
     )
 
 
-@defer.inlineCallbacks
-def assert_requester_is_admin(auth, request):
+async def assert_requester_is_admin(auth, request):
     """Verify that the requester is an admin user
 
     WARNING: MAKE SURE YOU YIELD ON THE RESULT!
@@ -58,12 +55,11 @@ def assert_requester_is_admin(auth, request):
     Raises:
         AuthError if the requester is not an admin
     """
-    requester = yield auth.get_user_by_req(request)
-    yield assert_user_is_admin(auth, requester.user)
+    requester = await auth.get_user_by_req(request)
+    await assert_user_is_admin(auth, requester.user)
 
 
-@defer.inlineCallbacks
-def assert_user_is_admin(auth, user_id):
+async def assert_user_is_admin(auth, user_id):
     """Verify that the given user is an admin user
 
     WARNING: MAKE SURE YOU YIELD ON THE RESULT!
@@ -79,6 +75,6 @@ def assert_user_is_admin(auth, user_id):
         AuthError if the user is not an admin
     """
 
-    is_admin = yield auth.is_server_admin(user_id)
+    is_admin = await auth.is_server_admin(user_id)
     if not is_admin:
         raise AuthError(403, "You are not a server admin")

From 132b251e2963b0e509afe00796f1b227e567b989 Mon Sep 17 00:00:00 2001
From: Hubert Chathi <hubert@uhoreg.ca>
Date: Fri, 11 Oct 2019 14:24:52 -0400
Subject: [PATCH 266/276] expand on comment

---
 synapse/storage/end_to_end_keys.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/synapse/storage/end_to_end_keys.py b/synapse/storage/end_to_end_keys.py
index b00a391c8..872bc7549 100644
--- a/synapse/storage/end_to_end_keys.py
+++ b/synapse/storage/end_to_end_keys.py
@@ -40,7 +40,8 @@ class EndToEndKeyWorkerStore(SQLBaseStore):
                 This option only takes effect if include_all_devices is true.
         Returns:
             Dict mapping from user-id to dict mapping from device_id to
-            key data.
+            key data.  The key data will be a dict in the same format as the
+            DeviceKeys type returned by POST /_matrix/client/r0/keys/query.
         """
         set_tag("query_list", query_list)
         if not query_list:

From a2bb50c2eb431414d999ec682b236620528b00e1 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 9 Oct 2019 15:39:13 +0100
Subject: [PATCH 267/276] Merge pull request #6185 from
 matrix-org/erikj/fix_censored_evnets

Fix inserting bytes as text in `censor_redactions`
---
 changelog.d/6185.bugfix                       |  1 +
 synapse/storage/events.py                     |  6 ++---
 .../redaction_censor3_fix_update.sql.postgres | 26 +++++++++++++++++++
 3 files changed, 29 insertions(+), 4 deletions(-)
 create mode 100644 changelog.d/6185.bugfix
 create mode 100644 synapse/storage/schema/delta/56/redaction_censor3_fix_update.sql.postgres

diff --git a/changelog.d/6185.bugfix b/changelog.d/6185.bugfix
new file mode 100644
index 000000000..9d1c669b8
--- /dev/null
+++ b/changelog.d/6185.bugfix
@@ -0,0 +1 @@
+Fix bug where redacted events were sometimes incorrectly censored in the database, breaking APIs that attempted to fetch such events.
diff --git a/synapse/storage/events.py b/synapse/storage/events.py
index 2e485c864..bb6ff0595 100644
--- a/synapse/storage/events.py
+++ b/synapse/storage/events.py
@@ -23,7 +23,7 @@ from functools import wraps
 from six import iteritems, text_type
 from six.moves import range
 
-from canonicaljson import encode_canonical_json, json
+from canonicaljson import json
 from prometheus_client import Counter, Histogram
 
 from twisted.internet import defer
@@ -1632,9 +1632,7 @@ class EventsStore(
                 and original_event.internal_metadata.is_redacted()
             ):
                 # Redaction was allowed
-                pruned_json = encode_canonical_json(
-                    prune_event_dict(original_event.get_dict())
-                )
+                pruned_json = encode_json(prune_event_dict(original_event.get_dict()))
             else:
                 # Redaction wasn't allowed
                 pruned_json = None
diff --git a/synapse/storage/schema/delta/56/redaction_censor3_fix_update.sql.postgres b/synapse/storage/schema/delta/56/redaction_censor3_fix_update.sql.postgres
new file mode 100644
index 000000000..f7bcc5e2f
--- /dev/null
+++ b/synapse/storage/schema/delta/56/redaction_censor3_fix_update.sql.postgres
@@ -0,0 +1,26 @@
+/* Copyright 2019 The Matrix.org Foundation C.I.C.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+-- There was a bug where we may have updated censored redactions as bytes,
+-- which can (somehow) cause json to be inserted hex encoded. This goes and
+-- undoes any such hex encoded JSON.
+UPDATE event_json SET json = convert_from(json::bytea, 'utf8')
+WHERE event_id IN (
+  SELECT event_json.event_id
+  FROM event_json
+  INNER JOIN redactions ON (event_json.event_id = redacts)
+  WHERE have_censored AND json NOT LIKE '{%'
+);

From 5b0e9948eaae801643e594b5abc8ee4b10bd194e Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 9 Oct 2019 16:03:24 +0100
Subject: [PATCH 268/276] Do the update as a background index

---
 synapse/storage/events_bg_updates.py          | 43 +++++++++++++++++++
 .../redaction_censor3_fix_update.sql.postgres | 17 ++++----
 2 files changed, 51 insertions(+), 9 deletions(-)

diff --git a/synapse/storage/events_bg_updates.py b/synapse/storage/events_bg_updates.py
index 5717baf48..e77a7e28a 100644
--- a/synapse/storage/events_bg_updates.py
+++ b/synapse/storage/events_bg_updates.py
@@ -71,6 +71,19 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore):
             "redactions_received_ts", self._redactions_received_ts
         )
 
+        # This index gets deleted in `event_fix_redactions_bytes` update
+        self.register_background_index_update(
+            "event_fix_redactions_bytes_create_index",
+            index_name="redactions_censored_redacts",
+            table="redactions",
+            columns=["redacts"],
+            where_clause="have_censored",
+        )
+
+        self.register_background_update_handler(
+            "event_fix_redactions_bytes", self._event_fix_redactions_bytes
+        )
+
     @defer.inlineCallbacks
     def _background_reindex_fields_sender(self, progress, batch_size):
         target_min_stream_id = progress["target_min_stream_id_inclusive"]
@@ -458,3 +471,33 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore):
             yield self._end_background_update("redactions_received_ts")
 
         return count
+
+    @defer.inlineCallbacks
+    def _event_fix_redactions_bytes(self, progress, batch_size):
+        """Undoes hex encoded censored redacted event JSON.
+        """
+
+        def _event_fix_redactions_bytes_txn(txn):
+            # This update is quite fast due to the new index.
+            txn.execute(
+                """
+                UPDATE event_json
+                SET
+                    json = convert_from(json::bytea, 'utf8')
+                FROM redactions
+                WHERE
+                    redactions.have_censored
+                    AND event_json.event_id = redactions.redacts
+                    AND json NOT LIKE '{%';
+                """
+            )
+
+            txn.execute("DROP INDEX redactions_censored_redacts")
+
+        yield self.runInteraction(
+            "_event_fix_redactions_bytes", _event_fix_redactions_bytes_txn
+        )
+
+        yield self._end_background_update("event_fix_redactions_bytes")
+
+        return 1
diff --git a/synapse/storage/schema/delta/56/redaction_censor3_fix_update.sql.postgres b/synapse/storage/schema/delta/56/redaction_censor3_fix_update.sql.postgres
index f7bcc5e2f..67471f3ef 100644
--- a/synapse/storage/schema/delta/56/redaction_censor3_fix_update.sql.postgres
+++ b/synapse/storage/schema/delta/56/redaction_censor3_fix_update.sql.postgres
@@ -15,12 +15,11 @@
 
 
 -- There was a bug where we may have updated censored redactions as bytes,
--- which can (somehow) cause json to be inserted hex encoded. This goes and
--- undoes any such hex encoded JSON.
-UPDATE event_json SET json = convert_from(json::bytea, 'utf8')
-WHERE event_id IN (
-  SELECT event_json.event_id
-  FROM event_json
-  INNER JOIN redactions ON (event_json.event_id = redacts)
-  WHERE have_censored AND json NOT LIKE '{%'
-);
+-- which can (somehow) cause json to be inserted hex encoded. These updates go
+-- and undo any such hex encoded JSON.
+
+INSERT into background_updates (update_name, progress_json)
+  VALUES ('event_fix_redactions_bytes_create_index', '{}');
+
+INSERT into background_updates (update_name, progress_json, depends_on)
+  VALUES ('event_fix_redactions_bytes', '{}', 'event_fix_redactions_bytes_create_index');

From 71cd3fed669a55e6ef000591ca89fe01b37a5ee1 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Thu, 17 Oct 2019 16:40:56 +0100
Subject: [PATCH 269/276] 1.4.1rc1

---
 CHANGES.md              | 8 ++++++++
 changelog.d/6185.bugfix | 1 -
 synapse/__init__.py     | 2 +-
 3 files changed, 9 insertions(+), 2 deletions(-)
 delete mode 100644 changelog.d/6185.bugfix

diff --git a/CHANGES.md b/CHANGES.md
index 165e1d4db..ecba33bd3 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,3 +1,11 @@
+Synapse 1.4.1rc1 (2019-10-17)
+=============================
+
+Bugfixes
+--------
+
+- Fix bug where redacted events were sometimes incorrectly censored in the database, breaking APIs that attempted to fetch such events. ([\#6185](https://github.com/matrix-org/synapse/issues/6185), [5b0e9948](https://github.com/matrix-org/synapse/commit/5b0e9948eaae801643e594b5abc8ee4b10bd194e))
+
 Synapse 1.4.0 (2019-10-03)
 ==========================
 
diff --git a/changelog.d/6185.bugfix b/changelog.d/6185.bugfix
deleted file mode 100644
index 9d1c669b8..000000000
--- a/changelog.d/6185.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Fix bug where redacted events were sometimes incorrectly censored in the database, breaking APIs that attempted to fetch such events.
diff --git a/synapse/__init__.py b/synapse/__init__.py
index 2d52d26af..2fc0c3cf8 100644
--- a/synapse/__init__.py
+++ b/synapse/__init__.py
@@ -35,4 +35,4 @@ try:
 except ImportError:
     pass
 
-__version__ = "1.4.0"
+__version__ = "1.4.1rc1"

From 423f7ae3974e251fef110c90a620a51789459d68 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Thu, 17 Oct 2019 17:06:07 +0100
Subject: [PATCH 270/276] Fix up changelogs

---
 changelog.d/6186.misc | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/6186.misc

diff --git a/changelog.d/6186.misc b/changelog.d/6186.misc
new file mode 100644
index 000000000..5e1314a0a
--- /dev/null
+++ b/changelog.d/6186.misc
@@ -0,0 +1 @@
+Reject (accidental) attempts to insert bytes into postgres tables.

From 6fb0a3da07192382bd05e0309c74d0e91c3b1253 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Thu, 17 Oct 2019 18:03:28 +0100
Subject: [PATCH 271/276] Remove dead changelog file

This is part of 1.4.1
---
 changelog.d/6185.bugfix | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 changelog.d/6185.bugfix

diff --git a/changelog.d/6185.bugfix b/changelog.d/6185.bugfix
deleted file mode 100644
index 9d1c669b8..000000000
--- a/changelog.d/6185.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Fix bug where redacted events were sometimes incorrectly censored in the database, breaking APIs that attempted to fetch such events.

From 5859a5c569c03f3b7c578fe4dbf2274e37af03bb Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Fri, 18 Oct 2019 07:42:26 +0200
Subject: [PATCH 272/276] Fix presence timeouts when synchrotron restarts.
 (#6212)

* Fix presence timeouts when synchrotron restarts.

Handling timeouts would fail if there was an external process that had
timed out, e.g. a synchrotron restarting. This was due to a couple of
variable name typos.

Fixes #3715.
---
 changelog.d/6212.bugfix         |  1 +
 synapse/handlers/presence.py    | 13 +++++++----
 tests/handlers/test_presence.py | 39 +++++++++++++++++++++++++++++++++
 tox.ini                         |  2 +-
 4 files changed, 50 insertions(+), 5 deletions(-)
 create mode 100644 changelog.d/6212.bugfix

diff --git a/changelog.d/6212.bugfix b/changelog.d/6212.bugfix
new file mode 100644
index 000000000..918755fee
--- /dev/null
+++ b/changelog.d/6212.bugfix
@@ -0,0 +1 @@
+Fix bug where presence would not get timed out correctly if a synchrotron worker is used and restarted.
diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py
index 2a5f1a007..eda15bc62 100644
--- a/synapse/handlers/presence.py
+++ b/synapse/handlers/presence.py
@@ -24,6 +24,7 @@ The methods that define policy are:
 
 import logging
 from contextlib import contextmanager
+from typing import Dict, Set
 
 from six import iteritems, itervalues
 
@@ -179,8 +180,9 @@ class PresenceHandler(object):
         # we assume that all the sync requests on that process have stopped.
         # Stored as a dict from process_id to set of user_id, and a dict of
         # process_id to millisecond timestamp last updated.
-        self.external_process_to_current_syncs = {}
-        self.external_process_last_updated_ms = {}
+        self.external_process_to_current_syncs = {}  # type: Dict[int, Set[str]]
+        self.external_process_last_updated_ms = {}  # type: Dict[int, int]
+
         self.external_sync_linearizer = Linearizer(name="external_sync_linearizer")
 
         # Start a LoopingCall in 30s that fires every 5s.
@@ -349,10 +351,13 @@ class PresenceHandler(object):
             if now - last_update > EXTERNAL_PROCESS_EXPIRY
         ]
         for process_id in expired_process_ids:
+            # For each expired process drop tracking info and check the users
+            # that were syncing on that process to see if they need to be timed
+            # out.
             users_to_check.update(
-                self.external_process_last_updated_ms.pop(process_id, ())
+                self.external_process_to_current_syncs.pop(process_id, ())
             )
-            self.external_process_last_update.pop(process_id)
+            self.external_process_last_updated_ms.pop(process_id)
 
         states = [
             self.user_to_current_state.get(user_id, UserPresenceState.default(user_id))
diff --git a/tests/handlers/test_presence.py b/tests/handlers/test_presence.py
index f70c6e7d6..d4293b431 100644
--- a/tests/handlers/test_presence.py
+++ b/tests/handlers/test_presence.py
@@ -22,6 +22,7 @@ from synapse.api.constants import EventTypes, Membership, PresenceState
 from synapse.events import room_version_to_event_format
 from synapse.events.builder import EventBuilder
 from synapse.handlers.presence import (
+    EXTERNAL_PROCESS_EXPIRY,
     FEDERATION_PING_INTERVAL,
     FEDERATION_TIMEOUT,
     IDLE_TIMER,
@@ -413,6 +414,44 @@ class PresenceTimeoutTestCase(unittest.TestCase):
         self.assertEquals(state, new_state)
 
 
+class PresenceHandlerTestCase(unittest.HomeserverTestCase):
+    def prepare(self, reactor, clock, hs):
+        self.presence_handler = hs.get_presence_handler()
+        self.clock = hs.get_clock()
+
+    def test_external_process_timeout(self):
+        """Test that if an external process doesn't update the records for a while
+        we time out their syncing users' presence.
+        """
+        process_id = 1
+        user_id = "@test:server"
+
+        # Notify handler that a user is now syncing.
+        self.get_success(
+            self.presence_handler.update_external_syncs_row(
+                process_id, user_id, True, self.clock.time_msec()
+            )
+        )
+
+        # Check that if we wait a while without telling the handler the user has
+        # stopped syncing that their presence state doesn't get timed out.
+        self.reactor.advance(EXTERNAL_PROCESS_EXPIRY / 2)
+
+        state = self.get_success(
+            self.presence_handler.get_state(UserID.from_string(user_id))
+        )
+        self.assertEqual(state.state, PresenceState.ONLINE)
+
+        # Check that if the external process timeout fires, then the syncing
+        # user gets timed out
+        self.reactor.advance(EXTERNAL_PROCESS_EXPIRY)
+
+        state = self.get_success(
+            self.presence_handler.get_state(UserID.from_string(user_id))
+        )
+        self.assertEqual(state.state, PresenceState.OFFLINE)
+
+
 class PresenceJoinTestCase(unittest.HomeserverTestCase):
     """Tests remote servers get told about presence of users in the room when
     they join and when new local users join.
diff --git a/tox.ini b/tox.ini
index 367cc2ccf..7ba6f6339 100644
--- a/tox.ini
+++ b/tox.ini
@@ -161,7 +161,7 @@ basepython = python3.7
 skip_install = True
 deps =
     {[base]deps}
-    mypy
+    mypy==0.730
     mypy-zope
 env =
     MYPYPATH = stubs/

From dc4bec885df275d2973f500c1488baa6d9e25197 Mon Sep 17 00:00:00 2001
From: Bart Noordervliet <bart@mmvi.nl>
Date: Fri, 18 Oct 2019 11:13:59 +0200
Subject: [PATCH 273/276] Add missing BOOLEAN_COLUMNs to synapse_port_db
 (#6216)

Small fix to synapse_port_db to be able to convert from database schema v56.
---
 changelog.d/6216.bugfix | 1 +
 scripts/synapse_port_db | 2 ++
 2 files changed, 3 insertions(+)
 create mode 100644 changelog.d/6216.bugfix

diff --git a/changelog.d/6216.bugfix b/changelog.d/6216.bugfix
new file mode 100644
index 000000000..5784e82d1
--- /dev/null
+++ b/changelog.d/6216.bugfix
@@ -0,0 +1 @@
+synapse_port_db: Add 2 additional BOOLEAN_COLUMNS to be able to convert from database schema v56.
diff --git a/scripts/synapse_port_db b/scripts/synapse_port_db
index b6ba19c77..3f942abdb 100755
--- a/scripts/synapse_port_db
+++ b/scripts/synapse_port_db
@@ -55,6 +55,8 @@ BOOLEAN_COLUMNS = {
     "local_group_membership": ["is_publicised", "is_admin"],
     "e2e_room_keys": ["is_verified"],
     "account_validity": ["email_sent"],
+    "redactions": ["have_censored"],
+    "room_stats_state": ["is_federatable"],
 }
 
 

From 41b9faed16a2721540042d09b90a24721d18a8e9 Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Fri, 18 Oct 2019 10:15:12 +0100
Subject: [PATCH 274/276] 1.4.1

---
 CHANGES.md          | 6 ++++++
 debian/changelog    | 6 ++++++
 synapse/__init__.py | 2 +-
 3 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/CHANGES.md b/CHANGES.md
index ecba33bd3..7e92c3bf7 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,3 +1,9 @@
+Synapse 1.4.1 (2019-10-18)
+==========================
+
+No changes since 1.4.1rc1
+
+
 Synapse 1.4.1rc1 (2019-10-17)
 =============================
 
diff --git a/debian/changelog b/debian/changelog
index 60c682cc5..02f2b508c 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+matrix-synapse-py3 (1.4.1) stable; urgency=medium
+
+  * New synapse release 1.4.1.
+
+ -- Synapse Packaging team <packages@matrix.org>  Fri, 18 Oct 2019 10:13:27 +0100
+
 matrix-synapse-py3 (1.4.0) stable; urgency=medium
 
   * New synapse release 1.4.0.
diff --git a/synapse/__init__.py b/synapse/__init__.py
index 2fc0c3cf8..a22567fcd 100644
--- a/synapse/__init__.py
+++ b/synapse/__init__.py
@@ -35,4 +35,4 @@ try:
 except ImportError:
     pass
 
-__version__ = "1.4.1rc1"
+__version__ = "1.4.1"

From 774b8d0930efee31b5240f8d3dc8935415fe9124 Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Fri, 18 Oct 2019 10:19:02 +0100
Subject: [PATCH 275/276] Fix changelog

---
 CHANGES.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGES.md b/CHANGES.md
index 7e92c3bf7..cd23b8112 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,7 +1,7 @@
 Synapse 1.4.1 (2019-10-18)
 ==========================
 
-No changes since 1.4.1rc1
+No changes since 1.4.1rc1.
 
 
 Synapse 1.4.1rc1 (2019-10-17)

From c3772a71cbf35a4af229494d7fc0f0c809671b89 Mon Sep 17 00:00:00 2001
From: przemas75 <przemas75@gmail.com>
Date: Fri, 18 Oct 2019 11:23:33 +0200
Subject: [PATCH 276/276] Update docker-compose.yml for a static config file,
 and update traefik examples (#6142)

---
 contrib/docker/README.md          | 35 ++++++++++---------------------
 contrib/docker/docker-compose.yml | 21 ++++++++++++++-----
 2 files changed, 27 insertions(+), 29 deletions(-)

diff --git a/contrib/docker/README.md b/contrib/docker/README.md
index af102f759..89c1518bd 100644
--- a/contrib/docker/README.md
+++ b/contrib/docker/README.md
@@ -1,39 +1,26 @@
+
 # Synapse Docker
 
-FIXME: this is out-of-date as of
-https://github.com/matrix-org/synapse/issues/5518. Contributions to bring it up
-to date would be welcome.
-
-### Automated configuration
-
-It is recommended that you use Docker Compose to run your containers, including
-this image and a Postgres server. A sample ``docker-compose.yml`` is provided,
-including example labels for reverse proxying and other artifacts.
-
-Read the section about environment variables and set at least mandatory variables,
-then run the server:
-
-```
-docker-compose up -d
-```
-
-If secrets are not specified in the environment variables, they will be generated
-as part of the startup. Please ensure these secrets are kept between launches of the
-Docker container, as their loss may require users to log in again.
-
-### Manual configuration
+### Configuration
 
 A sample ``docker-compose.yml`` is provided, including example labels for
 reverse proxying and other artifacts. The docker-compose file is an example,
 please comment/uncomment sections that are not suitable for your usecase.
 
 Specify a ``SYNAPSE_CONFIG_PATH``, preferably to a persistent path,
-to use manual configuration. To generate a fresh ``homeserver.yaml``, simply run:
+to use manual configuration.
+
+To generate a fresh `homeserver.yaml`, you can use the `generate` command.
+(See the [documentation](../../docker/README.md#generating-a-configuration-file)
+for more information.) You will need to specify appropriate values for at least the
+`SYNAPSE_SERVER_NAME` and `SYNAPSE_REPORT_STATS` environment variables. For example:
 
 ```
-docker-compose run --rm -e SYNAPSE_SERVER_NAME=my.matrix.host synapse generate
+docker-compose run --rm -e SYNAPSE_SERVER_NAME=my.matrix.host -e SYNAPSE_REPORT_STATS=yes synapse generate
 ```
 
+(This will also generate necessary signing keys.)
+
 Then, customize your configuration and run the server:
 
 ```
diff --git a/contrib/docker/docker-compose.yml b/contrib/docker/docker-compose.yml
index 1e4ee4375..72c87054e 100644
--- a/contrib/docker/docker-compose.yml
+++ b/contrib/docker/docker-compose.yml
@@ -15,13 +15,10 @@ services:
     restart: unless-stopped
     # See the readme for a full documentation of the environment settings
     environment:
-      - SYNAPSE_SERVER_NAME=my.matrix.host
-      - SYNAPSE_REPORT_STATS=no
-      - SYNAPSE_ENABLE_REGISTRATION=yes
-      - SYNAPSE_LOG_LEVEL=INFO
-      - POSTGRES_PASSWORD=changeme
+      - SYNAPSE_CONFIG_PATH=/etc/homeserver.yaml
     volumes:
       # You may either store all the files in a local folder
+      - ./matrix-config:/etc
       - ./files:/data
       # .. or you may split this between different storage points
       # - ./files:/data
@@ -35,9 +32,23 @@ services:
       - 8448:8448/tcp
     # ... or use a reverse proxy, here is an example for traefik:
     labels:
+      # The following lines are valid for Traefik version 1.x:
       - traefik.enable=true
       - traefik.frontend.rule=Host:my.matrix.Host
       - traefik.port=8008
+      # Alternatively, for Traefik version 2.0:
+      - traefik.enable=true
+      - traefik.http.routers.http-synapse.entryPoints=http
+      - traefik.http.routers.http-synapse.rule=Host(`my.matrix.host`)
+      - traefik.http.middlewares.https_redirect.redirectscheme.scheme=https
+      - traefik.http.middlewares.https_redirect.redirectscheme.permanent=true
+      - traefik.http.routers.http-synapse.middlewares=https_redirect
+      - traefik.http.routers.https-synapse.entryPoints=https
+      - traefik.http.routers.https-synapse.rule=Host(`my.matrix.host`)
+      - traefik.http.routers.https-synapse.service=synapse
+      - traefik.http.routers.https-synapse.tls=true
+      - traefik.http.services.synapse.loadbalancer.server.port=8008
+      - traefik.http.routers.https-synapse.tls.certResolver=le-ssl
 
   db:
     image: docker.io/postgres:10-alpine