Rename database classes to make some sense (#8033)

Erik Johnston 2020-08-05 21:38:57 +01:00 committed by GitHub
parent 0a86850ba3
commit a7bdf98d01
337 changed files with 1408 additions and 1323 deletions

synapse/storage/databases/main/__init__.py

@@ -0,0 +1,596 @@
# -*- coding: utf-8 -*-
# Copyright 2014-2016 OpenMarket Ltd
# Copyright 2018 New Vector Ltd
# Copyright 2019 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import calendar
import logging
import time
from synapse.api.constants import PresenceState
from synapse.config.homeserver import HomeServerConfig
from synapse.storage.database import DatabasePool
from synapse.storage.engines import PostgresEngine
from synapse.storage.util.id_generators import (
IdGenerator,
MultiWriterIdGenerator,
StreamIdGenerator,
)
from synapse.util.caches.stream_change_cache import StreamChangeCache
from .account_data import AccountDataStore
from .appservice import ApplicationServiceStore, ApplicationServiceTransactionStore
from .cache import CacheInvalidationWorkerStore
from .censor_events import CensorEventsStore
from .client_ips import ClientIpStore
from .deviceinbox import DeviceInboxStore
from .devices import DeviceStore
from .directory import DirectoryStore
from .e2e_room_keys import EndToEndRoomKeyStore
from .end_to_end_keys import EndToEndKeyStore
from .event_federation import EventFederationStore
from .event_push_actions import EventPushActionsStore
from .events_bg_updates import EventsBackgroundUpdatesStore
from .filtering import FilteringStore
from .group_server import GroupServerStore
from .keys import KeyStore
from .media_repository import MediaRepositoryStore
from .metrics import ServerMetricsStore
from .monthly_active_users import MonthlyActiveUsersStore
from .openid import OpenIdStore
from .presence import PresenceStore, UserPresenceState
from .profile import ProfileStore
from .purge_events import PurgeEventsStore
from .push_rule import PushRuleStore
from .pusher import PusherStore
from .receipts import ReceiptsStore
from .registration import RegistrationStore
from .rejections import RejectionsStore
from .relations import RelationsStore
from .room import RoomStore
from .roommember import RoomMemberStore
from .search import SearchStore
from .signatures import SignatureStore
from .state import StateStore
from .stats import StatsStore
from .stream import StreamStore
from .tags import TagsStore
from .transactions import TransactionStore
from .ui_auth import UIAuthStore
from .user_directory import UserDirectoryStore
from .user_erasure_store import UserErasureStore
logger = logging.getLogger(__name__)
class DataStore(
EventsBackgroundUpdatesStore,
RoomMemberStore,
RoomStore,
RegistrationStore,
StreamStore,
ProfileStore,
PresenceStore,
TransactionStore,
DirectoryStore,
KeyStore,
StateStore,
SignatureStore,
ApplicationServiceStore,
PurgeEventsStore,
EventFederationStore,
MediaRepositoryStore,
RejectionsStore,
FilteringStore,
PusherStore,
PushRuleStore,
ApplicationServiceTransactionStore,
ReceiptsStore,
EndToEndKeyStore,
EndToEndRoomKeyStore,
SearchStore,
TagsStore,
AccountDataStore,
EventPushActionsStore,
OpenIdStore,
ClientIpStore,
DeviceStore,
DeviceInboxStore,
UserDirectoryStore,
GroupServerStore,
UserErasureStore,
MonthlyActiveUsersStore,
StatsStore,
RelationsStore,
CensorEventsStore,
UIAuthStore,
CacheInvalidationWorkerStore,
ServerMetricsStore,
):
def __init__(self, database: DatabasePool, db_conn, hs):
self.hs = hs
self._clock = hs.get_clock()
self.database_engine = database.engine
self._presence_id_gen = StreamIdGenerator(
db_conn, "presence_stream", "stream_id"
)
self._device_inbox_id_gen = StreamIdGenerator(
db_conn, "device_inbox", "stream_id"
)
self._public_room_id_gen = StreamIdGenerator(
db_conn, "public_room_list_stream", "stream_id"
)
self._device_list_id_gen = StreamIdGenerator(
db_conn,
"device_lists_stream",
"stream_id",
extra_tables=[
("user_signature_stream", "stream_id"),
("device_lists_outbound_pokes", "stream_id"),
],
)
self._cross_signing_id_gen = StreamIdGenerator(
db_conn, "e2e_cross_signing_keys", "stream_id"
)
self._access_tokens_id_gen = IdGenerator(db_conn, "access_tokens", "id")
self._event_reports_id_gen = IdGenerator(db_conn, "event_reports", "id")
self._push_rule_id_gen = IdGenerator(db_conn, "push_rules", "id")
self._push_rules_enable_id_gen = IdGenerator(db_conn, "push_rules_enable", "id")
self._pushers_id_gen = StreamIdGenerator(
db_conn, "pushers", "id", extra_tables=[("deleted_pushers", "stream_id")]
)
self._group_updates_id_gen = StreamIdGenerator(
db_conn, "local_group_updates", "stream_id"
)
if isinstance(self.database_engine, PostgresEngine):
self._cache_id_gen = MultiWriterIdGenerator(
db_conn,
database,
instance_name="master",
table="cache_invalidation_stream_by_instance",
instance_column="instance_name",
id_column="stream_id",
sequence_name="cache_invalidation_stream_seq",
)
else:
self._cache_id_gen = None
super(DataStore, self).__init__(database, db_conn, hs)
self._presence_on_startup = self._get_active_presence(db_conn)
presence_cache_prefill, min_presence_val = self.db_pool.get_cache_dict(
db_conn,
"presence_stream",
entity_column="user_id",
stream_column="stream_id",
max_value=self._presence_id_gen.get_current_token(),
)
self.presence_stream_cache = StreamChangeCache(
"PresenceStreamChangeCache",
min_presence_val,
prefilled_cache=presence_cache_prefill,
)
max_device_inbox_id = self._device_inbox_id_gen.get_current_token()
device_inbox_prefill, min_device_inbox_id = self.db_pool.get_cache_dict(
db_conn,
"device_inbox",
entity_column="user_id",
stream_column="stream_id",
max_value=max_device_inbox_id,
limit=1000,
)
self._device_inbox_stream_cache = StreamChangeCache(
"DeviceInboxStreamChangeCache",
min_device_inbox_id,
prefilled_cache=device_inbox_prefill,
)
        # The federation outbox and the local device inbox use the same
        # stream_id generator.
device_outbox_prefill, min_device_outbox_id = self.db_pool.get_cache_dict(
db_conn,
"device_federation_outbox",
entity_column="destination",
stream_column="stream_id",
max_value=max_device_inbox_id,
limit=1000,
)
self._device_federation_outbox_stream_cache = StreamChangeCache(
"DeviceFederationOutboxStreamChangeCache",
min_device_outbox_id,
prefilled_cache=device_outbox_prefill,
)
device_list_max = self._device_list_id_gen.get_current_token()
self._device_list_stream_cache = StreamChangeCache(
"DeviceListStreamChangeCache", device_list_max
)
self._user_signature_stream_cache = StreamChangeCache(
"UserSignatureStreamChangeCache", device_list_max
)
self._device_list_federation_stream_cache = StreamChangeCache(
"DeviceListFederationStreamChangeCache", device_list_max
)
events_max = self._stream_id_gen.get_current_token()
curr_state_delta_prefill, min_curr_state_delta_id = self.db_pool.get_cache_dict(
db_conn,
"current_state_delta_stream",
entity_column="room_id",
stream_column="stream_id",
max_value=events_max, # As we share the stream id with events token
limit=1000,
)
self._curr_state_delta_stream_cache = StreamChangeCache(
"_curr_state_delta_stream_cache",
min_curr_state_delta_id,
prefilled_cache=curr_state_delta_prefill,
)
_group_updates_prefill, min_group_updates_id = self.db_pool.get_cache_dict(
db_conn,
"local_group_updates",
entity_column="user_id",
stream_column="stream_id",
max_value=self._group_updates_id_gen.get_current_token(),
limit=1000,
)
self._group_updates_stream_cache = StreamChangeCache(
"_group_updates_stream_cache",
min_group_updates_id,
prefilled_cache=_group_updates_prefill,
)
self._stream_order_on_start = self.get_room_max_stream_ordering()
self._min_stream_order_on_start = self.get_room_min_stream_ordering()
# Used in _generate_user_daily_visits to keep track of progress
self._last_user_visit_update = self._get_start_of_day()
def take_presence_startup_info(self):
active_on_startup = self._presence_on_startup
self._presence_on_startup = None
return active_on_startup
def _get_active_presence(self, db_conn):
"""Fetch non-offline presence from the database so that we can register
the appropriate time outs.
"""
sql = (
"SELECT user_id, state, last_active_ts, last_federation_update_ts,"
" last_user_sync_ts, status_msg, currently_active FROM presence_stream"
" WHERE state != ?"
)
sql = self.database_engine.convert_param_style(sql)
txn = db_conn.cursor()
txn.execute(sql, (PresenceState.OFFLINE,))
rows = self.db_pool.cursor_to_dict(txn)
txn.close()
for row in rows:
row["currently_active"] = bool(row["currently_active"])
return [UserPresenceState(**row) for row in rows]
def count_daily_users(self):
"""
Counts the number of users who used this homeserver in the last 24 hours.
"""
yesterday = int(self._clock.time_msec()) - (1000 * 60 * 60 * 24)
return self.db_pool.runInteraction(
"count_daily_users", self._count_users, yesterday
)
def count_monthly_users(self):
"""
Counts the number of users who used this homeserver in the last 30 days.
Note this method is intended for phonehome metrics only and is different
from the mau figure in synapse.storage.monthly_active_users which,
amongst other things, includes a 3 day grace period before a user counts.
"""
thirty_days_ago = int(self._clock.time_msec()) - (1000 * 60 * 60 * 24 * 30)
return self.db_pool.runInteraction(
"count_monthly_users", self._count_users, thirty_days_ago
)
def _count_users(self, txn, time_from):
"""
Returns number of users seen in the past time_from period
"""
sql = """
SELECT COALESCE(count(*), 0) FROM (
SELECT user_id FROM user_ips
WHERE last_seen > ?
GROUP BY user_id
) u
"""
txn.execute(sql, (time_from,))
(count,) = txn.fetchone()
return count
def count_r30_users(self):
"""
        Counts the number of 30 day retained users, defined as:
        * Users who have created their accounts more than 30 days ago
        * Where last seen at most 30 days ago
        * Where account creation and last_seen are > 30 days apart
        Returns counts globally as well as broken down by platform.
"""
def _count_r30_users(txn):
thirty_days_in_secs = 86400 * 30
now = int(self._clock.time())
thirty_days_ago_in_secs = now - thirty_days_in_secs
sql = """
SELECT platform, COALESCE(count(*), 0) FROM (
SELECT
users.name, platform, users.creation_ts * 1000,
MAX(uip.last_seen)
FROM users
INNER JOIN (
SELECT
user_id,
last_seen,
CASE
WHEN user_agent LIKE '%%Android%%' THEN 'android'
WHEN user_agent LIKE '%%iOS%%' THEN 'ios'
WHEN user_agent LIKE '%%Electron%%' THEN 'electron'
WHEN user_agent LIKE '%%Mozilla%%' THEN 'web'
WHEN user_agent LIKE '%%Gecko%%' THEN 'web'
ELSE 'unknown'
END
AS platform
FROM user_ips
) uip
ON users.name = uip.user_id
AND users.appservice_id is NULL
AND users.creation_ts < ?
AND uip.last_seen/1000 > ?
AND (uip.last_seen/1000) - users.creation_ts > 86400 * 30
GROUP BY users.name, platform, users.creation_ts
) u GROUP BY platform
"""
results = {}
txn.execute(sql, (thirty_days_ago_in_secs, thirty_days_ago_in_secs))
for row in txn:
if row[0] == "unknown":
pass
results[row[0]] = row[1]
sql = """
SELECT COALESCE(count(*), 0) FROM (
SELECT users.name, users.creation_ts * 1000,
MAX(uip.last_seen)
FROM users
INNER JOIN (
SELECT
user_id,
last_seen
FROM user_ips
) uip
ON users.name = uip.user_id
AND appservice_id is NULL
AND users.creation_ts < ?
AND uip.last_seen/1000 > ?
AND (uip.last_seen/1000) - users.creation_ts > 86400 * 30
GROUP BY users.name, users.creation_ts
) u
"""
txn.execute(sql, (thirty_days_ago_in_secs, thirty_days_ago_in_secs))
(count,) = txn.fetchone()
results["all"] = count
return results
return self.db_pool.runInteraction("count_r30_users", _count_r30_users)
def _get_start_of_day(self):
"""
Returns millisecond unixtime for start of UTC day.
"""
now = time.gmtime()
today_start = calendar.timegm((now.tm_year, now.tm_mon, now.tm_mday, 0, 0, 0))
return today_start * 1000
def generate_user_daily_visits(self):
"""
        Generates daily visit data for use in cohort/retention analysis
"""
def _generate_user_daily_visits(txn):
logger.info("Calling _generate_user_daily_visits")
today_start = self._get_start_of_day()
a_day_in_milliseconds = 24 * 60 * 60 * 1000
now = self.clock.time_msec()
sql = """
INSERT INTO user_daily_visits (user_id, device_id, timestamp)
SELECT u.user_id, u.device_id, ?
FROM user_ips AS u
LEFT JOIN (
SELECT user_id, device_id, timestamp FROM user_daily_visits
WHERE timestamp = ?
) udv
ON u.user_id = udv.user_id AND u.device_id=udv.device_id
INNER JOIN users ON users.name=u.user_id
WHERE last_seen > ? AND last_seen <= ?
AND udv.timestamp IS NULL AND users.is_guest=0
AND users.appservice_id IS NULL
GROUP BY u.user_id, u.device_id
"""
# This means that the day has rolled over but there could still
# be entries from the previous day. There is an edge case
# where if the user logs in at 23:59 and overwrites their
# last_seen at 00:01 then they will not be counted in the
# previous day's stats - it is important that the query is run
# often to minimise this case.
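            # Worked example (timestamps invented): if the day has just rolled
            # over and _last_user_visit_update still points at 23:40 yesterday,
            # the execute() below records the 23:40-00:00 stragglers against
            # yesterday_start, and the unconditional execute() further down then
            # records 00:00-now against today_start.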
if today_start > self._last_user_visit_update:
yesterday_start = today_start - a_day_in_milliseconds
txn.execute(
sql,
(
yesterday_start,
yesterday_start,
self._last_user_visit_update,
today_start,
),
)
self._last_user_visit_update = today_start
txn.execute(
sql, (today_start, today_start, self._last_user_visit_update, now)
)
            # Update _last_user_visit_update to now. The reason to do this
            # rather than just clamping to the beginning of the day is to limit
            # the size of the join - meaning that the query can be run more
            # frequently.
self._last_user_visit_update = now
return self.db_pool.runInteraction(
"generate_user_daily_visits", _generate_user_daily_visits
)
def get_users(self):
"""Function to retrieve a list of users in users table.
Args:
Returns:
defer.Deferred: resolves to list[dict[str, Any]]
"""
return self.db_pool.simple_select_list(
table="users",
keyvalues={},
retcols=[
"name",
"password_hash",
"is_guest",
"admin",
"user_type",
"deactivated",
],
desc="get_users",
)
def get_users_paginate(
self, start, limit, name=None, guests=True, deactivated=False
):
"""Function to retrieve a paginated list of users from
users list. This will return a json list of users and the
total number of users matching the filter criteria.
Args:
start (int): start number to begin the query from
limit (int): number of rows to retrieve
name (string): filter for user names
            guests (bool): whether to include guest users
deactivated (bool): whether to include deactivated users
Returns:
defer.Deferred: resolves to list[dict[str, Any]], int
"""
def get_users_paginate_txn(txn):
filters = []
args = []
if name:
filters.append("name LIKE ?")
args.append("%" + name + "%")
if not guests:
filters.append("is_guest = 0")
if not deactivated:
filters.append("deactivated = 0")
where_clause = "WHERE " + " AND ".join(filters) if len(filters) > 0 else ""
sql = "SELECT COUNT(*) as total_users FROM users %s" % (where_clause)
txn.execute(sql, args)
count = txn.fetchone()[0]
args = [self.hs.config.server_name] + args + [limit, start]
sql = """
SELECT name, user_type, is_guest, admin, deactivated, displayname, avatar_url
FROM users as u
LEFT JOIN profiles AS p ON u.name = '@' || p.user_id || ':' || ?
{}
ORDER BY u.name LIMIT ? OFFSET ?
""".format(
where_clause
)
txn.execute(sql, args)
users = self.db_pool.cursor_to_dict(txn)
return users, count
return self.db_pool.runInteraction(
"get_users_paginate_txn", get_users_paginate_txn
)
def search_users(self, term):
"""Function to search users list for one or more users with
the matched term.
Args:
term (str): search term
col (str): column to query term should be matched to
Returns:
defer.Deferred: resolves to list[dict[str, Any]]
"""
return self.db_pool.simple_search_list(
table="users",
term=term,
col="name",
retcols=["name", "password_hash", "is_guest", "admin", "user_type"],
desc="search_users",
)
def check_database_before_upgrade(cur, database_engine, config: HomeServerConfig):
"""Called before upgrading an existing database to check that it is broadly sane
compared with the configuration.
"""
domain = config.server_name
sql = database_engine.convert_param_style(
"SELECT COUNT(*) FROM users WHERE name NOT LIKE ?"
)
pat = "%:" + domain
cur.execute(sql, (pat,))
num_not_matching = cur.fetchall()[0][0]
if num_not_matching == 0:
return
raise Exception(
"Found users in database not native to %s!\n"
"You cannot changed a synapse server_name after it's been configured"
% (domain,)
)
__all__ = ["DataStore", "check_database_before_upgrade"]

synapse/storage/databases/main/account_data.py

@@ -0,0 +1,430 @@
# -*- coding: utf-8 -*-
# Copyright 2014-2016 OpenMarket Ltd
# Copyright 2018 New Vector Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import abc
import logging
from typing import List, Tuple
from canonicaljson import json
from twisted.internet import defer
from synapse.storage._base import SQLBaseStore, db_to_json
from synapse.storage.database import DatabasePool
from synapse.storage.util.id_generators import StreamIdGenerator
from synapse.util.caches.descriptors import cached, cachedInlineCallbacks
from synapse.util.caches.stream_change_cache import StreamChangeCache
logger = logging.getLogger(__name__)
class AccountDataWorkerStore(SQLBaseStore):
"""This is an abstract base class where subclasses must implement
`get_max_account_data_stream_id` which can be called in the initializer.
"""
# This ABCMeta metaclass ensures that we cannot be instantiated without
# the abstract methods being implemented.
__metaclass__ = abc.ABCMeta
def __init__(self, database: DatabasePool, db_conn, hs):
account_max = self.get_max_account_data_stream_id()
self._account_data_stream_cache = StreamChangeCache(
"AccountDataAndTagsChangeCache", account_max
)
super(AccountDataWorkerStore, self).__init__(database, db_conn, hs)
@abc.abstractmethod
def get_max_account_data_stream_id(self):
"""Get the current max stream ID for account data stream
Returns:
int
"""
raise NotImplementedError()
@cached()
def get_account_data_for_user(self, user_id):
"""Get all the client account_data for a user.
Args:
user_id(str): The user to get the account_data for.
Returns:
A deferred pair of a dict of global account_data and a dict
mapping from room_id string to per room account_data dicts.
"""
def get_account_data_for_user_txn(txn):
rows = self.db_pool.simple_select_list_txn(
txn,
"account_data",
{"user_id": user_id},
["account_data_type", "content"],
)
global_account_data = {
row["account_data_type"]: db_to_json(row["content"]) for row in rows
}
rows = self.db_pool.simple_select_list_txn(
txn,
"room_account_data",
{"user_id": user_id},
["room_id", "account_data_type", "content"],
)
by_room = {}
for row in rows:
room_data = by_room.setdefault(row["room_id"], {})
room_data[row["account_data_type"]] = db_to_json(row["content"])
return global_account_data, by_room
return self.db_pool.runInteraction(
"get_account_data_for_user", get_account_data_for_user_txn
)
@cachedInlineCallbacks(num_args=2, max_entries=5000)
def get_global_account_data_by_type_for_user(self, data_type, user_id):
"""
Returns:
Deferred: A dict
"""
result = yield self.db_pool.simple_select_one_onecol(
table="account_data",
keyvalues={"user_id": user_id, "account_data_type": data_type},
retcol="content",
desc="get_global_account_data_by_type_for_user",
allow_none=True,
)
if result:
return db_to_json(result)
else:
return None
@cached(num_args=2)
def get_account_data_for_room(self, user_id, room_id):
"""Get all the client account_data for a user for a room.
Args:
user_id(str): The user to get the account_data for.
room_id(str): The room to get the account_data for.
Returns:
A deferred dict of the room account_data
"""
def get_account_data_for_room_txn(txn):
rows = self.db_pool.simple_select_list_txn(
txn,
"room_account_data",
{"user_id": user_id, "room_id": room_id},
["account_data_type", "content"],
)
return {
row["account_data_type"]: db_to_json(row["content"]) for row in rows
}
return self.db_pool.runInteraction(
"get_account_data_for_room", get_account_data_for_room_txn
)
@cached(num_args=3, max_entries=5000)
def get_account_data_for_room_and_type(self, user_id, room_id, account_data_type):
"""Get the client account_data of given type for a user for a room.
Args:
user_id(str): The user to get the account_data for.
room_id(str): The room to get the account_data for.
account_data_type (str): The account data type to get.
Returns:
A deferred of the room account_data for that type, or None if
there isn't any set.
"""
def get_account_data_for_room_and_type_txn(txn):
content_json = self.db_pool.simple_select_one_onecol_txn(
txn,
table="room_account_data",
keyvalues={
"user_id": user_id,
"room_id": room_id,
"account_data_type": account_data_type,
},
retcol="content",
allow_none=True,
)
return db_to_json(content_json) if content_json else None
return self.db_pool.runInteraction(
"get_account_data_for_room_and_type", get_account_data_for_room_and_type_txn
)
async def get_updated_global_account_data(
self, last_id: int, current_id: int, limit: int
) -> List[Tuple[int, str, str]]:
"""Get the global account_data that has changed, for the account_data stream
Args:
last_id: the last stream_id from the previous batch.
current_id: the maximum stream_id to return up to
limit: the maximum number of rows to return
Returns:
A list of tuples of stream_id int, user_id string,
and type string.
"""
if last_id == current_id:
return []
def get_updated_global_account_data_txn(txn):
sql = (
"SELECT stream_id, user_id, account_data_type"
" FROM account_data WHERE ? < stream_id AND stream_id <= ?"
" ORDER BY stream_id ASC LIMIT ?"
)
txn.execute(sql, (last_id, current_id, limit))
return txn.fetchall()
return await self.db_pool.runInteraction(
"get_updated_global_account_data", get_updated_global_account_data_txn
)
async def get_updated_room_account_data(
self, last_id: int, current_id: int, limit: int
) -> List[Tuple[int, str, str, str]]:
"""Get the global account_data that has changed, for the account_data stream
Args:
last_id: the last stream_id from the previous batch.
current_id: the maximum stream_id to return up to
limit: the maximum number of rows to return
Returns:
A list of tuples of stream_id int, user_id string,
room_id string and type string.
"""
if last_id == current_id:
return []
def get_updated_room_account_data_txn(txn):
sql = (
"SELECT stream_id, user_id, room_id, account_data_type"
" FROM room_account_data WHERE ? < stream_id AND stream_id <= ?"
" ORDER BY stream_id ASC LIMIT ?"
)
txn.execute(sql, (last_id, current_id, limit))
return txn.fetchall()
return await self.db_pool.runInteraction(
"get_updated_room_account_data", get_updated_room_account_data_txn
)
def get_updated_account_data_for_user(self, user_id, stream_id):
"""Get all the client account_data for a that's changed for a user
Args:
user_id(str): The user to get the account_data for.
stream_id(int): The point in the stream since which to get updates
Returns:
A deferred pair of a dict of global account_data and a dict
mapping from room_id string to per room account_data dicts.
"""
def get_updated_account_data_for_user_txn(txn):
sql = (
"SELECT account_data_type, content FROM account_data"
" WHERE user_id = ? AND stream_id > ?"
)
txn.execute(sql, (user_id, stream_id))
global_account_data = {row[0]: db_to_json(row[1]) for row in txn}
sql = (
"SELECT room_id, account_data_type, content FROM room_account_data"
" WHERE user_id = ? AND stream_id > ?"
)
txn.execute(sql, (user_id, stream_id))
account_data_by_room = {}
for row in txn:
room_account_data = account_data_by_room.setdefault(row[0], {})
room_account_data[row[1]] = db_to_json(row[2])
return global_account_data, account_data_by_room
changed = self._account_data_stream_cache.has_entity_changed(
user_id, int(stream_id)
)
if not changed:
return defer.succeed(({}, {}))
return self.db_pool.runInteraction(
"get_updated_account_data_for_user", get_updated_account_data_for_user_txn
)
@cachedInlineCallbacks(num_args=2, cache_context=True, max_entries=5000)
def is_ignored_by(self, ignored_user_id, ignorer_user_id, cache_context):
ignored_account_data = yield self.get_global_account_data_by_type_for_user(
"m.ignored_user_list",
ignorer_user_id,
on_invalidate=cache_context.invalidate,
)
if not ignored_account_data:
return False
return ignored_user_id in ignored_account_data.get("ignored_users", {})
class AccountDataStore(AccountDataWorkerStore):
def __init__(self, database: DatabasePool, db_conn, hs):
self._account_data_id_gen = StreamIdGenerator(
db_conn,
"account_data_max_stream_id",
"stream_id",
extra_tables=[
("room_account_data", "stream_id"),
("room_tags_revisions", "stream_id"),
],
)
super(AccountDataStore, self).__init__(database, db_conn, hs)
def get_max_account_data_stream_id(self):
"""Get the current max stream id for the private user data stream
Returns:
A deferred int.
"""
return self._account_data_id_gen.get_current_token()
@defer.inlineCallbacks
def add_account_data_to_room(self, user_id, room_id, account_data_type, content):
"""Add some account_data to a room for a user.
Args:
            user_id(str): The user to add the account_data for.
            room_id(str): The room to add the account_data for.
            account_data_type(str): The type of account_data to add.
            content(dict): A json object to associate with the account_data.
Returns:
A deferred that completes once the account_data has been added.
"""
content_json = json.dumps(content)
with self._account_data_id_gen.get_next() as next_id:
# no need to lock here as room_account_data has a unique constraint
# on (user_id, room_id, account_data_type) so simple_upsert will
# retry if there is a conflict.
yield self.db_pool.simple_upsert(
desc="add_room_account_data",
table="room_account_data",
keyvalues={
"user_id": user_id,
"room_id": room_id,
"account_data_type": account_data_type,
},
values={"stream_id": next_id, "content": content_json},
lock=False,
)
# it's theoretically possible for the above to succeed and the
# below to fail - in which case we might reuse a stream id on
# restart, and the above update might not get propagated. That
# doesn't sound any worse than the whole update getting lost,
# which is what would happen if we combined the two into one
# transaction.
yield self._update_max_stream_id(next_id)
self._account_data_stream_cache.entity_has_changed(user_id, next_id)
self.get_account_data_for_user.invalidate((user_id,))
self.get_account_data_for_room.invalidate((user_id, room_id))
self.get_account_data_for_room_and_type.prefill(
(user_id, room_id, account_data_type), content
)
result = self._account_data_id_gen.get_current_token()
return result
@defer.inlineCallbacks
def add_account_data_for_user(self, user_id, account_data_type, content):
"""Add some account_data to a room for a user.
Args:
user_id(str): The user to add a tag for.
account_data_type(str): The type of account_data to add.
content(dict): A json object to associate with the tag.
Returns:
A deferred that completes once the account_data has been added.
"""
content_json = json.dumps(content)
with self._account_data_id_gen.get_next() as next_id:
# no need to lock here as account_data has a unique constraint on
# (user_id, account_data_type) so simple_upsert will retry if
# there is a conflict.
yield self.db_pool.simple_upsert(
desc="add_user_account_data",
table="account_data",
keyvalues={"user_id": user_id, "account_data_type": account_data_type},
values={"stream_id": next_id, "content": content_json},
lock=False,
)
# it's theoretically possible for the above to succeed and the
# below to fail - in which case we might reuse a stream id on
# restart, and the above update might not get propagated. That
# doesn't sound any worse than the whole update getting lost,
# which is what would happen if we combined the two into one
# transaction.
#
# Note: This is only here for backwards compat to allow admins to
# roll back to a previous Synapse version. Next time we update the
# database version we can remove this table.
yield self._update_max_stream_id(next_id)
self._account_data_stream_cache.entity_has_changed(user_id, next_id)
self.get_account_data_for_user.invalidate((user_id,))
self.get_global_account_data_by_type_for_user.invalidate(
(account_data_type, user_id)
)
result = self._account_data_id_gen.get_current_token()
return result
def _update_max_stream_id(self, next_id):
"""Update the max stream_id
Args:
            next_id(int): The revision to advance to.
"""
# Note: This is only here for backwards compat to allow admins to
# roll back to a previous Synapse version. Next time we update the
# database version we can remove this table.
def _update(txn):
update_max_id_sql = (
"UPDATE account_data_max_stream_id"
" SET stream_id = ?"
" WHERE stream_id < ?"
)
txn.execute(update_max_id_sql, (next_id, next_id))
return self.db_pool.runInteraction("update_account_data_max_stream_id", _update)

synapse/storage/databases/main/appservice.py

@@ -0,0 +1,374 @@
# -*- coding: utf-8 -*-
# Copyright 2015, 2016 OpenMarket Ltd
# Copyright 2018 New Vector Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import re
from canonicaljson import json
from twisted.internet import defer
from synapse.appservice import AppServiceTransaction
from synapse.config.appservice import load_appservices
from synapse.storage._base import SQLBaseStore, db_to_json
from synapse.storage.database import DatabasePool
from synapse.storage.databases.main.events_worker import EventsWorkerStore
logger = logging.getLogger(__name__)
def _make_exclusive_regex(services_cache):
    # We precompile a regex constructed from all the regexes that the
    # application services have registered for exclusive users.
exclusive_user_regexes = [
regex.pattern
for service in services_cache
for regex in service.get_exclusive_user_regexes()
]
if exclusive_user_regexes:
exclusive_user_regex = "|".join("(" + r + ")" for r in exclusive_user_regexes)
exclusive_user_regex = re.compile(exclusive_user_regex)
else:
# We handle this case specially otherwise the constructed regex
# will always match
exclusive_user_regex = None
return exclusive_user_regex
class ApplicationServiceWorkerStore(SQLBaseStore):
def __init__(self, database: DatabasePool, db_conn, hs):
self.services_cache = load_appservices(
hs.hostname, hs.config.app_service_config_files
)
self.exclusive_user_regex = _make_exclusive_regex(self.services_cache)
super(ApplicationServiceWorkerStore, self).__init__(database, db_conn, hs)
def get_app_services(self):
return self.services_cache
def get_if_app_services_interested_in_user(self, user_id):
"""Check if the user is one associated with an app service (exclusively)
"""
if self.exclusive_user_regex:
return bool(self.exclusive_user_regex.match(user_id))
else:
return False
def get_app_service_by_user_id(self, user_id):
"""Retrieve an application service from their user ID.
All application services have associated with them a particular user ID.
There is no distinguishing feature on the user ID which indicates it
represents an application service. This function allows you to map from
a user ID to an application service.
Args:
user_id(str): The user ID to see if it is an application service.
Returns:
synapse.appservice.ApplicationService or None.
"""
for service in self.services_cache:
if service.sender == user_id:
return service
return None
def get_app_service_by_token(self, token):
"""Get the application service with the given appservice token.
Args:
token (str): The application service token.
Returns:
synapse.appservice.ApplicationService or None.
"""
for service in self.services_cache:
if service.token == token:
return service
return None
def get_app_service_by_id(self, as_id):
"""Get the application service with the given appservice ID.
Args:
as_id (str): The application service ID.
Returns:
synapse.appservice.ApplicationService or None.
"""
for service in self.services_cache:
if service.id == as_id:
return service
return None
class ApplicationServiceStore(ApplicationServiceWorkerStore):
# This is currently empty due to there not being any AS storage functions
# that can't be run on the workers. Since this may change in future, and
# to keep consistency with the other stores, we keep this empty class for
# now.
pass
class ApplicationServiceTransactionWorkerStore(
ApplicationServiceWorkerStore, EventsWorkerStore
):
@defer.inlineCallbacks
def get_appservices_by_state(self, state):
"""Get a list of application services based on their state.
Args:
state(ApplicationServiceState): The state to filter on.
Returns:
A Deferred which resolves to a list of ApplicationServices, which
may be empty.
"""
results = yield self.db_pool.simple_select_list(
"application_services_state", {"state": state}, ["as_id"]
)
# NB: This assumes this class is linked with ApplicationServiceStore
as_list = self.get_app_services()
services = []
for res in results:
for service in as_list:
if service.id == res["as_id"]:
services.append(service)
return services
@defer.inlineCallbacks
def get_appservice_state(self, service):
"""Get the application service state.
Args:
            service(ApplicationService): The service whose state to get.
Returns:
A Deferred which resolves to ApplicationServiceState.
"""
result = yield self.db_pool.simple_select_one(
"application_services_state",
{"as_id": service.id},
["state"],
allow_none=True,
desc="get_appservice_state",
)
if result:
return result.get("state")
return None
def set_appservice_state(self, service, state):
"""Set the application service state.
Args:
service(ApplicationService): The service whose state to set.
state(ApplicationServiceState): The connectivity state to apply.
Returns:
A Deferred which resolves when the state was set successfully.
"""
return self.db_pool.simple_upsert(
"application_services_state", {"as_id": service.id}, {"state": state}
)
def create_appservice_txn(self, service, events):
"""Atomically creates a new transaction for this application service
with the given list of events.
Args:
service(ApplicationService): The service who the transaction is for.
events(list<Event>): A list of events to put in the transaction.
Returns:
AppServiceTransaction: A new transaction.
"""
def _create_appservice_txn(txn):
# work out new txn id (highest txn id for this service += 1)
# The highest id may be the last one sent (in which case it is last_txn)
# or it may be the highest in the txns list (which are waiting to be/are
# being sent)
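            # e.g. if last_txn is 5 but a queued transaction with id 7 is still
            # waiting to be sent, new_txn_id becomes max(7, 5) + 1 = 8.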
last_txn_id = self._get_last_txn(txn, service.id)
txn.execute(
"SELECT MAX(txn_id) FROM application_services_txns WHERE as_id=?",
(service.id,),
)
highest_txn_id = txn.fetchone()[0]
if highest_txn_id is None:
highest_txn_id = 0
new_txn_id = max(highest_txn_id, last_txn_id) + 1
# Insert new txn into txn table
event_ids = json.dumps([e.event_id for e in events])
txn.execute(
"INSERT INTO application_services_txns(as_id, txn_id, event_ids) "
"VALUES(?,?,?)",
(service.id, new_txn_id, event_ids),
)
return AppServiceTransaction(service=service, id=new_txn_id, events=events)
return self.db_pool.runInteraction(
"create_appservice_txn", _create_appservice_txn
)
def complete_appservice_txn(self, txn_id, service):
"""Completes an application service transaction.
Args:
txn_id(str): The transaction ID being completed.
service(ApplicationService): The application service which was sent
this transaction.
Returns:
A Deferred which resolves if this transaction was stored
successfully.
"""
txn_id = int(txn_id)
def _complete_appservice_txn(txn):
# Debugging query: Make sure the txn being completed is EXACTLY +1 from
# what was there before. If it isn't, we've got problems (e.g. the AS
# has probably missed some events), so whine loudly but still continue,
# since it shouldn't fail completion of the transaction.
last_txn_id = self._get_last_txn(txn, service.id)
if (last_txn_id + 1) != txn_id:
logger.error(
"appservice: Completing a transaction which has an ID > 1 from "
"the last ID sent to this AS. We've either dropped events or "
"sent it to the AS out of order. FIX ME. last_txn=%s "
"completing_txn=%s service_id=%s",
last_txn_id,
txn_id,
service.id,
)
# Set current txn_id for AS to 'txn_id'
self.db_pool.simple_upsert_txn(
txn,
"application_services_state",
{"as_id": service.id},
{"last_txn": txn_id},
)
# Delete txn
self.db_pool.simple_delete_txn(
txn,
"application_services_txns",
{"txn_id": txn_id, "as_id": service.id},
)
return self.db_pool.runInteraction(
"complete_appservice_txn", _complete_appservice_txn
)
@defer.inlineCallbacks
def get_oldest_unsent_txn(self, service):
"""Get the oldest transaction which has not been sent for this
service.
Args:
            service(ApplicationService): The app service to get the oldest transaction for.
Returns:
A Deferred which resolves to an AppServiceTransaction or
None.
"""
def _get_oldest_unsent_txn(txn):
# Monotonically increasing txn ids, so just select the smallest
# one in the txns table (we delete them when they are sent)
txn.execute(
"SELECT * FROM application_services_txns WHERE as_id=?"
" ORDER BY txn_id ASC LIMIT 1",
(service.id,),
)
rows = self.db_pool.cursor_to_dict(txn)
if not rows:
return None
entry = rows[0]
return entry
entry = yield self.db_pool.runInteraction(
"get_oldest_unsent_appservice_txn", _get_oldest_unsent_txn
)
if not entry:
return None
event_ids = db_to_json(entry["event_ids"])
events = yield self.get_events_as_list(event_ids)
return AppServiceTransaction(service=service, id=entry["txn_id"], events=events)
def _get_last_txn(self, txn, service_id):
txn.execute(
"SELECT last_txn FROM application_services_state WHERE as_id=?",
(service_id,),
)
last_txn_id = txn.fetchone()
if last_txn_id is None or last_txn_id[0] is None: # no row exists
return 0
else:
return int(last_txn_id[0]) # select 'last_txn' col
def set_appservice_last_pos(self, pos):
def set_appservice_last_pos_txn(txn):
txn.execute(
"UPDATE appservice_stream_position SET stream_ordering = ?", (pos,)
)
return self.db_pool.runInteraction(
"set_appservice_last_pos", set_appservice_last_pos_txn
)
@defer.inlineCallbacks
def get_new_events_for_appservice(self, current_id, limit):
"""Get all new evnets"""
def get_new_events_for_appservice_txn(txn):
sql = (
"SELECT e.stream_ordering, e.event_id"
" FROM events AS e"
" WHERE"
" (SELECT stream_ordering FROM appservice_stream_position)"
" < e.stream_ordering"
" AND e.stream_ordering <= ?"
" ORDER BY e.stream_ordering ASC"
" LIMIT ?"
)
txn.execute(sql, (current_id, limit))
rows = txn.fetchall()
upper_bound = current_id
if len(rows) == limit:
upper_bound = rows[-1][0]
return upper_bound, [row[1] for row in rows]
upper_bound, event_ids = yield self.db_pool.runInteraction(
"get_new_events_for_appservice", get_new_events_for_appservice_txn
)
events = yield self.get_events_as_list(event_ids)
return upper_bound, events
class ApplicationServiceTransactionStore(ApplicationServiceTransactionWorkerStore):
# This is currently empty due to there not being any AS storage functions
# that can't be run on the workers. Since this may change in future, and
# to keep consistency with the other stores, we keep this empty class for
# now.
pass
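# As a rough illustration of what _make_exclusive_regex above produces, here is
# the same OR-of-groups construction exercised against a couple of invented
# appservice namespaces. FakeService is hypothetical; the real objects come from
# load_appservices and expose the same get_exclusive_user_regexes() method.
import re  # already imported at the top of this module


class FakeService:
    def __init__(self, patterns):
        self._regexes = [re.compile(p) for p in patterns]

    def get_exclusive_user_regexes(self):
        return self._regexes


_fake_services = [
    FakeService([r"@irc_.*:example\.org"]),
    FakeService([r"@slack_.*:example\.org"]),
]
_exclusive = _make_exclusive_regex(_fake_services)
assert _exclusive.match("@irc_alice:example.org")
assert not _exclusive.match("@alice:example.org")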

synapse/storage/databases/main/cache.py

@@ -0,0 +1,307 @@
# -*- coding: utf-8 -*-
# Copyright 2019 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import itertools
import logging
from typing import Any, Iterable, List, Optional, Tuple
from synapse.api.constants import EventTypes
from synapse.replication.tcp.streams import BackfillStream, CachesStream
from synapse.replication.tcp.streams.events import (
EventsStream,
EventsStreamCurrentStateRow,
EventsStreamEventRow,
)
from synapse.storage._base import SQLBaseStore
from synapse.storage.database import DatabasePool
from synapse.storage.engines import PostgresEngine
from synapse.util.iterutils import batch_iter
logger = logging.getLogger(__name__)
# This is a special cache name we use to batch multiple invalidations of caches
# based on the current state when notifying workers over replication.
CURRENT_STATE_CACHE_NAME = "cs_cache_fake"
class CacheInvalidationWorkerStore(SQLBaseStore):
def __init__(self, database: DatabasePool, db_conn, hs):
super().__init__(database, db_conn, hs)
self._instance_name = hs.get_instance_name()
async def get_all_updated_caches(
self, instance_name: str, last_id: int, current_id: int, limit: int
) -> Tuple[List[Tuple[int, tuple]], int, bool]:
"""Get updates for caches replication stream.
Args:
instance_name: The writer we want to fetch updates from. Unused
here since there is only ever one writer.
last_id: The token to fetch updates from. Exclusive.
current_id: The token to fetch updates up to. Inclusive.
limit: The requested limit for the number of rows to return. The
function may return more or fewer rows.
Returns:
A tuple consisting of: the updates, a token to use to fetch
subsequent updates, and whether we returned fewer rows than exists
between the requested tokens due to the limit.
The token returned can be used in a subsequent call to this
            function to get further updates.
The updates are a list of 2-tuples of stream ID and the row data
"""
if last_id == current_id:
return [], current_id, False
def get_all_updated_caches_txn(txn):
# We purposefully don't bound by the current token, as we want to
# send across cache invalidations as quickly as possible. Cache
# invalidations are idempotent, so duplicates are fine.
sql = """
SELECT stream_id, cache_func, keys, invalidation_ts
FROM cache_invalidation_stream_by_instance
WHERE stream_id > ? AND instance_name = ?
ORDER BY stream_id ASC
LIMIT ?
"""
txn.execute(sql, (last_id, instance_name, limit))
updates = [(row[0], row[1:]) for row in txn]
limited = False
upto_token = current_id
if len(updates) >= limit:
upto_token = updates[-1][0]
limited = True
return updates, upto_token, limited
return await self.db_pool.runInteraction(
"get_all_updated_caches", get_all_updated_caches_txn
)
def process_replication_rows(self, stream_name, instance_name, token, rows):
if stream_name == EventsStream.NAME:
for row in rows:
self._process_event_stream_row(token, row)
elif stream_name == BackfillStream.NAME:
for row in rows:
self._invalidate_caches_for_event(
-token,
row.event_id,
row.room_id,
row.type,
row.state_key,
row.redacts,
row.relates_to,
backfilled=True,
)
elif stream_name == CachesStream.NAME:
if self._cache_id_gen:
self._cache_id_gen.advance(instance_name, token)
for row in rows:
if row.cache_func == CURRENT_STATE_CACHE_NAME:
if row.keys is None:
raise Exception(
"Can't send an 'invalidate all' for current state cache"
)
room_id = row.keys[0]
members_changed = set(row.keys[1:])
self._invalidate_state_caches(room_id, members_changed)
else:
self._attempt_to_invalidate_cache(row.cache_func, row.keys)
super().process_replication_rows(stream_name, instance_name, token, rows)
def _process_event_stream_row(self, token, row):
data = row.data
if row.type == EventsStreamEventRow.TypeId:
self._invalidate_caches_for_event(
token,
data.event_id,
data.room_id,
data.type,
data.state_key,
data.redacts,
data.relates_to,
backfilled=False,
)
elif row.type == EventsStreamCurrentStateRow.TypeId:
self._curr_state_delta_stream_cache.entity_has_changed(
row.data.room_id, token
)
if data.type == EventTypes.Member:
self.get_rooms_for_user_with_stream_ordering.invalidate(
(data.state_key,)
)
else:
raise Exception("Unknown events stream row type %s" % (row.type,))
def _invalidate_caches_for_event(
self,
stream_ordering,
event_id,
room_id,
etype,
state_key,
redacts,
relates_to,
backfilled,
):
self._invalidate_get_event_cache(event_id)
self.get_latest_event_ids_in_room.invalidate((room_id,))
self.get_unread_message_count_for_user.invalidate_many((room_id,))
self.get_unread_event_push_actions_by_room_for_user.invalidate_many((room_id,))
if not backfilled:
self._events_stream_cache.entity_has_changed(room_id, stream_ordering)
if redacts:
self._invalidate_get_event_cache(redacts)
if etype == EventTypes.Member:
self._membership_stream_cache.entity_has_changed(state_key, stream_ordering)
self.get_invited_rooms_for_local_user.invalidate((state_key,))
if relates_to:
self.get_relations_for_event.invalidate_many((relates_to,))
self.get_aggregation_groups_for_event.invalidate_many((relates_to,))
self.get_applicable_edit.invalidate((relates_to,))
async def invalidate_cache_and_stream(self, cache_name: str, keys: Tuple[Any, ...]):
"""Invalidates the cache and adds it to the cache stream so slaves
will know to invalidate their caches.
This should only be used to invalidate caches where slaves won't
otherwise know from other replication streams that the cache should
be invalidated.
"""
cache_func = getattr(self, cache_name, None)
if not cache_func:
return
cache_func.invalidate(keys)
await self.db_pool.runInteraction(
"invalidate_cache_and_stream",
self._send_invalidation_to_replication,
cache_func.__name__,
keys,
)
def _invalidate_cache_and_stream(self, txn, cache_func, keys):
"""Invalidates the cache and adds it to the cache stream so slaves
will know to invalidate their caches.
This should only be used to invalidate caches where slaves won't
otherwise know from other replication streams that the cache should
be invalidated.
"""
txn.call_after(cache_func.invalidate, keys)
self._send_invalidation_to_replication(txn, cache_func.__name__, keys)
def _invalidate_all_cache_and_stream(self, txn, cache_func):
"""Invalidates the entire cache and adds it to the cache stream so slaves
will know to invalidate their caches.
"""
txn.call_after(cache_func.invalidate_all)
self._send_invalidation_to_replication(txn, cache_func.__name__, None)
def _invalidate_state_caches_and_stream(self, txn, room_id, members_changed):
"""Special case invalidation of caches based on current state.
We special case this so that we can batch the cache invalidations into a
single replication poke.
Args:
txn
room_id (str): Room where state changed
members_changed (iterable[str]): The user_ids of members that have changed
"""
txn.call_after(self._invalidate_state_caches, room_id, members_changed)
if members_changed:
# We need to be careful that the size of the `members_changed` list
# isn't so large that it causes problems sending over replication, so we
# send them in chunks.
# Max line length is 16K, and max user ID length is 255, so 50 should
# be safe.
for chunk in batch_iter(members_changed, 50):
keys = itertools.chain([room_id], chunk)
self._send_invalidation_to_replication(
txn, CURRENT_STATE_CACHE_NAME, keys
)
else:
# if no members changed, we still need to invalidate the other caches.
self._send_invalidation_to_replication(
txn, CURRENT_STATE_CACHE_NAME, [room_id]
)
def _send_invalidation_to_replication(
self, txn, cache_name: str, keys: Optional[Iterable[Any]]
):
"""Notifies replication that given cache has been invalidated.
Note that this does *not* invalidate the cache locally.
Args:
txn
cache_name
keys: Entry to invalidate. If None will invalidate all.
"""
if cache_name == CURRENT_STATE_CACHE_NAME and keys is None:
raise Exception(
"Can't stream invalidate all with magic current state cache"
)
if isinstance(self.database_engine, PostgresEngine):
# get_next() returns a context manager which is designed to wrap
# the transaction. However, we want to only get an ID when we want
# to use it, here, so we need to call __enter__ manually, and have
# __exit__ called after the transaction finishes.
stream_id = self._cache_id_gen.get_next_txn(txn)
txn.call_after(self.hs.get_notifier().on_new_replication_data)
if keys is not None:
keys = list(keys)
self.db_pool.simple_insert_txn(
txn,
table="cache_invalidation_stream_by_instance",
values={
"stream_id": stream_id,
"instance_name": self._instance_name,
"cache_func": cache_name,
"keys": keys,
"invalidation_ts": self.clock.time_msec(),
},
)
def get_cache_stream_token(self, instance_name):
if self._cache_id_gen:
return self._cache_id_gen.get_current_token(instance_name)
else:
return 0
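# The chunking in _invalidate_state_caches_and_stream above (at most 50 user IDs
# per replication poke, with the room ID prepended to each chunk) can be
# sketched without the replication machinery. _toy_batch_iter is a simplified
# stand-in for synapse.util.iterutils.batch_iter, assumed here to yield
# successive fixed-size tuples.
import itertools  # already imported at the top of this module


def _toy_batch_iter(iterable, size):
    it = iter(iterable)
    return iter(lambda: tuple(itertools.islice(it, size)), ())


_toy_room_id = "!room:example.org"
_toy_members_changed = ["@user%d:example.org" % i for i in range(120)]
_toy_pokes = [
    list(itertools.chain([_toy_room_id], chunk))
    for chunk in _toy_batch_iter(_toy_members_changed, 50)
]
# 120 changed members -> 3 pokes of 51, 51 and 21 keys (room ID + up to 50 users).
assert [len(p) for p in _toy_pokes] == [51, 51, 21]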

synapse/storage/databases/main/censor_events.py

@@ -0,0 +1,210 @@
# -*- coding: utf-8 -*-
# Copyright 2020 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from typing import TYPE_CHECKING
from twisted.internet import defer
from synapse.events.utils import prune_event_dict
from synapse.metrics.background_process_metrics import run_as_background_process
from synapse.storage._base import SQLBaseStore
from synapse.storage.database import DatabasePool
from synapse.storage.databases.main.cache import CacheInvalidationWorkerStore
from synapse.storage.databases.main.events import encode_json
from synapse.storage.databases.main.events_worker import EventsWorkerStore
if TYPE_CHECKING:
from synapse.server import HomeServer
logger = logging.getLogger(__name__)
class CensorEventsStore(EventsWorkerStore, CacheInvalidationWorkerStore, SQLBaseStore):
def __init__(self, database: DatabasePool, db_conn, hs: "HomeServer"):
super().__init__(database, db_conn, hs)
def _censor_redactions():
return run_as_background_process(
"_censor_redactions", self._censor_redactions
)
if self.hs.config.redaction_retention_period is not None:
hs.get_clock().looping_call(_censor_redactions, 5 * 60 * 1000)
async def _censor_redactions(self):
"""Censors all redactions older than the configured period that haven't
been censored yet.
By censor we mean update the event_json table with the redacted event.
"""
if self.hs.config.redaction_retention_period is None:
return
if not (
await self.db_pool.updates.has_completed_background_update(
"redactions_have_censored_ts_idx"
)
):
# We don't want to run this until the appropriate index has been
# created.
return
before_ts = self._clock.time_msec() - self.hs.config.redaction_retention_period
# We fetch all redactions that:
# 1. point to an event we have,
        # 2. have a received_ts from before the cut off, and
# 3. we haven't yet censored.
#
# This is limited to 100 events to ensure that we don't try and do too
# much at once. We'll get called again so this should eventually catch
# up.
sql = """
SELECT redactions.event_id, redacts FROM redactions
LEFT JOIN events AS original_event ON (
redacts = original_event.event_id
)
WHERE NOT have_censored
AND redactions.received_ts <= ?
ORDER BY redactions.received_ts ASC
LIMIT ?
"""
rows = await self.db_pool.execute(
"_censor_redactions_fetch", None, sql, before_ts, 100
)
updates = []
for redaction_id, event_id in rows:
redaction_event = await self.get_event(redaction_id, allow_none=True)
original_event = await self.get_event(
event_id, allow_rejected=True, allow_none=True
)
# The SQL above ensures that we have both the redaction and
# original event, so if the `get_event` calls return None it
# means that the redaction wasn't allowed. Either way we know that
# the result won't change so we mark the fact that we've checked.
if (
redaction_event
and original_event
and original_event.internal_metadata.is_redacted()
):
# Redaction was allowed
pruned_json = encode_json(
prune_event_dict(
original_event.room_version, original_event.get_dict()
)
)
else:
# Redaction wasn't allowed
pruned_json = None
updates.append((redaction_id, event_id, pruned_json))
def _update_censor_txn(txn):
for redaction_id, event_id, pruned_json in updates:
if pruned_json:
self._censor_event_txn(txn, event_id, pruned_json)
self.db_pool.simple_update_one_txn(
txn,
table="redactions",
keyvalues={"event_id": redaction_id},
updatevalues={"have_censored": True},
)
await self.db_pool.runInteraction("_update_censor_txn", _update_censor_txn)
def _censor_event_txn(self, txn, event_id, pruned_json):
"""Censor an event by replacing its JSON in the event_json table with the
provided pruned JSON.
Args:
txn (LoggingTransaction): The database transaction.
event_id (str): The ID of the event to censor.
pruned_json (str): The pruned JSON
"""
self.db_pool.simple_update_one_txn(
txn,
table="event_json",
keyvalues={"event_id": event_id},
updatevalues={"json": pruned_json},
)
@defer.inlineCallbacks
def expire_event(self, event_id):
"""Retrieve and expire an event that has expired, and delete its associated
expiry timestamp. If the event can't be retrieved, delete its associated
timestamp so we don't try to expire it again in the future.
Args:
event_id (str): The ID of the event to delete.
"""
# Try to retrieve the event's content from the database or the event cache.
event = yield self.get_event(event_id)
def delete_expired_event_txn(txn):
# Delete the expiry timestamp associated with this event from the database.
self._delete_event_expiry_txn(txn, event_id)
if not event:
# If we can't find the event, log a warning and delete the expiry date
# from the database so that we don't try to expire it again in the
# future.
logger.warning(
"Can't expire event %s because we don't have it.", event_id
)
return
# Prune the event's dict then convert it to JSON.
pruned_json = encode_json(
prune_event_dict(event.room_version, event.get_dict())
)
# Update the event_json table to replace the event's JSON with the pruned
# JSON.
self._censor_event_txn(txn, event.event_id, pruned_json)
# We need to invalidate the event cache entry for this event because we
# changed its content in the database. We can't call
# self._invalidate_cache_and_stream because self.get_event_cache isn't of the
# right type.
txn.call_after(self._get_event_cache.invalidate, (event.event_id,))
# Send that invalidation to replication so that other workers also invalidate
# the event cache.
self._send_invalidation_to_replication(
txn, "_get_event_cache", (event.event_id,)
)
yield self.db_pool.runInteraction(
"delete_expired_event", delete_expired_event_txn
)
def _delete_event_expiry_txn(self, txn, event_id):
"""Delete the expiry timestamp associated with an event ID without deleting the
actual event.
Args:
txn (LoggingTransaction): The transaction to use to perform the deletion.
event_id (str): The event ID to delete the associated expiry timestamp of.
"""
return self.db_pool.simple_delete_txn(
txn=txn, table="event_expiry", keyvalues={"event_id": event_id}
)
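# The looping_call(_censor_redactions, 5 * 60 * 1000) wiring in the constructor
# above is the usual shape for periodic maintenance: each run censors one
# bounded batch (the query is LIMIT 100) and the clock re-runs it until the
# backlog is cleared. A rough asyncio-based sketch of that shape follows; the
# names are invented, this is not Synapse's Clock/looping_call, and it reuses
# this module's logger.
import asyncio


async def _censor_one_batch_stub() -> None:
    # Stand-in for CensorEventsStore._censor_redactions: do a bounded amount of
    # work and return, leaving the rest for the next tick.
    logger.info("censoring one batch of old redactions")


async def _toy_looping_call(fn, interval_s: float) -> None:
    # Log failures rather than letting one bad run stop the schedule, roughly
    # what run_as_background_process provides in the real code.
    while True:
        try:
            await fn()
        except Exception:
            logger.exception("background censor run failed")
        await asyncio.sleep(interval_s)


# asyncio.run(_toy_looping_call(_censor_one_batch_stub, 5 * 60))  # every 5 minutes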

synapse/storage/databases/main/client_ips.py

@@ -0,0 +1,580 @@
# -*- coding: utf-8 -*-
# Copyright 2016 OpenMarket Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from twisted.internet import defer
from synapse.metrics.background_process_metrics import wrap_as_background_process
from synapse.storage._base import SQLBaseStore
from synapse.storage.database import DatabasePool, make_tuple_comparison_clause
from synapse.util.caches.descriptors import Cache
logger = logging.getLogger(__name__)
# Number of msec of granularity to store the user IP 'last seen' time. Smaller
# times give more inserts into the database even for readonly API hits
# 120 seconds == 2 minutes
LAST_SEEN_GRANULARITY = 120 * 1000
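# For example (illustrative): a client polling a read-only endpoint every few
# seconds from the same (user_id, access_token, ip) only has its "last seen"
# time written out once per two-minute window rather than on every request.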
class ClientIpBackgroundUpdateStore(SQLBaseStore):
def __init__(self, database: DatabasePool, db_conn, hs):
super(ClientIpBackgroundUpdateStore, self).__init__(database, db_conn, hs)
self.db_pool.updates.register_background_index_update(
"user_ips_device_index",
index_name="user_ips_device_id",
table="user_ips",
columns=["user_id", "device_id", "last_seen"],
)
self.db_pool.updates.register_background_index_update(
"user_ips_last_seen_index",
index_name="user_ips_last_seen",
table="user_ips",
columns=["user_id", "last_seen"],
)
self.db_pool.updates.register_background_index_update(
"user_ips_last_seen_only_index",
index_name="user_ips_last_seen_only",
table="user_ips",
columns=["last_seen"],
)
self.db_pool.updates.register_background_update_handler(
"user_ips_analyze", self._analyze_user_ip
)
self.db_pool.updates.register_background_update_handler(
"user_ips_remove_dupes", self._remove_user_ip_dupes
)
# Register a unique index
self.db_pool.updates.register_background_index_update(
"user_ips_device_unique_index",
index_name="user_ips_user_token_ip_unique_index",
table="user_ips",
columns=["user_id", "access_token", "ip"],
unique=True,
)
# Drop the old non-unique index
self.db_pool.updates.register_background_update_handler(
"user_ips_drop_nonunique_index", self._remove_user_ip_nonunique
)
# Update the last seen info in devices.
self.db_pool.updates.register_background_update_handler(
"devices_last_seen", self._devices_last_seen_update
)
@defer.inlineCallbacks
def _remove_user_ip_nonunique(self, progress, batch_size):
def f(conn):
txn = conn.cursor()
txn.execute("DROP INDEX IF EXISTS user_ips_user_ip")
txn.close()
yield self.db_pool.runWithConnection(f)
yield self.db_pool.updates._end_background_update(
"user_ips_drop_nonunique_index"
)
return 1
@defer.inlineCallbacks
def _analyze_user_ip(self, progress, batch_size):
# Background update to analyze user_ips table before we run the
# deduplication background update. The table may not have been analyzed
# for ages due to the table locks.
#
# This will lock out the naive upserts to user_ips while it happens, but
# the analyze should be quick (28GB table takes ~10s)
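        # (ANALYZE refreshes the planner's statistics, so the deduplication
        # query below can use the (user_id, access_token, ip) and (last_seen)
        # indices rather than falling back to a sequential scan.)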
def user_ips_analyze(txn):
txn.execute("ANALYZE user_ips")
yield self.db_pool.runInteraction("user_ips_analyze", user_ips_analyze)
yield self.db_pool.updates._end_background_update("user_ips_analyze")
return 1
@defer.inlineCallbacks
def _remove_user_ip_dupes(self, progress, batch_size):
        # This function works by scanning the user_ips table in batches
        # based on `last_seen`. For each row in a batch it searches the rest of
        # the table to see if there are any duplicates; if there are, they
        # are removed and replaced with a suitable row.
# Fetch the start of the batch
begin_last_seen = progress.get("last_seen", 0)
def get_last_seen(txn):
txn.execute(
"""
SELECT last_seen FROM user_ips
WHERE last_seen > ?
ORDER BY last_seen
LIMIT 1
OFFSET ?
""",
(begin_last_seen, batch_size),
)
row = txn.fetchone()
if row:
return row[0]
else:
return None
# Get a last seen that has roughly `batch_size` since `begin_last_seen`
end_last_seen = yield self.db_pool.runInteraction(
"user_ips_dups_get_last_seen", get_last_seen
)
# If it returns None, then we're processing the last batch
last = end_last_seen is None
logger.info(
"Scanning for duplicate 'user_ips' rows in range: %s <= last_seen < %s",
begin_last_seen,
end_last_seen,
)
def remove(txn):
# This works by looking at all entries in the given time span, and
# then for each (user_id, access_token, ip) tuple in that range
# checking for any duplicates in the rest of the table (via a join).
# It then only returns entries which have duplicates, and the max
            # last_seen across all duplicates, which can then be used to delete
# all other duplicates.
# It is efficient due to the existence of (user_id, access_token,
# ip) and (last_seen) indices.
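            # Each row returned is therefore of the form
            #   (user_id, access_token, ip, device_id, user_agent, last_seen, count)
            # where device_id/user_agent/last_seen are the maximums across the
            # duplicates and count > 1.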
# Define the search space, which requires handling the last batch in
# a different way
if last:
clause = "? <= last_seen"
args = (begin_last_seen,)
else:
clause = "? <= last_seen AND last_seen < ?"
args = (begin_last_seen, end_last_seen)
# (Note: The DISTINCT in the inner query is important to ensure that
# the COUNT(*) is accurate, otherwise double counting may happen due
# to the join effectively being a cross product)
txn.execute(
"""
SELECT user_id, access_token, ip,
MAX(device_id), MAX(user_agent), MAX(last_seen),
COUNT(*)
FROM (
SELECT DISTINCT user_id, access_token, ip
FROM user_ips
WHERE {}
) c
INNER JOIN user_ips USING (user_id, access_token, ip)
GROUP BY user_id, access_token, ip
HAVING count(*) > 1
""".format(
clause
),
args,
)
res = txn.fetchall()
# We've got some duplicates
for i in res:
user_id, access_token, ip, device_id, user_agent, last_seen, count = i
# We want to delete the duplicates so we end up with only a
# single row.
#
# The naive way of doing this would be just to delete all rows
# and reinsert a constructed row. However, if there are a lot of
# duplicate rows this can cause the table to grow a lot, which
# can be problematic in two ways:
# 1. If user_ips is already large then this can cause the
# table to rapidly grow, potentially filling the disk.
# 2. Reinserting a lot of rows can confuse the table
# statistics for postgres, causing it to not use the
# correct indices for the query above, resulting in a full
# table scan. This is incredibly slow for large tables and
# can kill database performance. (This seems to mainly
# happen for the last query where the clause is simply `? <
# last_seen`)
#
# So instead we want to delete all but *one* of the duplicate
# rows. That is hard to do reliably, so we cheat and do a two
# step process:
# 1. Delete all rows with a last_seen strictly less than the
# max last_seen. This hopefully results in deleting all but
# one row the majority of the time, but there may be
# duplicate last_seen
# 2. If multiple rows remain, we fall back to the naive method
# and simply delete all rows and reinsert.
#
# Note that this relies on no new duplicate rows being inserted,
# but if that is happening then this entire process is futile
# anyway.
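                # Worked example (illustrative): five duplicate rows with
                # last_seen values 10 < 20 < 30 < 40 < 50. Step 1 deletes the
                # four rows with last_seen < 50 and we are done. If two rows
                # tied on the max last_seen of 50, step 1 would only delete
                # three rows, so we would fall through to step 2 and reinsert
                # a single constructed row.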
# Do step 1:
txn.execute(
"""
DELETE FROM user_ips
WHERE user_id = ? AND access_token = ? AND ip = ? AND last_seen < ?
""",
(user_id, access_token, ip, last_seen),
)
if txn.rowcount == count - 1:
# We deleted all but one of the duplicate rows, i.e. there
# is exactly one remaining and so there is nothing left to
# do.
continue
elif txn.rowcount >= count:
raise Exception(
"We deleted more duplicate rows from 'user_ips' than expected"
)
# The previous step didn't delete enough rows, so we fallback to
# step 2:
# Drop all the duplicates
txn.execute(
"""
DELETE FROM user_ips
WHERE user_id = ? AND access_token = ? AND ip = ?
""",
(user_id, access_token, ip),
)
# Add in one to be the last_seen
txn.execute(
"""
INSERT INTO user_ips
(user_id, access_token, ip, device_id, user_agent, last_seen)
VALUES (?, ?, ?, ?, ?, ?)
""",
(user_id, access_token, ip, device_id, user_agent, last_seen),
)
self.db_pool.updates._background_update_progress_txn(
txn, "user_ips_remove_dupes", {"last_seen": end_last_seen}
)
yield self.db_pool.runInteraction("user_ips_dups_remove", remove)
if last:
yield self.db_pool.updates._end_background_update("user_ips_remove_dupes")
return batch_size
@defer.inlineCallbacks
def _devices_last_seen_update(self, progress, batch_size):
"""Background update to insert last seen info into devices table
"""
last_user_id = progress.get("last_user_id", "")
last_device_id = progress.get("last_device_id", "")
def _devices_last_seen_update_txn(txn):
# This consists of two queries:
#
# 1. The sub-query searches for the next N devices and joins
# against user_ips to find the max last_seen associated with
# that device.
# 2. The outer query then joins again against user_ips on
# user/device/last_seen. This *should* hopefully only
# return one row, but if it does return more than one then
# we'll just end up updating the same device row multiple
# times, which is fine.
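            # make_tuple_comparison_clause builds the keyset-pagination filter;
            # roughly speaking (illustrative) it yields a clause equivalent to
            #   (user_id, device_id) > (?, ?)
            # bound to (last_user_id, last_device_id), so each batch resumes
            # strictly after the previous one.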
where_clause, where_args = make_tuple_comparison_clause(
self.database_engine,
[("user_id", last_user_id), ("device_id", last_device_id)],
)
sql = """
SELECT
last_seen, ip, user_agent, user_id, device_id
FROM (
SELECT
user_id, device_id, MAX(u.last_seen) AS last_seen
FROM devices
INNER JOIN user_ips AS u USING (user_id, device_id)
WHERE %(where_clause)s
GROUP BY user_id, device_id
ORDER BY user_id ASC, device_id ASC
LIMIT ?
) c
INNER JOIN user_ips AS u USING (user_id, device_id, last_seen)
""" % {
"where_clause": where_clause
}
txn.execute(sql, where_args + [batch_size])
rows = txn.fetchall()
if not rows:
return 0
sql = """
UPDATE devices
SET last_seen = ?, ip = ?, user_agent = ?
WHERE user_id = ? AND device_id = ?
"""
txn.execute_batch(sql, rows)
_, _, _, user_id, device_id = rows[-1]
self.db_pool.updates._background_update_progress_txn(
txn,
"devices_last_seen",
{"last_user_id": user_id, "last_device_id": device_id},
)
return len(rows)
updated = yield self.db_pool.runInteraction(
"_devices_last_seen_update", _devices_last_seen_update_txn
)
if not updated:
yield self.db_pool.updates._end_background_update("devices_last_seen")
return updated
class ClientIpStore(ClientIpBackgroundUpdateStore):
def __init__(self, database: DatabasePool, db_conn, hs):
self.client_ip_last_seen = Cache(
name="client_ip_last_seen", keylen=4, max_entries=50000
)
super(ClientIpStore, self).__init__(database, db_conn, hs)
self.user_ips_max_age = hs.config.user_ips_max_age
# (user_id, access_token, ip,) -> (user_agent, device_id, last_seen)
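        # e.g. (illustrative):
        #   ("@alice:example.org", "<access token>", "10.1.2.3")
        #       -> ("Mozilla/5.0 ...", "ABCDEFGHIJ", 1596000000000)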
self._batch_row_update = {}
self._client_ip_looper = self._clock.looping_call(
self._update_client_ips_batch, 5 * 1000
)
self.hs.get_reactor().addSystemEventTrigger(
"before", "shutdown", self._update_client_ips_batch
)
if self.user_ips_max_age:
self._clock.looping_call(self._prune_old_user_ips, 5 * 1000)
@defer.inlineCallbacks
def insert_client_ip(
self, user_id, access_token, ip, user_agent, device_id, now=None
):
if not now:
now = int(self._clock.time_msec())
key = (user_id, access_token, ip)
try:
last_seen = self.client_ip_last_seen.get(key)
except KeyError:
last_seen = None
yield self.populate_monthly_active_users(user_id)
# Rate-limited inserts
if last_seen is not None and (now - last_seen) < LAST_SEEN_GRANULARITY:
return
self.client_ip_last_seen.prefill(key, now)
self._batch_row_update[key] = (user_agent, device_id, now)
@wrap_as_background_process("update_client_ips")
def _update_client_ips_batch(self):
# If the DB pool has already terminated, don't try updating
if not self.db_pool.is_running():
return
to_update = self._batch_row_update
self._batch_row_update = {}
return self.db_pool.runInteraction(
"_update_client_ips_batch", self._update_client_ips_batch_txn, to_update
)
def _update_client_ips_batch_txn(self, txn, to_update):
if "user_ips" in self.db_pool._unsafe_to_upsert_tables or (
not self.database_engine.can_native_upsert
):
self.database_engine.lock_table(txn, "user_ips")
for entry in to_update.items():
(user_id, access_token, ip), (user_agent, device_id, last_seen) = entry
try:
self.db_pool.simple_upsert_txn(
txn,
table="user_ips",
keyvalues={
"user_id": user_id,
"access_token": access_token,
"ip": ip,
},
values={
"user_agent": user_agent,
"device_id": device_id,
"last_seen": last_seen,
},
lock=False,
)
# Technically an access token might not be associated with
# a device so we need to check.
if device_id:
# this is always an update rather than an upsert: the row should
# already exist, and if it doesn't, that may be because it has been
# deleted, and we don't want to re-create it.
self.db_pool.simple_update_txn(
txn,
table="devices",
keyvalues={"user_id": user_id, "device_id": device_id},
updatevalues={
"user_agent": user_agent,
"last_seen": last_seen,
"ip": ip,
},
)
except Exception as e:
# Failed to upsert, log and continue
logger.error("Failed to insert client IP %r: %r", entry, e)
@defer.inlineCallbacks
def get_last_client_ip_by_device(self, user_id, device_id):
"""For each device_id listed, give the user_ip it was last seen on
Args:
user_id (str)
device_id (str): If None fetches all devices for the user
Returns:
defer.Deferred: resolves to a dict, where the keys
are (user_id, device_id) tuples. The values are also dicts, with
keys giving the column names
"""
keyvalues = {"user_id": user_id}
if device_id is not None:
keyvalues["device_id"] = device_id
res = yield self.db_pool.simple_select_list(
table="devices",
keyvalues=keyvalues,
retcols=("user_id", "ip", "user_agent", "device_id", "last_seen"),
)
ret = {(d["user_id"], d["device_id"]): d for d in res}
for key in self._batch_row_update:
uid, access_token, ip = key
if uid == user_id:
user_agent, did, last_seen = self._batch_row_update[key]
if not device_id or did == device_id:
ret[(user_id, device_id)] = {
"user_id": user_id,
"access_token": access_token,
"ip": ip,
"user_agent": user_agent,
"device_id": did,
"last_seen": last_seen,
}
return ret
@defer.inlineCallbacks
def get_user_ip_and_agents(self, user):
user_id = user.to_string()
results = {}
for key in self._batch_row_update:
            uid, access_token, ip = key
if uid == user_id:
user_agent, _, last_seen = self._batch_row_update[key]
results[(access_token, ip)] = (user_agent, last_seen)
rows = yield self.db_pool.simple_select_list(
table="user_ips",
keyvalues={"user_id": user_id},
retcols=["access_token", "ip", "user_agent", "last_seen"],
desc="get_user_ip_and_agents",
)
results.update(
((row["access_token"], row["ip"]), (row["user_agent"], row["last_seen"]))
for row in rows
)
return [
{
"access_token": access_token,
"ip": ip,
"user_agent": user_agent,
"last_seen": last_seen,
}
for (access_token, ip), (user_agent, last_seen) in results.items()
]
@wrap_as_background_process("prune_old_user_ips")
async def _prune_old_user_ips(self):
"""Removes entries in user IPs older than the configured period.
"""
if self.user_ips_max_age is None:
# Nothing to do
return
if not await self.db_pool.updates.has_completed_background_update(
"devices_last_seen"
):
# Only start pruning if we have finished populating the devices
# last seen info.
return
# We do a slightly funky SQL delete to ensure we don't try and delete
# too much at once (as the table may be very large from before we
# started pruning).
#
# This works by finding the max last_seen that is less than the given
# time, but has no more than N rows before it, deleting all rows with
# a lesser last_seen time. (We COALESCE so that the sub-SELECT always
# returns exactly one row).
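        # For example (illustrative): with user_ips_max_age set to 28 days,
        # each run deletes roughly the 5000 oldest rows whose last_seen is more
        # than 28 days old, so repeated runs drain any historical backlog
        # without issuing one enormous DELETE.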
sql = """
DELETE FROM user_ips
WHERE last_seen <= (
SELECT COALESCE(MAX(last_seen), -1)
FROM (
SELECT last_seen FROM user_ips
WHERE last_seen <= ?
ORDER BY last_seen ASC
LIMIT 5000
) AS u
)
"""
timestamp = self.clock.time_msec() - self.user_ips_max_age
def _prune_old_user_ips_txn(txn):
txn.execute(sql, (timestamp,))
await self.db_pool.runInteraction(
"_prune_old_user_ips", _prune_old_user_ips_txn
)


@@ -0,0 +1,476 @@
# -*- coding: utf-8 -*-
# Copyright 2016 OpenMarket Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from typing import List, Tuple
from canonicaljson import json
from twisted.internet import defer
from synapse.logging.opentracing import log_kv, set_tag, trace
from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause
from synapse.storage.database import DatabasePool
from synapse.util.caches.expiringcache import ExpiringCache
logger = logging.getLogger(__name__)
class DeviceInboxWorkerStore(SQLBaseStore):
def get_to_device_stream_token(self):
return self._device_inbox_id_gen.get_current_token()
def get_new_messages_for_device(
self, user_id, device_id, last_stream_id, current_stream_id, limit=100
):
"""
Args:
            user_id(str): The recipient user_id.
            device_id(str): The recipient device_id.
            last_stream_id(int): The stream position to fetch messages from
                (exclusive).
            current_stream_id(int): The current position of the to device
                message stream.
            limit(int): The maximum number of messages to fetch.
Returns:
Deferred ([dict], int): List of messages for the device and where
in the stream the messages got to.
"""
has_changed = self._device_inbox_stream_cache.has_entity_changed(
user_id, last_stream_id
)
if not has_changed:
return defer.succeed(([], current_stream_id))
def get_new_messages_for_device_txn(txn):
sql = (
"SELECT stream_id, message_json FROM device_inbox"
" WHERE user_id = ? AND device_id = ?"
" AND ? < stream_id AND stream_id <= ?"
" ORDER BY stream_id ASC"
" LIMIT ?"
)
txn.execute(
sql, (user_id, device_id, last_stream_id, current_stream_id, limit)
)
messages = []
for row in txn:
stream_pos = row[0]
messages.append(db_to_json(row[1]))
if len(messages) < limit:
stream_pos = current_stream_id
return messages, stream_pos
return self.db_pool.runInteraction(
"get_new_messages_for_device", get_new_messages_for_device_txn
)
@trace
@defer.inlineCallbacks
def delete_messages_for_device(self, user_id, device_id, up_to_stream_id):
"""
Args:
user_id(str): The recipient user_id.
device_id(str): The recipient device_id.
up_to_stream_id(int): Where to delete messages up to.
Returns:
A deferred that resolves to the number of messages deleted.
"""
# If we have cached the last stream id we've deleted up to, we can
# check if there is likely to be anything that needs deleting
last_deleted_stream_id = self._last_device_delete_cache.get(
(user_id, device_id), None
)
set_tag("last_deleted_stream_id", last_deleted_stream_id)
if last_deleted_stream_id:
has_changed = self._device_inbox_stream_cache.has_entity_changed(
user_id, last_deleted_stream_id
)
if not has_changed:
log_kv({"message": "No changes in cache since last check"})
return 0
def delete_messages_for_device_txn(txn):
sql = (
"DELETE FROM device_inbox"
" WHERE user_id = ? AND device_id = ?"
" AND stream_id <= ?"
)
txn.execute(sql, (user_id, device_id, up_to_stream_id))
return txn.rowcount
count = yield self.db_pool.runInteraction(
"delete_messages_for_device", delete_messages_for_device_txn
)
log_kv(
{"message": "deleted {} messages for device".format(count), "count": count}
)
# Update the cache, ensuring that we only ever increase the value
last_deleted_stream_id = self._last_device_delete_cache.get(
(user_id, device_id), 0
)
self._last_device_delete_cache[(user_id, device_id)] = max(
last_deleted_stream_id, up_to_stream_id
)
return count
@trace
def get_new_device_msgs_for_remote(
self, destination, last_stream_id, current_stream_id, limit
):
"""
Args:
destination(str): The name of the remote server.
last_stream_id(int|long): The last position of the device message stream
that the server sent up to.
            current_stream_id(int|long): The current position of the device
                message stream.
            limit(int): The maximum number of messages to retrieve.
Returns:
Deferred ([dict], int|long): List of messages for the device and where
in the stream the messages got to.
"""
set_tag("destination", destination)
set_tag("last_stream_id", last_stream_id)
set_tag("current_stream_id", current_stream_id)
set_tag("limit", limit)
has_changed = self._device_federation_outbox_stream_cache.has_entity_changed(
destination, last_stream_id
)
if not has_changed or last_stream_id == current_stream_id:
log_kv({"message": "No new messages in stream"})
return defer.succeed(([], current_stream_id))
if limit <= 0:
# This can happen if we run out of room for EDUs in the transaction.
return defer.succeed(([], last_stream_id))
@trace
def get_new_messages_for_remote_destination_txn(txn):
sql = (
"SELECT stream_id, messages_json FROM device_federation_outbox"
" WHERE destination = ?"
" AND ? < stream_id AND stream_id <= ?"
" ORDER BY stream_id ASC"
" LIMIT ?"
)
txn.execute(sql, (destination, last_stream_id, current_stream_id, limit))
messages = []
for row in txn:
stream_pos = row[0]
messages.append(db_to_json(row[1]))
if len(messages) < limit:
log_kv({"message": "Set stream position to current position"})
stream_pos = current_stream_id
return messages, stream_pos
return self.db_pool.runInteraction(
"get_new_device_msgs_for_remote",
get_new_messages_for_remote_destination_txn,
)
@trace
def delete_device_msgs_for_remote(self, destination, up_to_stream_id):
"""Used to delete messages when the remote destination acknowledges
their receipt.
Args:
destination(str): The destination server_name
up_to_stream_id(int): Where to delete messages up to.
Returns:
A deferred that resolves when the messages have been deleted.
"""
def delete_messages_for_remote_destination_txn(txn):
sql = (
"DELETE FROM device_federation_outbox"
" WHERE destination = ?"
" AND stream_id <= ?"
)
txn.execute(sql, (destination, up_to_stream_id))
return self.db_pool.runInteraction(
"delete_device_msgs_for_remote", delete_messages_for_remote_destination_txn
)
async def get_all_new_device_messages(
self, instance_name: str, last_id: int, current_id: int, limit: int
) -> Tuple[List[Tuple[int, tuple]], int, bool]:
"""Get updates for to device replication stream.
Args:
instance_name: The writer we want to fetch updates from. Unused
here since there is only ever one writer.
last_id: The token to fetch updates from. Exclusive.
current_id: The token to fetch updates up to. Inclusive.
limit: The requested limit for the number of rows to return. The
function may return more or fewer rows.
Returns:
A tuple consisting of: the updates, a token to use to fetch
            subsequent updates, and whether we returned fewer rows than exist
between the requested tokens due to the limit.
The token returned can be used in a subsequent call to this
            function to get further updates.
The updates are a list of 2-tuples of stream ID and the row data
"""
if last_id == current_id:
return [], current_id, False
def get_all_new_device_messages_txn(txn):
# We limit like this as we might have multiple rows per stream_id, and
# we want to make sure we always get all entries for any stream_id
# we return.
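            # e.g. (illustrative): with last_id=10, limit=100 and
            # current_id=500, upper_pos is 110, so we return every update with
            # stream_id in (10, 110] even if that is more than `limit` rows,
            # and the caller continues from the token we hand back.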
upper_pos = min(current_id, last_id + limit)
sql = (
"SELECT max(stream_id), user_id"
" FROM device_inbox"
" WHERE ? < stream_id AND stream_id <= ?"
" GROUP BY user_id"
)
txn.execute(sql, (last_id, upper_pos))
updates = [(row[0], row[1:]) for row in txn]
sql = (
"SELECT max(stream_id), destination"
" FROM device_federation_outbox"
" WHERE ? < stream_id AND stream_id <= ?"
" GROUP BY destination"
)
txn.execute(sql, (last_id, upper_pos))
updates.extend((row[0], row[1:]) for row in txn)
# Order by ascending stream ordering
updates.sort()
limited = False
upto_token = current_id
if len(updates) >= limit:
upto_token = updates[-1][0]
limited = True
return updates, upto_token, limited
return await self.db_pool.runInteraction(
"get_all_new_device_messages", get_all_new_device_messages_txn
)
class DeviceInboxBackgroundUpdateStore(SQLBaseStore):
DEVICE_INBOX_STREAM_ID = "device_inbox_stream_drop"
def __init__(self, database: DatabasePool, db_conn, hs):
super(DeviceInboxBackgroundUpdateStore, self).__init__(database, db_conn, hs)
self.db_pool.updates.register_background_index_update(
"device_inbox_stream_index",
index_name="device_inbox_stream_id_user_id",
table="device_inbox",
columns=["stream_id", "user_id"],
)
self.db_pool.updates.register_background_update_handler(
self.DEVICE_INBOX_STREAM_ID, self._background_drop_index_device_inbox
)
@defer.inlineCallbacks
def _background_drop_index_device_inbox(self, progress, batch_size):
def reindex_txn(conn):
txn = conn.cursor()
txn.execute("DROP INDEX IF EXISTS device_inbox_stream_id")
txn.close()
yield self.db_pool.runWithConnection(reindex_txn)
yield self.db_pool.updates._end_background_update(self.DEVICE_INBOX_STREAM_ID)
return 1
class DeviceInboxStore(DeviceInboxWorkerStore, DeviceInboxBackgroundUpdateStore):
DEVICE_INBOX_STREAM_ID = "device_inbox_stream_drop"
def __init__(self, database: DatabasePool, db_conn, hs):
super(DeviceInboxStore, self).__init__(database, db_conn, hs)
# Map of (user_id, device_id) to the last stream_id that has been
# deleted up to. This is so that we can no op deletions.
self._last_device_delete_cache = ExpiringCache(
cache_name="last_device_delete_cache",
clock=self._clock,
max_len=10000,
expiry_ms=30 * 60 * 1000,
)
@trace
@defer.inlineCallbacks
def add_messages_to_device_inbox(
self, local_messages_by_user_then_device, remote_messages_by_destination
):
"""Used to send messages from this server.
Args:
            local_messages_by_user_then_device(dict):
                Dictionary of user_id to device_id to message.
remote_messages_by_destination(dict):
Dictionary of destination server_name to the EDU JSON to send.
Returns:
A deferred stream_id that resolves when the messages have been
inserted.
"""
def add_messages_txn(txn, now_ms, stream_id):
# Add the local messages directly to the local inbox.
self._add_messages_to_local_device_inbox_txn(
txn, stream_id, local_messages_by_user_then_device
)
# Add the remote messages to the federation outbox.
# We'll send them to a remote server when we next send a
# federation transaction to that destination.
sql = (
"INSERT INTO device_federation_outbox"
" (destination, stream_id, queued_ts, messages_json)"
" VALUES (?,?,?,?)"
)
rows = []
for destination, edu in remote_messages_by_destination.items():
edu_json = json.dumps(edu)
rows.append((destination, stream_id, now_ms, edu_json))
txn.executemany(sql, rows)
with self._device_inbox_id_gen.get_next() as stream_id:
now_ms = self.clock.time_msec()
yield self.db_pool.runInteraction(
"add_messages_to_device_inbox", add_messages_txn, now_ms, stream_id
)
for user_id in local_messages_by_user_then_device.keys():
self._device_inbox_stream_cache.entity_has_changed(user_id, stream_id)
for destination in remote_messages_by_destination.keys():
self._device_federation_outbox_stream_cache.entity_has_changed(
destination, stream_id
)
return self._device_inbox_id_gen.get_current_token()
@defer.inlineCallbacks
def add_messages_from_remote_to_device_inbox(
self, origin, message_id, local_messages_by_user_then_device
):
def add_messages_txn(txn, now_ms, stream_id):
# Check if we've already inserted a matching message_id for that
# origin. This can happen if the origin doesn't receive our
# acknowledgement from the first time we received the message.
already_inserted = self.db_pool.simple_select_one_txn(
txn,
table="device_federation_inbox",
keyvalues={"origin": origin, "message_id": message_id},
retcols=("message_id",),
allow_none=True,
)
if already_inserted is not None:
return
# Add an entry for this message_id so that we know we've processed
# it.
self.db_pool.simple_insert_txn(
txn,
table="device_federation_inbox",
values={
"origin": origin,
"message_id": message_id,
"received_ts": now_ms,
},
)
            # Add the messages to the appropriate local device inboxes so that
# they'll be sent to the devices when they next sync.
self._add_messages_to_local_device_inbox_txn(
txn, stream_id, local_messages_by_user_then_device
)
with self._device_inbox_id_gen.get_next() as stream_id:
now_ms = self.clock.time_msec()
yield self.db_pool.runInteraction(
"add_messages_from_remote_to_device_inbox",
add_messages_txn,
now_ms,
stream_id,
)
for user_id in local_messages_by_user_then_device.keys():
self._device_inbox_stream_cache.entity_has_changed(user_id, stream_id)
return stream_id
def _add_messages_to_local_device_inbox_txn(
self, txn, stream_id, messages_by_user_then_device
):
local_by_user_then_device = {}
for user_id, messages_by_device in messages_by_user_then_device.items():
messages_json_for_user = {}
devices = list(messages_by_device.keys())
if len(devices) == 1 and devices[0] == "*":
# Handle wildcard device_ids.
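                # e.g. (illustrative): messages_by_device == {"*": {...}} fans
                # the same payload out to every device this user has registered
                # on this homeserver.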
sql = "SELECT device_id FROM devices WHERE user_id = ?"
txn.execute(sql, (user_id,))
message_json = json.dumps(messages_by_device["*"])
for row in txn:
# Add the message for all devices for this user on this
# server.
device = row[0]
messages_json_for_user[device] = message_json
else:
if not devices:
continue
clause, args = make_in_list_sql_clause(
txn.database_engine, "device_id", devices
)
sql = "SELECT device_id FROM devices WHERE user_id = ? AND " + clause
# TODO: Maybe this needs to be done in batches if there are
# too many local devices for a given user.
txn.execute(sql, [user_id] + list(args))
for row in txn:
# Only insert into the local inbox if the device exists on
# this server
device = row[0]
message_json = json.dumps(messages_by_device[device])
messages_json_for_user[device] = message_json
if messages_json_for_user:
local_by_user_then_device[user_id] = messages_json_for_user
if not local_by_user_then_device:
return
sql = (
"INSERT INTO device_inbox"
" (user_id, device_id, stream_id, message_json)"
" VALUES (?,?,?,?)"
)
rows = []
for user_id, messages_by_device in local_by_user_then_device.items():
for device_id, message_json in messages_by_device.items():
rows.append((user_id, device_id, stream_id, message_json))
txn.executemany(sql, rows)

File diff suppressed because it is too large


@@ -0,0 +1,195 @@
# -*- coding: utf-8 -*-
# Copyright 2014-2016 OpenMarket Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import namedtuple
from typing import Optional
from twisted.internet import defer
from synapse.api.errors import SynapseError
from synapse.storage._base import SQLBaseStore
from synapse.util.caches.descriptors import cached
RoomAliasMapping = namedtuple("RoomAliasMapping", ("room_id", "room_alias", "servers"))
class DirectoryWorkerStore(SQLBaseStore):
@defer.inlineCallbacks
def get_association_from_room_alias(self, room_alias):
""" Get's the room_id and server list for a given room_alias
Args:
room_alias (RoomAlias)
Returns:
Deferred: results in namedtuple with keys "room_id" and
"servers" or None if no association can be found
"""
room_id = yield self.db_pool.simple_select_one_onecol(
"room_aliases",
{"room_alias": room_alias.to_string()},
"room_id",
allow_none=True,
desc="get_association_from_room_alias",
)
if not room_id:
return None
servers = yield self.db_pool.simple_select_onecol(
"room_alias_servers",
{"room_alias": room_alias.to_string()},
"server",
desc="get_association_from_room_alias",
)
if not servers:
return None
return RoomAliasMapping(room_id, room_alias.to_string(), servers)
def get_room_alias_creator(self, room_alias):
return self.db_pool.simple_select_one_onecol(
table="room_aliases",
keyvalues={"room_alias": room_alias},
retcol="creator",
desc="get_room_alias_creator",
)
@cached(max_entries=5000)
def get_aliases_for_room(self, room_id):
return self.db_pool.simple_select_onecol(
"room_aliases",
{"room_id": room_id},
"room_alias",
desc="get_aliases_for_room",
)
class DirectoryStore(DirectoryWorkerStore):
@defer.inlineCallbacks
def create_room_alias_association(self, room_alias, room_id, servers, creator=None):
""" Creates an association between a room alias and room_id/servers
Args:
room_alias (RoomAlias)
room_id (str)
servers (list)
creator (str): Optional user_id of creator.
Returns:
Deferred
"""
def alias_txn(txn):
self.db_pool.simple_insert_txn(
txn,
"room_aliases",
{
"room_alias": room_alias.to_string(),
"room_id": room_id,
"creator": creator,
},
)
self.db_pool.simple_insert_many_txn(
txn,
table="room_alias_servers",
values=[
{"room_alias": room_alias.to_string(), "server": server}
for server in servers
],
)
self._invalidate_cache_and_stream(
txn, self.get_aliases_for_room, (room_id,)
)
try:
ret = yield self.db_pool.runInteraction(
"create_room_alias_association", alias_txn
)
except self.database_engine.module.IntegrityError:
raise SynapseError(
409, "Room alias %s already exists" % room_alias.to_string()
)
return ret
@defer.inlineCallbacks
def delete_room_alias(self, room_alias):
room_id = yield self.db_pool.runInteraction(
"delete_room_alias", self._delete_room_alias_txn, room_alias
)
return room_id
def _delete_room_alias_txn(self, txn, room_alias):
txn.execute(
"SELECT room_id FROM room_aliases WHERE room_alias = ?",
(room_alias.to_string(),),
)
res = txn.fetchone()
if res:
room_id = res[0]
else:
return None
txn.execute(
"DELETE FROM room_aliases WHERE room_alias = ?", (room_alias.to_string(),)
)
txn.execute(
"DELETE FROM room_alias_servers WHERE room_alias = ?",
(room_alias.to_string(),),
)
self._invalidate_cache_and_stream(txn, self.get_aliases_for_room, (room_id,))
return room_id
def update_aliases_for_room(
self, old_room_id: str, new_room_id: str, creator: Optional[str] = None,
):
"""Repoint all of the aliases for a given room, to a different room.
Args:
old_room_id:
new_room_id:
creator: The user to record as the creator of the new mapping.
If None, the creator will be left unchanged.
"""
def _update_aliases_for_room_txn(txn):
update_creator_sql = ""
sql_params = (new_room_id, old_room_id)
if creator:
update_creator_sql = ", creator = ?"
sql_params = (new_room_id, creator, old_room_id)
sql = "UPDATE room_aliases SET room_id = ? %s WHERE room_id = ?" % (
update_creator_sql,
)
txn.execute(sql, sql_params)
self._invalidate_cache_and_stream(
txn, self.get_aliases_for_room, (old_room_id,)
)
self._invalidate_cache_and_stream(
txn, self.get_aliases_for_room, (new_room_id,)
)
return self.db_pool.runInteraction(
"_update_aliases_for_room_txn", _update_aliases_for_room_txn
)


@@ -0,0 +1,439 @@
# -*- coding: utf-8 -*-
# Copyright 2017 New Vector Ltd
# Copyright 2019 Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from canonicaljson import json
from twisted.internet import defer
from synapse.api.errors import StoreError
from synapse.logging.opentracing import log_kv, trace
from synapse.storage._base import SQLBaseStore, db_to_json
class EndToEndRoomKeyStore(SQLBaseStore):
@defer.inlineCallbacks
def update_e2e_room_key(self, user_id, version, room_id, session_id, room_key):
"""Replaces the encrypted E2E room key for a given session in a given backup
Args:
user_id(str): the user whose backup we're setting
version(str): the version ID of the backup we're updating
room_id(str): the ID of the room whose keys we're setting
session_id(str): the session whose room_key we're setting
room_key(dict): the room_key being set
Raises:
StoreError
"""
yield self.db_pool.simple_update_one(
table="e2e_room_keys",
keyvalues={
"user_id": user_id,
"version": version,
"room_id": room_id,
"session_id": session_id,
},
updatevalues={
"first_message_index": room_key["first_message_index"],
"forwarded_count": room_key["forwarded_count"],
"is_verified": room_key["is_verified"],
"session_data": json.dumps(room_key["session_data"]),
},
desc="update_e2e_room_key",
)
@defer.inlineCallbacks
def add_e2e_room_keys(self, user_id, version, room_keys):
"""Bulk add room keys to a given backup.
Args:
user_id (str): the user whose backup we're adding to
version (str): the version ID of the backup for the set of keys we're adding to
room_keys (iterable[(str, str, dict)]): the keys to add, in the form
(roomID, sessionID, keyData)
"""
values = []
for (room_id, session_id, room_key) in room_keys:
values.append(
{
"user_id": user_id,
"version": version,
"room_id": room_id,
"session_id": session_id,
"first_message_index": room_key["first_message_index"],
"forwarded_count": room_key["forwarded_count"],
"is_verified": room_key["is_verified"],
"session_data": json.dumps(room_key["session_data"]),
}
)
log_kv(
{
"message": "Set room key",
"room_id": room_id,
"session_id": session_id,
"room_key": room_key,
}
)
yield self.db_pool.simple_insert_many(
table="e2e_room_keys", values=values, desc="add_e2e_room_keys"
)
@trace
@defer.inlineCallbacks
def get_e2e_room_keys(self, user_id, version, room_id=None, session_id=None):
"""Bulk get the E2E room keys for a given backup, optionally filtered to a given
room, or a given session.
Args:
user_id (str): the user whose backup we're querying
version (str): the version ID of the backup for the set of keys we're querying
room_id (str): Optional. the ID of the room whose keys we're querying, if any.
If not specified, we return the keys for all the rooms in the backup.
session_id (str): Optional. the session whose room_key we're querying, if any.
If specified, we also require the room_id to be specified.
If not specified, we return all the keys in this version of
the backup (or for the specified room)
Returns:
A deferred list of dicts giving the session_data and message metadata for
these room keys.
"""
try:
version = int(version)
except ValueError:
return {"rooms": {}}
keyvalues = {"user_id": user_id, "version": version}
if room_id:
keyvalues["room_id"] = room_id
if session_id:
keyvalues["session_id"] = session_id
rows = yield self.db_pool.simple_select_list(
table="e2e_room_keys",
keyvalues=keyvalues,
retcols=(
"user_id",
"room_id",
"session_id",
"first_message_index",
"forwarded_count",
"is_verified",
"session_data",
),
desc="get_e2e_room_keys",
)
sessions = {"rooms": {}}
for row in rows:
room_entry = sessions["rooms"].setdefault(row["room_id"], {"sessions": {}})
room_entry["sessions"][row["session_id"]] = {
"first_message_index": row["first_message_index"],
"forwarded_count": row["forwarded_count"],
# is_verified must be returned to the client as a boolean
"is_verified": bool(row["is_verified"]),
"session_data": db_to_json(row["session_data"]),
}
return sessions
def get_e2e_room_keys_multi(self, user_id, version, room_keys):
"""Get multiple room keys at a time. The difference between this function and
get_e2e_room_keys is that this function can be used to retrieve
multiple specific keys at a time, whereas get_e2e_room_keys is used for
getting all the keys in a backup version, all the keys for a room, or a
specific key.
Args:
user_id (str): the user whose backup we're querying
version (str): the version ID of the backup we're querying about
room_keys (dict[str, dict[str, iterable[str]]]): a map from
                room ID -> {"sessions": [session ids]} indicating the session IDs
that we want to query
Returns:
Deferred[dict[str, dict[str, dict]]]: a map of room IDs to session IDs to room key
"""
return self.db_pool.runInteraction(
"get_e2e_room_keys_multi",
self._get_e2e_room_keys_multi_txn,
user_id,
version,
room_keys,
)
@staticmethod
def _get_e2e_room_keys_multi_txn(txn, user_id, version, room_keys):
if not room_keys:
return {}
where_clauses = []
params = [user_id, version]
for room_id, room in room_keys.items():
sessions = list(room["sessions"])
if not sessions:
continue
params.append(room_id)
params.extend(sessions)
where_clauses.append(
"(room_id = ? AND session_id IN (%s))"
% (",".join(["?" for _ in sessions]),)
)
# check if we're actually querying something
if not where_clauses:
return {}
sql = """
SELECT room_id, session_id, first_message_index, forwarded_count,
is_verified, session_data
FROM e2e_room_keys
WHERE user_id = ? AND version = ? AND (%s)
""" % (
" OR ".join(where_clauses)
)
txn.execute(sql, params)
ret = {}
for row in txn:
room_id = row[0]
session_id = row[1]
ret.setdefault(room_id, {})
ret[room_id][session_id] = {
"first_message_index": row[2],
"forwarded_count": row[3],
"is_verified": row[4],
"session_data": db_to_json(row[5]),
}
return ret
def count_e2e_room_keys(self, user_id, version):
"""Get the number of keys in a backup version.
Args:
user_id (str): the user whose backup we're querying
version (str): the version ID of the backup we're querying about
"""
return self.db_pool.simple_select_one_onecol(
table="e2e_room_keys",
keyvalues={"user_id": user_id, "version": version},
retcol="COUNT(*)",
desc="count_e2e_room_keys",
)
@trace
@defer.inlineCallbacks
def delete_e2e_room_keys(self, user_id, version, room_id=None, session_id=None):
"""Bulk delete the E2E room keys for a given backup, optionally filtered to a given
room or a given session.
Args:
user_id(str): the user whose backup we're deleting from
version(str): the version ID of the backup for the set of keys we're deleting
room_id(str): Optional. the ID of the room whose keys we're deleting, if any.
If not specified, we delete the keys for all the rooms in the backup.
            session_id(str): Optional. the session whose room_key we're deleting, if any.
If specified, we also require the room_id to be specified.
If not specified, we delete all the keys in this version of
the backup (or for the specified room)
Returns:
A deferred of the deletion transaction
"""
keyvalues = {"user_id": user_id, "version": int(version)}
if room_id:
keyvalues["room_id"] = room_id
if session_id:
keyvalues["session_id"] = session_id
yield self.db_pool.simple_delete(
table="e2e_room_keys", keyvalues=keyvalues, desc="delete_e2e_room_keys"
)
@staticmethod
def _get_current_version(txn, user_id):
txn.execute(
"SELECT MAX(version) FROM e2e_room_keys_versions "
"WHERE user_id=? AND deleted=0",
(user_id,),
)
row = txn.fetchone()
if not row:
raise StoreError(404, "No current backup version")
return row[0]
def get_e2e_room_keys_version_info(self, user_id, version=None):
"""Get info metadata about a version of our room_keys backup.
Args:
user_id(str): the user whose backup we're querying
version(str): Optional. the version ID of the backup we're querying about
If missing, we return the information about the current version.
Raises:
StoreError: with code 404 if there are no e2e_room_keys_versions present
Returns:
A deferred dict giving the info metadata for this backup version, with
fields including:
version(str)
algorithm(str)
auth_data(object): opaque dict supplied by the client
etag(int): tag of the keys in the backup
"""
def _get_e2e_room_keys_version_info_txn(txn):
if version is None:
this_version = self._get_current_version(txn, user_id)
else:
try:
this_version = int(version)
except ValueError:
# Our versions are all ints so if we can't convert it to an integer,
# it isn't there.
raise StoreError(404, "No row found")
result = self.db_pool.simple_select_one_txn(
txn,
table="e2e_room_keys_versions",
keyvalues={"user_id": user_id, "version": this_version, "deleted": 0},
retcols=("version", "algorithm", "auth_data", "etag"),
)
result["auth_data"] = db_to_json(result["auth_data"])
result["version"] = str(result["version"])
if result["etag"] is None:
result["etag"] = 0
return result
return self.db_pool.runInteraction(
"get_e2e_room_keys_version_info", _get_e2e_room_keys_version_info_txn
)
@trace
def create_e2e_room_keys_version(self, user_id, info):
"""Atomically creates a new version of this user's e2e_room_keys store
with the given version info.
Args:
            user_id(str): the user whose backup we're creating a version of
info(dict): the info about the backup version to be created
Returns:
A deferred string for the newly created version ID
"""
def _create_e2e_room_keys_version_txn(txn):
txn.execute(
"SELECT MAX(version) FROM e2e_room_keys_versions WHERE user_id=?",
(user_id,),
)
current_version = txn.fetchone()[0]
if current_version is None:
current_version = "0"
new_version = str(int(current_version) + 1)
self.db_pool.simple_insert_txn(
txn,
table="e2e_room_keys_versions",
values={
"user_id": user_id,
"version": new_version,
"algorithm": info["algorithm"],
"auth_data": json.dumps(info["auth_data"]),
},
)
return new_version
return self.db_pool.runInteraction(
"create_e2e_room_keys_version_txn", _create_e2e_room_keys_version_txn
)
@trace
def update_e2e_room_keys_version(
self, user_id, version, info=None, version_etag=None
):
"""Update a given backup version
Args:
user_id(str): the user whose backup version we're updating
version(str): the version ID of the backup version we're updating
info (dict): the new backup version info to store. If None, then
the backup version info is not updated
version_etag (Optional[int]): etag of the keys in the backup. If
None, then the etag is not updated
"""
updatevalues = {}
if info is not None and "auth_data" in info:
updatevalues["auth_data"] = json.dumps(info["auth_data"])
if version_etag is not None:
updatevalues["etag"] = version_etag
if updatevalues:
return self.db_pool.simple_update(
table="e2e_room_keys_versions",
keyvalues={"user_id": user_id, "version": version},
updatevalues=updatevalues,
desc="update_e2e_room_keys_version",
)
@trace
def delete_e2e_room_keys_version(self, user_id, version=None):
"""Delete a given backup version of the user's room keys.
Doesn't delete their actual key data.
Args:
user_id(str): the user whose backup version we're deleting
version(str): Optional. the version ID of the backup version we're deleting
If missing, we delete the current backup version info.
Raises:
StoreError: with code 404 if there are no e2e_room_keys_versions present,
or if the version requested doesn't exist.
"""
def _delete_e2e_room_keys_version_txn(txn):
if version is None:
this_version = self._get_current_version(txn, user_id)
if this_version is None:
raise StoreError(404, "No current backup version")
else:
this_version = version
self.db_pool.simple_delete_txn(
txn,
table="e2e_room_keys",
keyvalues={"user_id": user_id, "version": this_version},
)
return self.db_pool.simple_update_one_txn(
txn,
table="e2e_room_keys_versions",
keyvalues={"user_id": user_id, "version": this_version},
updatevalues={"deleted": 1},
)
return self.db_pool.runInteraction(
"delete_e2e_room_keys_version", _delete_e2e_room_keys_version_txn
)


@@ -0,0 +1,748 @@
# -*- coding: utf-8 -*-
# Copyright 2015, 2016 OpenMarket Ltd
# Copyright 2019 New Vector Ltd
# Copyright 2019 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Dict, List, Tuple
from canonicaljson import encode_canonical_json, json
from twisted.enterprise.adbapi import Connection
from twisted.internet import defer
from synapse.logging.opentracing import log_kv, set_tag, trace
from synapse.storage._base import SQLBaseStore, db_to_json
from synapse.storage.database import make_in_list_sql_clause
from synapse.util.caches.descriptors import cached, cachedList
from synapse.util.iterutils import batch_iter
class EndToEndKeyWorkerStore(SQLBaseStore):
@trace
@defer.inlineCallbacks
def get_e2e_device_keys(
self, query_list, include_all_devices=False, include_deleted_devices=False
):
"""Fetch a list of device keys.
Args:
query_list(list): List of pairs of user_ids and device_ids.
include_all_devices (bool): whether to include entries for devices
that don't have device keys
include_deleted_devices (bool): whether to include null entries for
devices which no longer exist (but were in the query_list).
This option only takes effect if include_all_devices is true.
Returns:
Dict mapping from user-id to dict mapping from device_id to
key data. The key data will be a dict in the same format as the
DeviceKeys type returned by POST /_matrix/client/r0/keys/query.
"""
set_tag("query_list", query_list)
if not query_list:
return {}
results = yield self.db_pool.runInteraction(
"get_e2e_device_keys",
self._get_e2e_device_keys_txn,
query_list,
include_all_devices,
include_deleted_devices,
)
# Build the result structure, un-jsonify the results, and add the
# "unsigned" section
rv = {}
for user_id, device_keys in results.items():
rv[user_id] = {}
for device_id, device_info in device_keys.items():
r = db_to_json(device_info.pop("key_json"))
r["unsigned"] = {}
display_name = device_info["device_display_name"]
if display_name is not None:
r["unsigned"]["device_display_name"] = display_name
if "signatures" in device_info:
for sig_user_id, sigs in device_info["signatures"].items():
r.setdefault("signatures", {}).setdefault(
sig_user_id, {}
).update(sigs)
rv[user_id][device_id] = r
return rv
@trace
def _get_e2e_device_keys_txn(
self, txn, query_list, include_all_devices=False, include_deleted_devices=False
):
set_tag("include_all_devices", include_all_devices)
set_tag("include_deleted_devices", include_deleted_devices)
query_clauses = []
query_params = []
signature_query_clauses = []
signature_query_params = []
if include_all_devices is False:
include_deleted_devices = False
if include_deleted_devices:
deleted_devices = set(query_list)
for (user_id, device_id) in query_list:
query_clause = "user_id = ?"
query_params.append(user_id)
signature_query_clause = "target_user_id = ?"
signature_query_params.append(user_id)
if device_id is not None:
query_clause += " AND device_id = ?"
query_params.append(device_id)
signature_query_clause += " AND target_device_id = ?"
signature_query_params.append(device_id)
signature_query_clause += " AND user_id = ?"
signature_query_params.append(user_id)
query_clauses.append(query_clause)
signature_query_clauses.append(signature_query_clause)
sql = (
"SELECT user_id, device_id, "
" d.display_name AS device_display_name, "
" k.key_json"
" FROM devices d"
" %s JOIN e2e_device_keys_json k USING (user_id, device_id)"
" WHERE %s AND NOT d.hidden"
) % (
"LEFT" if include_all_devices else "INNER",
" OR ".join("(" + q + ")" for q in query_clauses),
)
txn.execute(sql, query_params)
rows = self.db_pool.cursor_to_dict(txn)
result = {}
for row in rows:
if include_deleted_devices:
deleted_devices.remove((row["user_id"], row["device_id"]))
result.setdefault(row["user_id"], {})[row["device_id"]] = row
if include_deleted_devices:
for user_id, device_id in deleted_devices:
result.setdefault(user_id, {})[device_id] = None
# get signatures on the device
signature_sql = ("SELECT * FROM e2e_cross_signing_signatures WHERE %s") % (
" OR ".join("(" + q + ")" for q in signature_query_clauses)
)
txn.execute(signature_sql, signature_query_params)
rows = self.db_pool.cursor_to_dict(txn)
# add each cross-signing signature to the correct device in the result dict.
for row in rows:
signing_user_id = row["user_id"]
signing_key_id = row["key_id"]
target_user_id = row["target_user_id"]
target_device_id = row["target_device_id"]
signature = row["signature"]
target_user_result = result.get(target_user_id)
if not target_user_result:
continue
target_device_result = target_user_result.get(target_device_id)
if not target_device_result:
# note that target_device_result will be None for deleted devices.
continue
target_device_signatures = target_device_result.setdefault("signatures", {})
signing_user_signatures = target_device_signatures.setdefault(
signing_user_id, {}
)
signing_user_signatures[signing_key_id] = signature
log_kv(result)
return result
@defer.inlineCallbacks
def get_e2e_one_time_keys(self, user_id, device_id, key_ids):
"""Retrieve a number of one-time keys for a user
Args:
user_id(str): id of user to get keys for
device_id(str): id of device to get keys for
key_ids(list[str]): list of key ids (excluding algorithm) to
retrieve
Returns:
deferred resolving to Dict[(str, str), str]: map from (algorithm,
key_id) to json string for key
"""
rows = yield self.db_pool.simple_select_many_batch(
table="e2e_one_time_keys_json",
column="key_id",
iterable=key_ids,
retcols=("algorithm", "key_id", "key_json"),
keyvalues={"user_id": user_id, "device_id": device_id},
desc="add_e2e_one_time_keys_check",
)
result = {(row["algorithm"], row["key_id"]): row["key_json"] for row in rows}
log_kv({"message": "Fetched one time keys for user", "one_time_keys": result})
return result
@defer.inlineCallbacks
def add_e2e_one_time_keys(self, user_id, device_id, time_now, new_keys):
"""Insert some new one time keys for a device. Errors if any of the
keys already exist.
Args:
user_id(str): id of user to get keys for
device_id(str): id of device to get keys for
time_now(long): insertion time to record (ms since epoch)
            new_keys(iterable[(str, str, str)]): keys to add - each a tuple of
(algorithm, key_id, key json)
"""
def _add_e2e_one_time_keys(txn):
set_tag("user_id", user_id)
set_tag("device_id", device_id)
set_tag("new_keys", new_keys)
# We are protected from race between lookup and insertion due to
# a unique constraint. If there is a race of two calls to
# `add_e2e_one_time_keys` then they'll conflict and we will only
# insert one set.
self.db_pool.simple_insert_many_txn(
txn,
table="e2e_one_time_keys_json",
values=[
{
"user_id": user_id,
"device_id": device_id,
"algorithm": algorithm,
"key_id": key_id,
"ts_added_ms": time_now,
"key_json": json_bytes,
}
for algorithm, key_id, json_bytes in new_keys
],
)
self._invalidate_cache_and_stream(
txn, self.count_e2e_one_time_keys, (user_id, device_id)
)
yield self.db_pool.runInteraction(
"add_e2e_one_time_keys_insert", _add_e2e_one_time_keys
)
@cached(max_entries=10000)
def count_e2e_one_time_keys(self, user_id, device_id):
""" Count the number of one time keys the server has for a device
Returns:
Dict mapping from algorithm to number of keys for that algorithm.
"""
def _count_e2e_one_time_keys(txn):
sql = (
"SELECT algorithm, COUNT(key_id) FROM e2e_one_time_keys_json"
" WHERE user_id = ? AND device_id = ?"
" GROUP BY algorithm"
)
txn.execute(sql, (user_id, device_id))
result = {}
for algorithm, key_count in txn:
result[algorithm] = key_count
return result
return self.db_pool.runInteraction(
"count_e2e_one_time_keys", _count_e2e_one_time_keys
)
@defer.inlineCallbacks
def get_e2e_cross_signing_key(self, user_id, key_type, from_user_id=None):
"""Returns a user's cross-signing key.
Args:
user_id (str): the user whose key is being requested
key_type (str): the type of key that is being requested: either 'master'
for a master key, 'self_signing' for a self-signing key, or
'user_signing' for a user-signing key
from_user_id (str): if specified, signatures made by this user on
the self-signing key will be included in the result
Returns:
dict of the key data or None if not found
"""
res = yield self.get_e2e_cross_signing_keys_bulk([user_id], from_user_id)
user_keys = res.get(user_id)
if not user_keys:
return None
return user_keys.get(key_type)
@cached(num_args=1)
def _get_bare_e2e_cross_signing_keys(self, user_id):
"""Dummy function. Only used to make a cache for
_get_bare_e2e_cross_signing_keys_bulk.
"""
raise NotImplementedError()
@cachedList(
cached_method_name="_get_bare_e2e_cross_signing_keys",
list_name="user_ids",
num_args=1,
)
def _get_bare_e2e_cross_signing_keys_bulk(
self, user_ids: List[str]
) -> Dict[str, Dict[str, dict]]:
"""Returns the cross-signing keys for a set of users. The output of this
function should be passed to _get_e2e_cross_signing_signatures_txn if
the signatures for the calling user need to be fetched.
Args:
user_ids (list[str]): the users whose keys are being requested
Returns:
dict[str, dict[str, dict]]: mapping from user ID to key type to key
data. If a user's cross-signing keys were not found, either
their user ID will not be in the dict, or their user ID will map
to None.
"""
return self.db_pool.runInteraction(
"get_bare_e2e_cross_signing_keys_bulk",
self._get_bare_e2e_cross_signing_keys_bulk_txn,
user_ids,
)
def _get_bare_e2e_cross_signing_keys_bulk_txn(
self, txn: Connection, user_ids: List[str],
) -> Dict[str, Dict[str, dict]]:
"""Returns the cross-signing keys for a set of users. The output of this
function should be passed to _get_e2e_cross_signing_signatures_txn if
the signatures for the calling user need to be fetched.
Args:
txn (twisted.enterprise.adbapi.Connection): db connection
user_ids (list[str]): the users whose keys are being requested
Returns:
dict[str, dict[str, dict]]: mapping from user ID to key type to key
data. If a user's cross-signing keys were not found, their user
ID will not be in the dict.
"""
result = {}
for user_chunk in batch_iter(user_ids, 100):
clause, params = make_in_list_sql_clause(
txn.database_engine, "k.user_id", user_chunk
)
sql = (
"""
SELECT k.user_id, k.keytype, k.keydata, k.stream_id
FROM e2e_cross_signing_keys k
INNER JOIN (SELECT user_id, keytype, MAX(stream_id) AS stream_id
FROM e2e_cross_signing_keys
GROUP BY user_id, keytype) s
USING (user_id, stream_id, keytype)
WHERE
"""
+ clause
)
txn.execute(sql, params)
rows = self.db_pool.cursor_to_dict(txn)
for row in rows:
user_id = row["user_id"]
key_type = row["keytype"]
key = db_to_json(row["keydata"])
user_info = result.setdefault(user_id, {})
user_info[key_type] = key
return result
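    # Illustrative sketch (the user ID and values below are assumptions, not from
    # the original source): the transaction above returns a mapping such as
    #
    #     {
    #         "@alice:example.com": {
    #             "master": {"user_id": "@alice:example.com", "usage": ["master"], "keys": {...}},
    #             "self_signing": {...},
    #         }
    #     }
    #
    # i.e. only the newest key (by stream_id) of each keytype is kept per user.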
def _get_e2e_cross_signing_signatures_txn(
self, txn: Connection, keys: Dict[str, Dict[str, dict]], from_user_id: str,
) -> Dict[str, Dict[str, dict]]:
"""Returns the cross-signing signatures made by a user on a set of keys.
Args:
txn (twisted.enterprise.adbapi.Connection): db connection
keys (dict[str, dict[str, dict]]): a map of user ID to key type to
key data. This dict will be modified to add signatures.
from_user_id (str): fetch the signatures made by this user
Returns:
dict[str, dict[str, dict]]: mapping from user ID to key type to key
data. The return value will be the same as the keys argument,
with the modifications included.
"""
# find out what cross-signing keys (a.k.a. devices) we need to get
# signatures for. This is a map of (user_id, device_id) to key type
# (device_id is the key's public part).
devices = {}
for user_id, user_info in keys.items():
if user_info is None:
continue
for key_type, key in user_info.items():
device_id = None
for k in key["keys"].values():
device_id = k
devices[(user_id, device_id)] = key_type
for batch in batch_iter(devices.keys(), size=100):
sql = """
SELECT target_user_id, target_device_id, key_id, signature
FROM e2e_cross_signing_signatures
WHERE user_id = ?
AND (%s)
""" % (
" OR ".join(
"(target_user_id = ? AND target_device_id = ?)" for _ in batch
)
)
query_params = [from_user_id]
for item in batch:
# item is a (user_id, device_id) tuple
query_params.extend(item)
txn.execute(sql, query_params)
rows = self.db_pool.cursor_to_dict(txn)
# and add the signatures to the appropriate keys
for row in rows:
key_id = row["key_id"]
target_user_id = row["target_user_id"]
target_device_id = row["target_device_id"]
key_type = devices[(target_user_id, target_device_id)]
# We need to copy everything, because the result may have come
# from the cache. dict.copy only does a shallow copy, so we
# need to recursively copy the dicts that will be modified.
user_info = keys[target_user_id] = keys[target_user_id].copy()
target_user_key = user_info[key_type] = user_info[key_type].copy()
if "signatures" in target_user_key:
signatures = target_user_key["signatures"] = target_user_key[
"signatures"
].copy()
if from_user_id in signatures:
                        user_sigs = signatures[from_user_id] = signatures[
                            from_user_id
                        ].copy()
user_sigs[key_id] = row["signature"]
else:
signatures[from_user_id] = {key_id: row["signature"]}
else:
target_user_key["signatures"] = {
from_user_id: {key_id: row["signature"]}
}
return keys
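    # Illustrative sketch (hypothetical IDs, not from the original source): if
    # `from_user_id` is "@alice:example.com" and she has signed Bob's master key,
    # the copied structure gains an entry under that key's "signatures" map, e.g.
    #
    #     keys["@bob:example.com"]["master"]["signatures"] == {
    #         "@alice:example.com": {"ed25519:alice+user+signing+key": "base64+signature"}
    #     }
    #
    # while the cached, un-copied dicts remain untouched.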
@defer.inlineCallbacks
def get_e2e_cross_signing_keys_bulk(
self, user_ids: List[str], from_user_id: str = None
) -> defer.Deferred:
"""Returns the cross-signing keys for a set of users.
Args:
user_ids (list[str]): the users whose keys are being requested
from_user_id (str): if specified, signatures made by this user on
the self-signing keys will be included in the result
Returns:
Deferred[dict[str, dict[str, dict]]]: map of user ID to key type to
key data. If a user's cross-signing keys were not found, either
their user ID will not be in the dict, or their user ID will map
to None.
"""
result = yield self._get_bare_e2e_cross_signing_keys_bulk(user_ids)
if from_user_id:
result = yield self.db_pool.runInteraction(
"get_e2e_cross_signing_signatures",
self._get_e2e_cross_signing_signatures_txn,
result,
from_user_id,
)
return result
async def get_all_user_signature_changes_for_remotes(
self, instance_name: str, last_id: int, current_id: int, limit: int
) -> Tuple[List[Tuple[int, tuple]], int, bool]:
"""Get updates for groups replication stream.
Note that the user signature stream represents when a user signs their
device with their user-signing key, which is not published to other
users or servers, so no `destination` is needed in the returned
list. However, this is needed to poke workers.
Args:
instance_name: The writer we want to fetch updates from. Unused
here since there is only ever one writer.
last_id: The token to fetch updates from. Exclusive.
current_id: The token to fetch updates up to. Inclusive.
limit: The requested limit for the number of rows to return. The
function may return more or fewer rows.
Returns:
A tuple consisting of: the updates, a token to use to fetch
subsequent updates, and whether we returned fewer rows than exists
between the requested tokens due to the limit.
The token returned can be used in a subsequent call to this
            function to get further updates.
The updates are a list of 2-tuples of stream ID and the row data
"""
if last_id == current_id:
return [], current_id, False
def _get_all_user_signature_changes_for_remotes_txn(txn):
sql = """
SELECT stream_id, from_user_id AS user_id
FROM user_signature_stream
WHERE ? < stream_id AND stream_id <= ?
ORDER BY stream_id ASC
LIMIT ?
"""
txn.execute(sql, (last_id, current_id, limit))
updates = [(row[0], (row[1:])) for row in txn]
limited = False
upto_token = current_id
if len(updates) >= limit:
upto_token = updates[-1][0]
limited = True
return updates, upto_token, limited
return await self.db_pool.runInteraction(
"get_all_user_signature_changes_for_remotes",
_get_all_user_signature_changes_for_remotes_txn,
)
class EndToEndKeyStore(EndToEndKeyWorkerStore, SQLBaseStore):
def set_e2e_device_keys(self, user_id, device_id, time_now, device_keys):
"""Stores device keys for a device. Returns whether there was a change
or the keys were already in the database.
"""
def _set_e2e_device_keys_txn(txn):
set_tag("user_id", user_id)
set_tag("device_id", device_id)
set_tag("time_now", time_now)
set_tag("device_keys", device_keys)
old_key_json = self.db_pool.simple_select_one_onecol_txn(
txn,
table="e2e_device_keys_json",
keyvalues={"user_id": user_id, "device_id": device_id},
retcol="key_json",
allow_none=True,
)
# In py3 we need old_key_json to match new_key_json type. The DB
# returns unicode while encode_canonical_json returns bytes.
new_key_json = encode_canonical_json(device_keys).decode("utf-8")
if old_key_json == new_key_json:
log_kv({"Message": "Device key already stored."})
return False
self.db_pool.simple_upsert_txn(
txn,
table="e2e_device_keys_json",
keyvalues={"user_id": user_id, "device_id": device_id},
values={"ts_added_ms": time_now, "key_json": new_key_json},
)
log_kv({"message": "Device keys stored."})
return True
return self.db_pool.runInteraction(
"set_e2e_device_keys", _set_e2e_device_keys_txn
)
def claim_e2e_one_time_keys(self, query_list):
"""Take a list of one time keys out of the database"""
@trace
def _claim_e2e_one_time_keys(txn):
sql = (
"SELECT key_id, key_json FROM e2e_one_time_keys_json"
" WHERE user_id = ? AND device_id = ? AND algorithm = ?"
" LIMIT 1"
)
result = {}
delete = []
for user_id, device_id, algorithm in query_list:
user_result = result.setdefault(user_id, {})
device_result = user_result.setdefault(device_id, {})
txn.execute(sql, (user_id, device_id, algorithm))
for key_id, key_json in txn:
device_result[algorithm + ":" + key_id] = key_json
delete.append((user_id, device_id, algorithm, key_id))
sql = (
"DELETE FROM e2e_one_time_keys_json"
" WHERE user_id = ? AND device_id = ? AND algorithm = ?"
" AND key_id = ?"
)
for user_id, device_id, algorithm, key_id in delete:
log_kv(
{
"message": "Executing claim e2e_one_time_keys transaction on database."
}
)
txn.execute(sql, (user_id, device_id, algorithm, key_id))
log_kv({"message": "finished executing and invalidating cache"})
self._invalidate_cache_and_stream(
txn, self.count_e2e_one_time_keys, (user_id, device_id)
)
return result
return self.db_pool.runInteraction(
"claim_e2e_one_time_keys", _claim_e2e_one_time_keys
)
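    # Illustrative sketch (the user, device and key IDs below are assumptions): a
    # query_list of [("@alice:example.com", "DEVICEID", "signed_curve25519")]
    # would, if a matching one-time key exists, produce a result shaped like
    #
    #     {"@alice:example.com": {"DEVICEID": {"signed_curve25519:AAAAHQ": "<key JSON>"}}}
    #
    # and delete that row so the same key cannot be claimed twice.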
def delete_e2e_keys_by_device(self, user_id, device_id):
def delete_e2e_keys_by_device_txn(txn):
log_kv(
{
"message": "Deleting keys for device",
"device_id": device_id,
"user_id": user_id,
}
)
self.db_pool.simple_delete_txn(
txn,
table="e2e_device_keys_json",
keyvalues={"user_id": user_id, "device_id": device_id},
)
self.db_pool.simple_delete_txn(
txn,
table="e2e_one_time_keys_json",
keyvalues={"user_id": user_id, "device_id": device_id},
)
self._invalidate_cache_and_stream(
txn, self.count_e2e_one_time_keys, (user_id, device_id)
)
return self.db_pool.runInteraction(
"delete_e2e_keys_by_device", delete_e2e_keys_by_device_txn
)
def _set_e2e_cross_signing_key_txn(self, txn, user_id, key_type, key):
"""Set a user's cross-signing key.
Args:
txn (twisted.enterprise.adbapi.Connection): db connection
user_id (str): the user to set the signing key for
key_type (str): the type of key that is being set: either 'master'
for a master key, 'self_signing' for a self-signing key, or
'user_signing' for a user-signing key
key (dict): the key data
"""
# the 'key' dict will look something like:
# {
# "user_id": "@alice:example.com",
# "usage": ["self_signing"],
# "keys": {
# "ed25519:base64+self+signing+public+key": "base64+self+signing+public+key",
# },
# "signatures": {
# "@alice:example.com": {
# "ed25519:base64+master+public+key": "base64+signature"
# }
# }
# }
# The "keys" property must only have one entry, which will be the public
# key, so we just grab the first value in there
pubkey = next(iter(key["keys"].values()))
# The cross-signing keys need to occupy the same namespace as devices,
# since signatures are identified by device ID. So add an entry to the
# device table to make sure that we don't have a collision with device
# IDs.
# We only need to do this for local users, since remote servers should be
# responsible for checking this for their own users.
if self.hs.is_mine_id(user_id):
self.db_pool.simple_insert_txn(
txn,
"devices",
values={
"user_id": user_id,
"device_id": pubkey,
"display_name": key_type + " signing key",
"hidden": True,
},
)
# and finally, store the key itself
with self._cross_signing_id_gen.get_next() as stream_id:
self.db_pool.simple_insert_txn(
txn,
"e2e_cross_signing_keys",
values={
"user_id": user_id,
"keytype": key_type,
"keydata": json.dumps(key),
"stream_id": stream_id,
},
)
self._invalidate_cache_and_stream(
txn, self._get_bare_e2e_cross_signing_keys, (user_id,)
)
def set_e2e_cross_signing_key(self, user_id, key_type, key):
"""Set a user's cross-signing key.
Args:
user_id (str): the user to set the user-signing key for
key_type (str): the type of cross-signing key to set
key (dict): the key data
"""
return self.db_pool.runInteraction(
"add_e2e_cross_signing_key",
self._set_e2e_cross_signing_key_txn,
user_id,
key_type,
key,
)
def store_e2e_cross_signing_signatures(self, user_id, signatures):
"""Stores cross-signing signatures.
Args:
user_id (str): the user who made the signatures
signatures (iterable[SignatureListItem]): signatures to add
"""
return self.db_pool.simple_insert_many(
"e2e_cross_signing_signatures",
[
{
"user_id": user_id,
"key_id": item.signing_key_id,
"target_user_id": item.target_user_id,
"target_device_id": item.target_device_id,
"signature": item.signature,
}
for item in signatures
],
"add_e2e_signing_key",
)

View file

@ -0,0 +1,726 @@
# -*- coding: utf-8 -*-
# Copyright 2014-2016 OpenMarket Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import itertools
import logging
from queue import Empty, PriorityQueue
from typing import Dict, List, Optional, Set, Tuple
from twisted.internet import defer
from synapse.api.errors import StoreError
from synapse.metrics.background_process_metrics import run_as_background_process
from synapse.storage._base import SQLBaseStore, make_in_list_sql_clause
from synapse.storage.database import DatabasePool
from synapse.storage.databases.main.events_worker import EventsWorkerStore
from synapse.storage.databases.main.signatures import SignatureWorkerStore
from synapse.util.caches.descriptors import cached
from synapse.util.iterutils import batch_iter
logger = logging.getLogger(__name__)
class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBaseStore):
def get_auth_chain(self, event_ids, include_given=False):
"""Get auth events for given event_ids. The events *must* be state events.
Args:
event_ids (list): state events
include_given (bool): include the given events in result
Returns:
list of events
"""
return self.get_auth_chain_ids(
event_ids, include_given=include_given
).addCallback(self.get_events_as_list)
def get_auth_chain_ids(
self,
event_ids: List[str],
include_given: bool = False,
ignore_events: Optional[Set[str]] = None,
):
"""Get auth events for given event_ids. The events *must* be state events.
Args:
event_ids: state events
include_given: include the given events in result
ignore_events: Set of events to exclude from the returned auth
chain. This is useful if the caller will just discard the
given events anyway, and saves us from figuring out their auth
chains if not required.
Returns:
list of event_ids
"""
return self.db_pool.runInteraction(
"get_auth_chain_ids",
self._get_auth_chain_ids_txn,
event_ids,
include_given,
ignore_events,
)
def _get_auth_chain_ids_txn(self, txn, event_ids, include_given, ignore_events):
if ignore_events is None:
ignore_events = set()
if include_given:
results = set(event_ids)
else:
results = set()
base_sql = "SELECT auth_id FROM event_auth WHERE "
front = set(event_ids)
while front:
new_front = set()
for chunk in batch_iter(front, 100):
clause, args = make_in_list_sql_clause(
txn.database_engine, "event_id", chunk
)
txn.execute(base_sql + clause, args)
new_front.update(r[0] for r in txn)
new_front -= ignore_events
new_front -= results
front = new_front
results.update(front)
return list(results)
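    # Illustrative sketch (hypothetical event IDs, not from the original source):
    # given auth edges E3 -> E2 -> E1 (i.e. event_auth rows (E3, E2) and (E2, E1)),
    # calling _get_auth_chain_ids_txn(txn, ["E3"], False, None) walks
    # front = {E3} -> {E2} -> {E1} -> {} and returns ["E2", "E1"] (in some order);
    # with include_given=True, E3 would be included as well.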
def get_auth_chain_difference(self, state_sets: List[Set[str]]):
"""Given sets of state events figure out the auth chain difference (as
per state res v2 algorithm).
        This is equivalent to fetching the full auth chain for each set of state
and returning the events that don't appear in each and every auth
chain.
Returns:
Deferred[Set[str]]
"""
return self.db_pool.runInteraction(
"get_auth_chain_difference",
self._get_auth_chain_difference_txn,
state_sets,
)
def _get_auth_chain_difference_txn(
self, txn, state_sets: List[Set[str]]
) -> Set[str]:
# Algorithm Description
# ~~~~~~~~~~~~~~~~~~~~~
#
# The idea here is to basically walk the auth graph of each state set in
# tandem, keeping track of which auth events are reachable by each state
# set. If we reach an auth event we've already visited (via a different
# state set) then we mark that auth event and all ancestors as reachable
# by the state set. This requires that we keep track of the auth chains
# in memory.
#
        # Doing it in such a way means that we can stop early if all auth
# events we're currently walking are reachable by all state sets.
#
# *Note*: We can't stop walking an event's auth chain if it is reachable
# by all state sets. This is because other auth chains we're walking
# might be reachable only via the original auth chain. For example,
# given the following auth chain:
#
# A -> C -> D -> E
# / /
# B -´---------´
#
# and state sets {A} and {B} then walking the auth chains of A and B
# would immediately show that C is reachable by both. However, if we
# stopped at C then we'd only reach E via the auth chain of B and so E
        # would erroneously get included in the returned difference.
#
# The other thing that we do is limit the number of auth chains we walk
# at once, due to practical limits (i.e. we can only query the database
# with a limited set of parameters). We pick the auth chains we walk
# each iteration based on their depth, in the hope that events with a
# lower depth are likely reachable by those with higher depths.
#
# We could use any ordering that we believe would give a rough
# topological ordering, e.g. origin server timestamp. If the ordering
# chosen is not topological then the algorithm still produces the right
# result, but perhaps a bit more inefficiently. This is why it is safe
# to use "depth" here.
initial_events = set(state_sets[0]).union(*state_sets[1:])
# Dict from events in auth chains to which sets *cannot* reach them.
# I.e. if the set is empty then all sets can reach the event.
event_to_missing_sets = {
event_id: {i for i, a in enumerate(state_sets) if event_id not in a}
for event_id in initial_events
}
# The sorted list of events whose auth chains we should walk.
search = [] # type: List[Tuple[int, str]]
# We need to get the depth of the initial events for sorting purposes.
sql = """
SELECT depth, event_id FROM events
WHERE %s
"""
# the list can be huge, so let's avoid looking them all up in one massive
# query.
for batch in batch_iter(initial_events, 1000):
clause, args = make_in_list_sql_clause(
txn.database_engine, "event_id", batch
)
txn.execute(sql % (clause,), args)
# I think building a temporary list with fetchall is more efficient than
# just `search.extend(txn)`, but this is unconfirmed
search.extend(txn.fetchall())
# sort by depth
search.sort()
# Map from event to its auth events
event_to_auth_events = {} # type: Dict[str, Set[str]]
base_sql = """
SELECT a.event_id, auth_id, depth
FROM event_auth AS a
INNER JOIN events AS e ON (e.event_id = a.auth_id)
WHERE
"""
while search:
# Check whether all our current walks are reachable by all state
# sets. If so we can bail.
if all(not event_to_missing_sets[eid] for _, eid in search):
break
# Fetch the auth events and their depths of the N last events we're
# currently walking
search, chunk = search[:-100], search[-100:]
clause, args = make_in_list_sql_clause(
txn.database_engine, "a.event_id", [e_id for _, e_id in chunk]
)
txn.execute(base_sql + clause, args)
for event_id, auth_event_id, auth_event_depth in txn:
event_to_auth_events.setdefault(event_id, set()).add(auth_event_id)
sets = event_to_missing_sets.get(auth_event_id)
if sets is None:
# First time we're seeing this event, so we add it to the
# queue of things to fetch.
search.append((auth_event_depth, auth_event_id))
# Assume that this event is unreachable from any of the
# state sets until proven otherwise
sets = event_to_missing_sets[auth_event_id] = set(
range(len(state_sets))
)
else:
# We've previously seen this event, so look up its auth
# events and recursively mark all ancestors as reachable
# by the current event's state set.
a_ids = event_to_auth_events.get(auth_event_id)
while a_ids:
new_aids = set()
for a_id in a_ids:
event_to_missing_sets[a_id].intersection_update(
event_to_missing_sets[event_id]
)
b = event_to_auth_events.get(a_id)
if b:
new_aids.update(b)
a_ids = new_aids
                # Mark that the auth event is reachable by the appropriate sets.
sets.intersection_update(event_to_missing_sets[event_id])
search.sort()
# Return all events where not all sets can reach them.
return {eid for eid, n in event_to_missing_sets.items() if n}
def get_oldest_events_in_room(self, room_id):
return self.db_pool.runInteraction(
"get_oldest_events_in_room", self._get_oldest_events_in_room_txn, room_id
)
def get_oldest_events_with_depth_in_room(self, room_id):
return self.db_pool.runInteraction(
"get_oldest_events_with_depth_in_room",
self.get_oldest_events_with_depth_in_room_txn,
room_id,
)
def get_oldest_events_with_depth_in_room_txn(self, txn, room_id):
sql = (
"SELECT b.event_id, MAX(e.depth) FROM events as e"
" INNER JOIN event_edges as g"
" ON g.event_id = e.event_id"
" INNER JOIN event_backward_extremities as b"
" ON g.prev_event_id = b.event_id"
" WHERE b.room_id = ? AND g.is_state is ?"
" GROUP BY b.event_id"
)
txn.execute(sql, (room_id, False))
return dict(txn)
@defer.inlineCallbacks
def get_max_depth_of(self, event_ids):
"""Returns the max depth of a set of event IDs
Args:
event_ids (list[str])
        Returns:
Deferred[int]
"""
rows = yield self.db_pool.simple_select_many_batch(
table="events",
column="event_id",
iterable=event_ids,
retcols=("depth",),
desc="get_max_depth_of",
)
if not rows:
return 0
else:
return max(row["depth"] for row in rows)
def _get_oldest_events_in_room_txn(self, txn, room_id):
return self.db_pool.simple_select_onecol_txn(
txn,
table="event_backward_extremities",
keyvalues={"room_id": room_id},
retcol="event_id",
)
def get_prev_events_for_room(self, room_id: str):
"""
Gets a subset of the current forward extremities in the given room.
Limits the result to 10 extremities, so that we can avoid creating
events which refer to hundreds of prev_events.
Args:
room_id (str): room_id
Returns:
            Deferred[List[str]]: the event ids of the forward extremities
"""
return self.db_pool.runInteraction(
"get_prev_events_for_room", self._get_prev_events_for_room_txn, room_id
)
def _get_prev_events_for_room_txn(self, txn, room_id: str):
# we just use the 10 newest events. Older events will become
# prev_events of future events.
sql = """
SELECT e.event_id FROM event_forward_extremities AS f
INNER JOIN events AS e USING (event_id)
WHERE f.room_id = ?
ORDER BY e.depth DESC
LIMIT 10
"""
txn.execute(sql, (room_id,))
return [row[0] for row in txn]
def get_rooms_with_many_extremities(self, min_count, limit, room_id_filter):
"""Get the top rooms with at least N extremities.
Args:
min_count (int): The minimum number of extremities
limit (int): The maximum number of rooms to return.
room_id_filter (iterable[str]): room_ids to exclude from the results
Returns:
Deferred[list]: At most `limit` room IDs that have at least
`min_count` extremities, sorted by extremity count.
"""
def _get_rooms_with_many_extremities_txn(txn):
where_clause = "1=1"
if room_id_filter:
where_clause = "room_id NOT IN (%s)" % (
",".join("?" for _ in room_id_filter),
)
sql = """
SELECT room_id FROM event_forward_extremities
WHERE %s
GROUP BY room_id
HAVING count(*) > ?
ORDER BY count(*) DESC
LIMIT ?
""" % (
where_clause,
)
query_args = list(itertools.chain(room_id_filter, [min_count, limit]))
txn.execute(sql, query_args)
return [room_id for room_id, in txn]
return self.db_pool.runInteraction(
"get_rooms_with_many_extremities", _get_rooms_with_many_extremities_txn
)
@cached(max_entries=5000, iterable=True)
def get_latest_event_ids_in_room(self, room_id):
return self.db_pool.simple_select_onecol(
table="event_forward_extremities",
keyvalues={"room_id": room_id},
retcol="event_id",
desc="get_latest_event_ids_in_room",
)
def get_min_depth(self, room_id):
""" For hte given room, get the minimum depth we have seen for it.
"""
return self.db_pool.runInteraction(
"get_min_depth", self._get_min_depth_interaction, room_id
)
def _get_min_depth_interaction(self, txn, room_id):
min_depth = self.db_pool.simple_select_one_onecol_txn(
txn,
table="room_depth",
keyvalues={"room_id": room_id},
retcol="min_depth",
allow_none=True,
)
return int(min_depth) if min_depth is not None else None
def get_forward_extremeties_for_room(self, room_id, stream_ordering):
"""For a given room_id and stream_ordering, return the forward
        extremities of the room at that point in "time".
Throws a StoreError if we have since purged the index for
stream_orderings from that point.
Args:
room_id (str):
stream_ordering (int):
Returns:
deferred, which resolves to a list of event_ids
"""
# We want to make the cache more effective, so we clamp to the last
# change before the given ordering.
last_change = self._events_stream_cache.get_max_pos_of_last_change(room_id)
# We don't always have a full stream_to_exterm_id table, e.g. after
# the upgrade that introduced it, so we make sure we never ask for a
# stream_ordering from before a restart
last_change = max(self._stream_order_on_start, last_change)
# provided the last_change is recent enough, we now clamp the requested
# stream_ordering to it.
if last_change > self.stream_ordering_month_ago:
stream_ordering = min(last_change, stream_ordering)
return self._get_forward_extremeties_for_room(room_id, stream_ordering)
@cached(max_entries=5000, num_args=2)
def _get_forward_extremeties_for_room(self, room_id, stream_ordering):
"""For a given room_id and stream_ordering, return the forward
        extremities of the room at that point in "time".
Throws a StoreError if we have since purged the index for
stream_orderings from that point.
"""
if stream_ordering <= self.stream_ordering_month_ago:
raise StoreError(400, "stream_ordering too old")
sql = """
SELECT event_id FROM stream_ordering_to_exterm
INNER JOIN (
SELECT room_id, MAX(stream_ordering) AS stream_ordering
FROM stream_ordering_to_exterm
WHERE stream_ordering <= ? GROUP BY room_id
) AS rms USING (room_id, stream_ordering)
WHERE room_id = ?
"""
def get_forward_extremeties_for_room_txn(txn):
txn.execute(sql, (stream_ordering, room_id))
return [event_id for event_id, in txn]
return self.db_pool.runInteraction(
"get_forward_extremeties_for_room", get_forward_extremeties_for_room_txn
)
def get_backfill_events(self, room_id, event_list, limit):
"""Get a list of Events for a given topic that occurred before (and
including) the events in event_list. Return a list of max size `limit`
Args:
room_id (str)
event_list (list)
limit (int)
"""
return (
self.db_pool.runInteraction(
"get_backfill_events",
self._get_backfill_events,
room_id,
event_list,
limit,
)
.addCallback(self.get_events_as_list)
.addCallback(lambda l: sorted(l, key=lambda e: -e.depth))
)
def _get_backfill_events(self, txn, room_id, event_list, limit):
logger.debug("_get_backfill_events: %s, %r, %s", room_id, event_list, limit)
event_results = set()
# We want to make sure that we do a breadth-first, "depth" ordered
# search.
query = (
"SELECT depth, prev_event_id FROM event_edges"
" INNER JOIN events"
" ON prev_event_id = events.event_id"
" WHERE event_edges.event_id = ?"
" AND event_edges.is_state = ?"
" LIMIT ?"
)
queue = PriorityQueue()
for event_id in event_list:
depth = self.db_pool.simple_select_one_onecol_txn(
txn,
table="events",
keyvalues={"event_id": event_id, "room_id": room_id},
retcol="depth",
allow_none=True,
)
if depth:
queue.put((-depth, event_id))
while not queue.empty() and len(event_results) < limit:
try:
_, event_id = queue.get_nowait()
except Empty:
break
if event_id in event_results:
continue
event_results.add(event_id)
txn.execute(query, (event_id, False, limit - len(event_results)))
for row in txn:
if row[1] not in event_results:
queue.put((-row[0], row[1]))
return event_results
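    # Illustrative note (hypothetical values): PriorityQueue pops the smallest
    # entry first, so storing (-depth, event_id) means the deepest (most recent)
    # events are expanded first. E.g. with entries (-5, "$e1") and (-3, "$e2"),
    # "$e1" (depth 5) is processed before "$e2" (depth 3), giving the
    # breadth-first, depth-ordered walk described above.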
@defer.inlineCallbacks
def get_missing_events(self, room_id, earliest_events, latest_events, limit):
ids = yield self.db_pool.runInteraction(
"get_missing_events",
self._get_missing_events,
room_id,
earliest_events,
latest_events,
limit,
)
events = yield self.get_events_as_list(ids)
return events
def _get_missing_events(self, txn, room_id, earliest_events, latest_events, limit):
seen_events = set(earliest_events)
front = set(latest_events) - seen_events
event_results = []
query = (
"SELECT prev_event_id FROM event_edges "
"WHERE room_id = ? AND event_id = ? AND is_state = ? "
"LIMIT ?"
)
while front and len(event_results) < limit:
new_front = set()
for event_id in front:
txn.execute(
query, (room_id, event_id, False, limit - len(event_results))
)
new_results = {t[0] for t in txn} - seen_events
new_front |= new_results
seen_events |= new_results
event_results.extend(new_results)
front = new_front
# we built the list working backwards from latest_events; we now need to
# reverse it so that the events are approximately chronological.
event_results.reverse()
return event_results
@defer.inlineCallbacks
def get_successor_events(self, event_ids):
"""Fetch all events that have the given events as a prev event
Args:
event_ids (iterable[str])
Returns:
Deferred[list[str]]
"""
rows = yield self.db_pool.simple_select_many_batch(
table="event_edges",
column="prev_event_id",
iterable=event_ids,
retcols=("event_id",),
desc="get_successor_events",
)
return [row["event_id"] for row in rows]
class EventFederationStore(EventFederationWorkerStore):
""" Responsible for storing and serving up the various graphs associated
    with an event, including the main event graph and the auth chains for an
    event.
Also has methods for getting the front (latest) and back (oldest) edges
of the event graphs. These are used to generate the parents for new events
and backfilling from another server respectively.
"""
EVENT_AUTH_STATE_ONLY = "event_auth_state_only"
def __init__(self, database: DatabasePool, db_conn, hs):
super(EventFederationStore, self).__init__(database, db_conn, hs)
self.db_pool.updates.register_background_update_handler(
self.EVENT_AUTH_STATE_ONLY, self._background_delete_non_state_event_auth
)
hs.get_clock().looping_call(
self._delete_old_forward_extrem_cache, 60 * 60 * 1000
)
def _delete_old_forward_extrem_cache(self):
def _delete_old_forward_extrem_cache_txn(txn):
# Delete entries older than a month, while making sure we don't delete
# the only entries for a room.
sql = """
DELETE FROM stream_ordering_to_exterm
WHERE
room_id IN (
SELECT room_id
FROM stream_ordering_to_exterm
WHERE stream_ordering > ?
) AND stream_ordering < ?
"""
txn.execute(
sql, (self.stream_ordering_month_ago, self.stream_ordering_month_ago)
)
return run_as_background_process(
"delete_old_forward_extrem_cache",
self.db_pool.runInteraction,
"_delete_old_forward_extrem_cache",
_delete_old_forward_extrem_cache_txn,
)
def clean_room_for_join(self, room_id):
return self.db_pool.runInteraction(
"clean_room_for_join", self._clean_room_for_join_txn, room_id
)
def _clean_room_for_join_txn(self, txn, room_id):
query = "DELETE FROM event_forward_extremities WHERE room_id = ?"
txn.execute(query, (room_id,))
txn.call_after(self.get_latest_event_ids_in_room.invalidate, (room_id,))
@defer.inlineCallbacks
def _background_delete_non_state_event_auth(self, progress, batch_size):
def delete_event_auth(txn):
target_min_stream_id = progress.get("target_min_stream_id_inclusive")
max_stream_id = progress.get("max_stream_id_exclusive")
if not target_min_stream_id or not max_stream_id:
txn.execute("SELECT COALESCE(MIN(stream_ordering), 0) FROM events")
rows = txn.fetchall()
target_min_stream_id = rows[0][0]
txn.execute("SELECT COALESCE(MAX(stream_ordering), 0) FROM events")
rows = txn.fetchall()
max_stream_id = rows[0][0]
min_stream_id = max_stream_id - batch_size
sql = """
DELETE FROM event_auth
WHERE event_id IN (
SELECT event_id FROM events
LEFT JOIN state_events USING (room_id, event_id)
WHERE ? <= stream_ordering AND stream_ordering < ?
AND state_key IS null
)
"""
txn.execute(sql, (min_stream_id, max_stream_id))
new_progress = {
"target_min_stream_id_inclusive": target_min_stream_id,
"max_stream_id_exclusive": min_stream_id,
}
self.db_pool.updates._background_update_progress_txn(
txn, self.EVENT_AUTH_STATE_ONLY, new_progress
)
return min_stream_id >= target_min_stream_id
result = yield self.db_pool.runInteraction(
self.EVENT_AUTH_STATE_ONLY, delete_event_auth
)
if not result:
yield self.db_pool.updates._end_background_update(
self.EVENT_AUTH_STATE_ONLY
)
return batch_size

View file

@ -0,0 +1,885 @@
# -*- coding: utf-8 -*-
# Copyright 2015 OpenMarket Ltd
# Copyright 2018 New Vector Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from typing import List
from canonicaljson import json
from synapse.metrics.background_process_metrics import run_as_background_process
from synapse.storage._base import LoggingTransaction, SQLBaseStore, db_to_json
from synapse.storage.database import DatabasePool
from synapse.util.caches.descriptors import cachedInlineCallbacks
logger = logging.getLogger(__name__)
DEFAULT_NOTIF_ACTION = ["notify", {"set_tweak": "highlight", "value": False}]
DEFAULT_HIGHLIGHT_ACTION = [
"notify",
{"set_tweak": "sound", "value": "default"},
{"set_tweak": "highlight"},
]
def _serialize_action(actions, is_highlight):
"""Custom serializer for actions. This allows us to "compress" common actions.
We use the fact that most users have the same actions for notifs (and for
highlights).
We store these default actions as the empty string rather than the full JSON.
Since the empty string isn't valid JSON there is no risk of this clashing with
any real JSON actions
"""
if is_highlight:
if actions == DEFAULT_HIGHLIGHT_ACTION:
return "" # We use empty string as the column is non-NULL
else:
if actions == DEFAULT_NOTIF_ACTION:
return ""
return json.dumps(actions)
def _deserialize_action(actions, is_highlight):
"""Custom deserializer for actions. This allows us to "compress" common actions
"""
if actions:
return db_to_json(actions)
if is_highlight:
return DEFAULT_HIGHLIGHT_ACTION
else:
return DEFAULT_NOTIF_ACTION
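# Illustrative sketch (added for clarity, not part of the original module): the
# default notification action round-trips through the empty-string encoding,
# while anything else is stored as JSON. For example:
#
#     _serialize_action(DEFAULT_NOTIF_ACTION, is_highlight=False)   == ""
#     _deserialize_action("", is_highlight=False)                   == DEFAULT_NOTIF_ACTION
#     _serialize_action(["dont_notify"], is_highlight=False)        == '["dont_notify"]'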
class EventPushActionsWorkerStore(SQLBaseStore):
def __init__(self, database: DatabasePool, db_conn, hs):
super(EventPushActionsWorkerStore, self).__init__(database, db_conn, hs)
# These get correctly set by _find_stream_orderings_for_times_txn
self.stream_ordering_month_ago = None
self.stream_ordering_day_ago = None
cur = LoggingTransaction(
db_conn.cursor(),
name="_find_stream_orderings_for_times_txn",
database_engine=self.database_engine,
)
self._find_stream_orderings_for_times_txn(cur)
cur.close()
self.find_stream_orderings_looping_call = self._clock.looping_call(
self._find_stream_orderings_for_times, 10 * 60 * 1000
)
self._rotate_delay = 3
self._rotate_count = 10000
@cachedInlineCallbacks(num_args=3, tree=True, max_entries=5000)
def get_unread_event_push_actions_by_room_for_user(
self, room_id, user_id, last_read_event_id
):
ret = yield self.db_pool.runInteraction(
"get_unread_event_push_actions_by_room",
self._get_unread_counts_by_receipt_txn,
room_id,
user_id,
last_read_event_id,
)
return ret
def _get_unread_counts_by_receipt_txn(
self, txn, room_id, user_id, last_read_event_id
):
sql = (
"SELECT stream_ordering"
" FROM events"
" WHERE room_id = ? AND event_id = ?"
)
txn.execute(sql, (room_id, last_read_event_id))
results = txn.fetchall()
if len(results) == 0:
return {"notify_count": 0, "highlight_count": 0}
stream_ordering = results[0][0]
return self._get_unread_counts_by_pos_txn(
txn, room_id, user_id, stream_ordering
)
def _get_unread_counts_by_pos_txn(self, txn, room_id, user_id, stream_ordering):
# First get number of notifications.
# We don't need to put a notif=1 clause as all rows always have
# notif=1
sql = (
"SELECT count(*)"
" FROM event_push_actions ea"
" WHERE"
" user_id = ?"
" AND room_id = ?"
" AND stream_ordering > ?"
)
txn.execute(sql, (user_id, room_id, stream_ordering))
row = txn.fetchone()
notify_count = row[0] if row else 0
txn.execute(
"""
SELECT notif_count FROM event_push_summary
WHERE room_id = ? AND user_id = ? AND stream_ordering > ?
""",
(room_id, user_id, stream_ordering),
)
rows = txn.fetchall()
if rows:
notify_count += rows[0][0]
# Now get the number of highlights
sql = (
"SELECT count(*)"
" FROM event_push_actions ea"
" WHERE"
" highlight = 1"
" AND user_id = ?"
" AND room_id = ?"
" AND stream_ordering > ?"
)
txn.execute(sql, (user_id, room_id, stream_ordering))
row = txn.fetchone()
highlight_count = row[0] if row else 0
return {"notify_count": notify_count, "highlight_count": highlight_count}
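    # Illustrative sketch (hypothetical numbers): if event_push_summary already
    # records 4 rotated notifications for this room/user past the given position,
    # and there are 2 newer rows in event_push_actions, one of which is a
    # highlight, the return value would be
    # {"notify_count": 6, "highlight_count": 1}.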
async def get_push_action_users_in_range(
self, min_stream_ordering, max_stream_ordering
):
def f(txn):
sql = (
"SELECT DISTINCT(user_id) FROM event_push_actions WHERE"
" stream_ordering >= ? AND stream_ordering <= ?"
)
txn.execute(sql, (min_stream_ordering, max_stream_ordering))
return [r[0] for r in txn]
ret = await self.db_pool.runInteraction("get_push_action_users_in_range", f)
return ret
async def get_unread_push_actions_for_user_in_range_for_http(
self,
user_id: str,
min_stream_ordering: int,
max_stream_ordering: int,
limit: int = 20,
) -> List[dict]:
"""Get a list of the most recent unread push actions for a given user,
within the given stream ordering range. Called by the httppusher.
Args:
user_id: The user to fetch push actions for.
min_stream_ordering: The exclusive lower bound on the
stream ordering of event push actions to fetch.
max_stream_ordering: The inclusive upper bound on the
stream ordering of event push actions to fetch.
limit: The maximum number of rows to return.
Returns:
A list of dicts with the keys "event_id", "room_id", "stream_ordering", "actions".
The list will be ordered by ascending stream_ordering.
            The list will have between 0 and `limit` entries.
"""
# find rooms that have a read receipt in them and return the next
# push actions
def get_after_receipt(txn):
# find rooms that have a read receipt in them and return the next
# push actions
sql = (
"SELECT ep.event_id, ep.room_id, ep.stream_ordering, ep.actions,"
" ep.highlight "
" FROM ("
" SELECT room_id,"
" MAX(stream_ordering) as stream_ordering"
" FROM events"
" INNER JOIN receipts_linearized USING (room_id, event_id)"
" WHERE receipt_type = 'm.read' AND user_id = ?"
" GROUP BY room_id"
") AS rl,"
" event_push_actions AS ep"
" WHERE"
" ep.room_id = rl.room_id"
" AND ep.stream_ordering > rl.stream_ordering"
" AND ep.user_id = ?"
" AND ep.stream_ordering > ?"
" AND ep.stream_ordering <= ?"
" ORDER BY ep.stream_ordering ASC LIMIT ?"
)
args = [user_id, user_id, min_stream_ordering, max_stream_ordering, limit]
txn.execute(sql, args)
return txn.fetchall()
after_read_receipt = await self.db_pool.runInteraction(
"get_unread_push_actions_for_user_in_range_http_arr", get_after_receipt
)
# There are rooms with push actions in them but you don't have a read receipt in
# them e.g. rooms you've been invited to, so get push actions for rooms which do
# not have read receipts in them too.
def get_no_receipt(txn):
sql = (
"SELECT ep.event_id, ep.room_id, ep.stream_ordering, ep.actions,"
" ep.highlight "
" FROM event_push_actions AS ep"
" INNER JOIN events AS e USING (room_id, event_id)"
" WHERE"
" ep.room_id NOT IN ("
" SELECT room_id FROM receipts_linearized"
" WHERE receipt_type = 'm.read' AND user_id = ?"
" GROUP BY room_id"
" )"
" AND ep.user_id = ?"
" AND ep.stream_ordering > ?"
" AND ep.stream_ordering <= ?"
" ORDER BY ep.stream_ordering ASC LIMIT ?"
)
args = [user_id, user_id, min_stream_ordering, max_stream_ordering, limit]
txn.execute(sql, args)
return txn.fetchall()
no_read_receipt = await self.db_pool.runInteraction(
"get_unread_push_actions_for_user_in_range_http_nrr", get_no_receipt
)
notifs = [
{
"event_id": row[0],
"room_id": row[1],
"stream_ordering": row[2],
"actions": _deserialize_action(row[3], row[4]),
}
for row in after_read_receipt + no_read_receipt
]
# Now sort it so it's ordered correctly, since currently it will
# contain results from the first query, correctly ordered, followed
# by results from the second query, but we want them all ordered
# by stream_ordering, oldest first.
notifs.sort(key=lambda r: r["stream_ordering"])
# Take only up to the limit. We have to stop at the limit because
# one of the subqueries may have hit the limit.
return notifs[:limit]
async def get_unread_push_actions_for_user_in_range_for_email(
self,
user_id: str,
min_stream_ordering: int,
max_stream_ordering: int,
limit: int = 20,
) -> List[dict]:
"""Get a list of the most recent unread push actions for a given user,
within the given stream ordering range. Called by the emailpusher
Args:
user_id: The user to fetch push actions for.
min_stream_ordering: The exclusive lower bound on the
stream ordering of event push actions to fetch.
max_stream_ordering: The inclusive upper bound on the
stream ordering of event push actions to fetch.
limit: The maximum number of rows to return.
Returns:
A list of dicts with the keys "event_id", "room_id", "stream_ordering", "actions", "received_ts".
The list will be ordered by descending received_ts.
            The list will have between 0 and `limit` entries.
"""
# find rooms that have a read receipt in them and return the most recent
# push actions
def get_after_receipt(txn):
sql = (
"SELECT ep.event_id, ep.room_id, ep.stream_ordering, ep.actions,"
" ep.highlight, e.received_ts"
" FROM ("
" SELECT room_id,"
" MAX(stream_ordering) as stream_ordering"
" FROM events"
" INNER JOIN receipts_linearized USING (room_id, event_id)"
" WHERE receipt_type = 'm.read' AND user_id = ?"
" GROUP BY room_id"
") AS rl,"
" event_push_actions AS ep"
" INNER JOIN events AS e USING (room_id, event_id)"
" WHERE"
" ep.room_id = rl.room_id"
" AND ep.stream_ordering > rl.stream_ordering"
" AND ep.user_id = ?"
" AND ep.stream_ordering > ?"
" AND ep.stream_ordering <= ?"
" ORDER BY ep.stream_ordering DESC LIMIT ?"
)
args = [user_id, user_id, min_stream_ordering, max_stream_ordering, limit]
txn.execute(sql, args)
return txn.fetchall()
after_read_receipt = await self.db_pool.runInteraction(
"get_unread_push_actions_for_user_in_range_email_arr", get_after_receipt
)
# There are rooms with push actions in them but you don't have a read receipt in
# them e.g. rooms you've been invited to, so get push actions for rooms which do
# not have read receipts in them too.
def get_no_receipt(txn):
sql = (
"SELECT ep.event_id, ep.room_id, ep.stream_ordering, ep.actions,"
" ep.highlight, e.received_ts"
" FROM event_push_actions AS ep"
" INNER JOIN events AS e USING (room_id, event_id)"
" WHERE"
" ep.room_id NOT IN ("
" SELECT room_id FROM receipts_linearized"
" WHERE receipt_type = 'm.read' AND user_id = ?"
" GROUP BY room_id"
" )"
" AND ep.user_id = ?"
" AND ep.stream_ordering > ?"
" AND ep.stream_ordering <= ?"
" ORDER BY ep.stream_ordering DESC LIMIT ?"
)
args = [user_id, user_id, min_stream_ordering, max_stream_ordering, limit]
txn.execute(sql, args)
return txn.fetchall()
no_read_receipt = await self.db_pool.runInteraction(
"get_unread_push_actions_for_user_in_range_email_nrr", get_no_receipt
)
# Make a list of dicts from the two sets of results.
notifs = [
{
"event_id": row[0],
"room_id": row[1],
"stream_ordering": row[2],
"actions": _deserialize_action(row[3], row[4]),
"received_ts": row[5],
}
for row in after_read_receipt + no_read_receipt
]
# Now sort it so it's ordered correctly, since currently it will
# contain results from the first query, correctly ordered, followed
# by results from the second query, but we want them all ordered
# by received_ts (most recent first)
notifs.sort(key=lambda r: -(r["received_ts"] or 0))
# Now return the first `limit`
return notifs[:limit]
def get_if_maybe_push_in_range_for_user(self, user_id, min_stream_ordering):
"""A fast check to see if there might be something to push for the
user since the given stream ordering. May return false positives.
Useful to know whether to bother starting a pusher on start up or not.
Args:
user_id (str)
min_stream_ordering (int)
Returns:
Deferred[bool]: True if there may be push to process, False if
there definitely isn't.
"""
def _get_if_maybe_push_in_range_for_user_txn(txn):
sql = """
SELECT 1 FROM event_push_actions
WHERE user_id = ? AND stream_ordering > ?
LIMIT 1
"""
txn.execute(sql, (user_id, min_stream_ordering))
return bool(txn.fetchone())
return self.db_pool.runInteraction(
"get_if_maybe_push_in_range_for_user",
_get_if_maybe_push_in_range_for_user_txn,
)
async def add_push_actions_to_staging(self, event_id, user_id_actions):
"""Add the push actions for the event to the push action staging area.
Args:
event_id (str)
            user_id_actions (dict[str, list[dict|str]]): A dictionary mapping
user_id to list of push actions, where an action can either be
a string or dict.
Returns:
Deferred
"""
if not user_id_actions:
return
# This is a helper function for generating the necessary tuple that
        # can be used to insert into the `event_push_actions_staging` table.
def _gen_entry(user_id, actions):
is_highlight = 1 if _action_has_highlight(actions) else 0
return (
event_id, # event_id column
user_id, # user_id column
_serialize_action(actions, is_highlight), # actions column
1, # notif column
is_highlight, # highlight column
)
def _add_push_actions_to_staging_txn(txn):
# We don't use simple_insert_many here to avoid the overhead
# of generating lists of dicts.
sql = """
INSERT INTO event_push_actions_staging
(event_id, user_id, actions, notif, highlight)
VALUES (?, ?, ?, ?, ?)
"""
txn.executemany(
sql,
(
_gen_entry(user_id, actions)
for user_id, actions in user_id_actions.items()
),
)
return await self.db_pool.runInteraction(
"add_push_actions_to_staging", _add_push_actions_to_staging_txn
)
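    # Illustrative sketch (the `store` variable, event ID and user ID are
    # assumptions): a call such as
    #
    #     await store.add_push_actions_to_staging(
    #         "$event_id",
    #         {"@user:example.com": ["notify", {"set_tweak": "highlight", "value": False}]},
    #     )
    #
    # inserts one staging row with notif=1, highlight=0 and actions serialized
    # to "" (the default-notification encoding above).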
async def remove_push_actions_from_staging(self, event_id: str) -> None:
"""Called if we failed to persist the event to ensure that stale push
actions don't build up in the DB
"""
try:
res = await self.db_pool.simple_delete(
table="event_push_actions_staging",
keyvalues={"event_id": event_id},
desc="remove_push_actions_from_staging",
)
return res
except Exception:
# this method is called from an exception handler, so propagating
# another exception here really isn't helpful - there's nothing
# the caller can do about it. Just log the exception and move on.
logger.exception(
"Error removing push actions after event persistence failure"
)
def _find_stream_orderings_for_times(self):
return run_as_background_process(
"event_push_action_stream_orderings",
self.db_pool.runInteraction,
"_find_stream_orderings_for_times",
self._find_stream_orderings_for_times_txn,
)
def _find_stream_orderings_for_times_txn(self, txn):
logger.info("Searching for stream ordering 1 month ago")
self.stream_ordering_month_ago = self._find_first_stream_ordering_after_ts_txn(
txn, self._clock.time_msec() - 30 * 24 * 60 * 60 * 1000
)
logger.info(
"Found stream ordering 1 month ago: it's %d", self.stream_ordering_month_ago
)
logger.info("Searching for stream ordering 1 day ago")
self.stream_ordering_day_ago = self._find_first_stream_ordering_after_ts_txn(
txn, self._clock.time_msec() - 24 * 60 * 60 * 1000
)
logger.info(
"Found stream ordering 1 day ago: it's %d", self.stream_ordering_day_ago
)
def find_first_stream_ordering_after_ts(self, ts):
"""Gets the stream ordering corresponding to a given timestamp.
Specifically, finds the stream_ordering of the first event that was
received on or after the timestamp. This is done by a binary search on
the events table, since there is no index on received_ts, so is
relatively slow.
Args:
ts (int): timestamp in millis
Returns:
Deferred[int]: stream ordering of the first event received on/after
the timestamp
"""
return self.db_pool.runInteraction(
"_find_first_stream_ordering_after_ts_txn",
self._find_first_stream_ordering_after_ts_txn,
ts,
)
@staticmethod
def _find_first_stream_ordering_after_ts_txn(txn, ts):
"""
Find the stream_ordering of the first event that was received on or
after a given timestamp. This is relatively slow as there is no index
on received_ts but we can then use this to delete push actions before
this.
received_ts must necessarily be in the same order as stream_ordering
and stream_ordering is indexed, so we manually binary search using
stream_ordering
Args:
txn (twisted.enterprise.adbapi.Transaction):
ts (int): timestamp to search for
Returns:
int: stream ordering
"""
txn.execute("SELECT MAX(stream_ordering) FROM events")
max_stream_ordering = txn.fetchone()[0]
if max_stream_ordering is None:
return 0
# We want the first stream_ordering in which received_ts is greater
# than or equal to ts. Call this point X.
#
# We maintain the invariants:
#
# range_start <= X <= range_end
#
range_start = 0
range_end = max_stream_ordering + 1
# Given a stream_ordering, look up the timestamp at that
# stream_ordering.
#
# The array may be sparse (we may be missing some stream_orderings).
        # We treat a gap as having the same value as the preceding
        # entry, because we will pick the lowest stream_ordering
# which satisfies our requirement of received_ts >= ts.
#
# For example, if our array of events indexed by stream_ordering is
# [10, <none>, 20], we should treat this as being equivalent to
# [10, 10, 20].
#
sql = (
"SELECT received_ts FROM events"
" WHERE stream_ordering <= ?"
" ORDER BY stream_ordering DESC"
" LIMIT 1"
)
while range_end - range_start > 0:
middle = (range_end + range_start) // 2
txn.execute(sql, (middle,))
row = txn.fetchone()
if row is None:
# no rows with stream_ordering<=middle
range_start = middle + 1
continue
middle_ts = row[0]
if ts > middle_ts:
# we got a timestamp lower than the one we were looking for.
# definitely need to look higher: X > middle.
range_start = middle + 1
else:
# we got a timestamp higher than (or the same as) the one we
# were looking for. We aren't yet sure about the point we
# looked up, but we can be sure that X <= middle.
range_end = middle
return range_end
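    # Illustrative walk-through (hypothetical data): with events at stream
    # orderings {1: received_ts 10, 3: received_ts 20} and nothing at 2, a search
    # for ts=15 treats ordering 2 as if it had received_ts 10, so the binary
    # search converges on range_end == 3, the first ordering whose (effective)
    # received_ts is >= 15.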
async def get_time_of_last_push_action_before(self, stream_ordering):
def f(txn):
sql = (
"SELECT e.received_ts"
" FROM event_push_actions AS ep"
" JOIN events e ON ep.room_id = e.room_id AND ep.event_id = e.event_id"
" WHERE ep.stream_ordering > ?"
" ORDER BY ep.stream_ordering ASC"
" LIMIT 1"
)
txn.execute(sql, (stream_ordering,))
return txn.fetchone()
result = await self.db_pool.runInteraction(
"get_time_of_last_push_action_before", f
)
return result[0] if result else None
class EventPushActionsStore(EventPushActionsWorkerStore):
EPA_HIGHLIGHT_INDEX = "epa_highlight_index"
def __init__(self, database: DatabasePool, db_conn, hs):
super(EventPushActionsStore, self).__init__(database, db_conn, hs)
self.db_pool.updates.register_background_index_update(
self.EPA_HIGHLIGHT_INDEX,
index_name="event_push_actions_u_highlight",
table="event_push_actions",
columns=["user_id", "stream_ordering"],
)
self.db_pool.updates.register_background_index_update(
"event_push_actions_highlights_index",
index_name="event_push_actions_highlights_index",
table="event_push_actions",
columns=["user_id", "room_id", "topological_ordering", "stream_ordering"],
where_clause="highlight=1",
)
self._doing_notif_rotation = False
self._rotate_notif_loop = self._clock.looping_call(
self._start_rotate_notifs, 30 * 60 * 1000
)
async def get_push_actions_for_user(
self, user_id, before=None, limit=50, only_highlight=False
):
def f(txn):
before_clause = ""
if before:
before_clause = "AND epa.stream_ordering < ?"
args = [user_id, before, limit]
else:
args = [user_id, limit]
if only_highlight:
if len(before_clause) > 0:
before_clause += " "
before_clause += "AND epa.highlight = 1"
# NB. This assumes event_ids are globally unique since
# it makes the query easier to index
sql = (
"SELECT epa.event_id, epa.room_id,"
" epa.stream_ordering, epa.topological_ordering,"
" epa.actions, epa.highlight, epa.profile_tag, e.received_ts"
" FROM event_push_actions epa, events e"
" WHERE epa.event_id = e.event_id"
" AND epa.user_id = ? %s"
" ORDER BY epa.stream_ordering DESC"
" LIMIT ?" % (before_clause,)
)
txn.execute(sql, args)
return self.db_pool.cursor_to_dict(txn)
push_actions = await self.db_pool.runInteraction("get_push_actions_for_user", f)
for pa in push_actions:
pa["actions"] = _deserialize_action(pa["actions"], pa["highlight"])
return push_actions
async def get_latest_push_action_stream_ordering(self):
def f(txn):
txn.execute("SELECT MAX(stream_ordering) FROM event_push_actions")
return txn.fetchone()
result = await self.db_pool.runInteraction(
"get_latest_push_action_stream_ordering", f
)
return result[0] or 0
def _remove_old_push_actions_before_txn(
self, txn, room_id, user_id, stream_ordering
):
"""
Purges old push actions for a user and room before a given
stream_ordering.
        However, we keep a month's worth of highlighted notifications, so that
users can still get a list of recent highlights.
Args:
            txn: The transaction
room_id: Room ID to delete from
user_id: user ID to delete for
stream_ordering: The lowest stream ordering which will
not be deleted.
"""
txn.call_after(
self.get_unread_event_push_actions_by_room_for_user.invalidate_many,
(room_id, user_id),
)
# We need to join on the events table to get the received_ts for
# event_push_actions and sqlite won't let us use a join in a delete so
# we can't just delete where received_ts < x. Furthermore we can
# only identify event_push_actions by a tuple of room_id, event_id
        # so we can't use a subquery.
# Instead, we look up the stream ordering for the last event in that
# room received before the threshold time and delete event_push_actions
        # in the room with a stream_ordering before that.
txn.execute(
"DELETE FROM event_push_actions "
" WHERE user_id = ? AND room_id = ? AND "
" stream_ordering <= ?"
" AND ((stream_ordering < ? AND highlight = 1) or highlight = 0)",
(user_id, room_id, stream_ordering, self.stream_ordering_month_ago),
)
txn.execute(
"""
DELETE FROM event_push_summary
WHERE room_id = ? AND user_id = ? AND stream_ordering <= ?
""",
(room_id, user_id, stream_ordering),
)
def _start_rotate_notifs(self):
return run_as_background_process("rotate_notifs", self._rotate_notifs)
async def _rotate_notifs(self):
if self._doing_notif_rotation or self.stream_ordering_day_ago is None:
return
self._doing_notif_rotation = True
try:
while True:
logger.info("Rotating notifications")
caught_up = await self.db_pool.runInteraction(
"_rotate_notifs", self._rotate_notifs_txn
)
if caught_up:
break
await self.hs.get_clock().sleep(self._rotate_delay)
finally:
self._doing_notif_rotation = False
def _rotate_notifs_txn(self, txn):
"""Archives older notifications into event_push_summary. Returns whether
the archiving process has caught up or not.
"""
old_rotate_stream_ordering = self.db_pool.simple_select_one_onecol_txn(
txn,
table="event_push_summary_stream_ordering",
keyvalues={},
retcol="stream_ordering",
)
        # We don't want to try to rotate millions of rows at once, so we cap the
# maximum stream ordering we'll rotate before.
txn.execute(
"""
SELECT stream_ordering FROM event_push_actions
WHERE stream_ordering > ?
ORDER BY stream_ordering ASC LIMIT 1 OFFSET ?
""",
(old_rotate_stream_ordering, self._rotate_count),
)
stream_row = txn.fetchone()
if stream_row:
(offset_stream_ordering,) = stream_row
rotate_to_stream_ordering = min(
self.stream_ordering_day_ago, offset_stream_ordering
)
caught_up = offset_stream_ordering >= self.stream_ordering_day_ago
else:
rotate_to_stream_ordering = self.stream_ordering_day_ago
caught_up = True
logger.info("Rotating notifications up to: %s", rotate_to_stream_ordering)
self._rotate_notifs_before_txn(txn, rotate_to_stream_ordering)
# We have caught up iff we were limited by `stream_ordering_day_ago`
return caught_up
def _rotate_notifs_before_txn(self, txn, rotate_to_stream_ordering):
old_rotate_stream_ordering = self.db_pool.simple_select_one_onecol_txn(
txn,
table="event_push_summary_stream_ordering",
keyvalues={},
retcol="stream_ordering",
)
# Calculate the new counts that should be upserted into event_push_summary
sql = """
SELECT user_id, room_id,
coalesce(old.notif_count, 0) + upd.notif_count,
upd.stream_ordering,
old.user_id
FROM (
SELECT user_id, room_id, count(*) as notif_count,
max(stream_ordering) as stream_ordering
FROM event_push_actions
WHERE ? <= stream_ordering AND stream_ordering < ?
AND highlight = 0
GROUP BY user_id, room_id
) AS upd
LEFT JOIN event_push_summary AS old USING (user_id, room_id)
"""
txn.execute(sql, (old_rotate_stream_ordering, rotate_to_stream_ordering))
rows = txn.fetchall()
logger.info("Rotating notifications, handling %d rows", len(rows))
# If the `old.user_id` above is NULL then we know there isn't already an
# entry in the table, so we simply insert it. Otherwise we update the
# existing table.
self.db_pool.simple_insert_many_txn(
txn,
table="event_push_summary",
values=[
{
"user_id": row[0],
"room_id": row[1],
"notif_count": row[2],
"stream_ordering": row[3],
}
for row in rows
if row[4] is None
],
)
txn.executemany(
"""
UPDATE event_push_summary SET notif_count = ?, stream_ordering = ?
WHERE user_id = ? AND room_id = ?
""",
((row[2], row[3], row[0], row[1]) for row in rows if row[4] is not None),
)
txn.execute(
"DELETE FROM event_push_actions"
" WHERE ? <= stream_ordering AND stream_ordering < ? AND highlight = 0",
(old_rotate_stream_ordering, rotate_to_stream_ordering),
)
logger.info("Rotating notifications, deleted %s push actions", txn.rowcount)
txn.execute(
"UPDATE event_push_summary_stream_ordering SET stream_ordering = ?",
(rotate_to_stream_ordering,),
)
def _action_has_highlight(actions):
for action in actions:
try:
if action.get("set_tweak", None) == "highlight":
return action.get("value", True)
except AttributeError:
pass
return False
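
As a side note for readers following the rotation loop above: the OFFSET trick is what bounds the work of a single pass. Below is a standalone sketch of just that query, using an in-memory sqlite3 table and a made-up rotate_count; none of it is part of this module.

import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE event_push_actions (stream_ordering INTEGER)")
conn.executemany(
    "INSERT INTO event_push_actions VALUES (?)", [(i,) for i in range(1, 1001)]
)

old_rotate_stream_ordering = 0  # where the previous rotation stopped (assumed)
rotate_count = 250  # hypothetical stand-in for self._rotate_count

# Look `rotate_count` rows ahead of the last rotation point; if such a row
# exists we cap this pass at its stream_ordering and loop again later.
row = conn.execute(
    "SELECT stream_ordering FROM event_push_actions"
    " WHERE stream_ordering > ?"
    " ORDER BY stream_ordering ASC LIMIT 1 OFFSET ?",
    (old_rotate_stream_ordering, rotate_count),
).fetchone()
print(row)  # (251,): rotate up to min(stream_ordering_day_ago, 251) and go again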

File diff suppressed because it is too large

@@ -0,0 +1,585 @@
# -*- coding: utf-8 -*-
# Copyright 2019 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from twisted.internet import defer
from synapse.api.constants import EventContentFields
from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause
from synapse.storage.database import DatabasePool
logger = logging.getLogger(__name__)
class EventsBackgroundUpdatesStore(SQLBaseStore):
EVENT_ORIGIN_SERVER_TS_NAME = "event_origin_server_ts"
EVENT_FIELDS_SENDER_URL_UPDATE_NAME = "event_fields_sender_url"
DELETE_SOFT_FAILED_EXTREMITIES = "delete_soft_failed_extremities"
def __init__(self, database: DatabasePool, db_conn, hs):
super(EventsBackgroundUpdatesStore, self).__init__(database, db_conn, hs)
self.db_pool.updates.register_background_update_handler(
self.EVENT_ORIGIN_SERVER_TS_NAME, self._background_reindex_origin_server_ts
)
self.db_pool.updates.register_background_update_handler(
self.EVENT_FIELDS_SENDER_URL_UPDATE_NAME,
self._background_reindex_fields_sender,
)
self.db_pool.updates.register_background_index_update(
"event_contains_url_index",
index_name="event_contains_url_index",
table="events",
columns=["room_id", "topological_ordering", "stream_ordering"],
where_clause="contains_url = true AND outlier = false",
)
# an event_id index on event_search is useful for the purge_history
# api. Plus it means we get to enforce some integrity with a UNIQUE
# clause
self.db_pool.updates.register_background_index_update(
"event_search_event_id_idx",
index_name="event_search_event_id_idx",
table="event_search",
columns=["event_id"],
unique=True,
psql_only=True,
)
self.db_pool.updates.register_background_update_handler(
self.DELETE_SOFT_FAILED_EXTREMITIES, self._cleanup_extremities_bg_update
)
self.db_pool.updates.register_background_update_handler(
"redactions_received_ts", self._redactions_received_ts
)
# This index gets deleted in `event_fix_redactions_bytes` update
self.db_pool.updates.register_background_index_update(
"event_fix_redactions_bytes_create_index",
index_name="redactions_censored_redacts",
table="redactions",
columns=["redacts"],
where_clause="have_censored",
)
self.db_pool.updates.register_background_update_handler(
"event_fix_redactions_bytes", self._event_fix_redactions_bytes
)
self.db_pool.updates.register_background_update_handler(
"event_store_labels", self._event_store_labels
)
self.db_pool.updates.register_background_index_update(
"redactions_have_censored_ts_idx",
index_name="redactions_have_censored_ts",
table="redactions",
columns=["received_ts"],
where_clause="NOT have_censored",
)
@defer.inlineCallbacks
def _background_reindex_fields_sender(self, progress, batch_size):
target_min_stream_id = progress["target_min_stream_id_inclusive"]
max_stream_id = progress["max_stream_id_exclusive"]
rows_inserted = progress.get("rows_inserted", 0)
INSERT_CLUMP_SIZE = 1000
def reindex_txn(txn):
sql = (
"SELECT stream_ordering, event_id, json FROM events"
" INNER JOIN event_json USING (event_id)"
" WHERE ? <= stream_ordering AND stream_ordering < ?"
" ORDER BY stream_ordering DESC"
" LIMIT ?"
)
txn.execute(sql, (target_min_stream_id, max_stream_id, batch_size))
rows = txn.fetchall()
if not rows:
return 0
min_stream_id = rows[-1][0]
update_rows = []
for row in rows:
try:
event_id = row[1]
event_json = db_to_json(row[2])
sender = event_json["sender"]
content = event_json["content"]
contains_url = "url" in content
if contains_url:
contains_url &= isinstance(content["url"], str)
except (KeyError, AttributeError):
# If the event is missing a necessary field then
# skip over it.
continue
update_rows.append((sender, contains_url, event_id))
sql = "UPDATE events SET sender = ?, contains_url = ? WHERE event_id = ?"
for index in range(0, len(update_rows), INSERT_CLUMP_SIZE):
clump = update_rows[index : index + INSERT_CLUMP_SIZE]
txn.executemany(sql, clump)
progress = {
"target_min_stream_id_inclusive": target_min_stream_id,
"max_stream_id_exclusive": min_stream_id,
"rows_inserted": rows_inserted + len(rows),
}
self.db_pool.updates._background_update_progress_txn(
txn, self.EVENT_FIELDS_SENDER_URL_UPDATE_NAME, progress
)
return len(rows)
result = yield self.db_pool.runInteraction(
self.EVENT_FIELDS_SENDER_URL_UPDATE_NAME, reindex_txn
)
if not result:
yield self.db_pool.updates._end_background_update(
self.EVENT_FIELDS_SENDER_URL_UPDATE_NAME
)
return result
@defer.inlineCallbacks
def _background_reindex_origin_server_ts(self, progress, batch_size):
target_min_stream_id = progress["target_min_stream_id_inclusive"]
max_stream_id = progress["max_stream_id_exclusive"]
rows_inserted = progress.get("rows_inserted", 0)
INSERT_CLUMP_SIZE = 1000
def reindex_search_txn(txn):
sql = (
"SELECT stream_ordering, event_id FROM events"
" WHERE ? <= stream_ordering AND stream_ordering < ?"
" ORDER BY stream_ordering DESC"
" LIMIT ?"
)
txn.execute(sql, (target_min_stream_id, max_stream_id, batch_size))
rows = txn.fetchall()
if not rows:
return 0
min_stream_id = rows[-1][0]
event_ids = [row[1] for row in rows]
rows_to_update = []
chunks = [event_ids[i : i + 100] for i in range(0, len(event_ids), 100)]
for chunk in chunks:
ev_rows = self.db_pool.simple_select_many_txn(
txn,
table="event_json",
column="event_id",
iterable=chunk,
retcols=["event_id", "json"],
keyvalues={},
)
for row in ev_rows:
event_id = row["event_id"]
event_json = db_to_json(row["json"])
try:
origin_server_ts = event_json["origin_server_ts"]
except (KeyError, AttributeError):
# If the event is missing a necessary field then
# skip over it.
continue
rows_to_update.append((origin_server_ts, event_id))
sql = "UPDATE events SET origin_server_ts = ? WHERE event_id = ?"
for index in range(0, len(rows_to_update), INSERT_CLUMP_SIZE):
clump = rows_to_update[index : index + INSERT_CLUMP_SIZE]
txn.executemany(sql, clump)
progress = {
"target_min_stream_id_inclusive": target_min_stream_id,
"max_stream_id_exclusive": min_stream_id,
"rows_inserted": rows_inserted + len(rows_to_update),
}
self.db_pool.updates._background_update_progress_txn(
txn, self.EVENT_ORIGIN_SERVER_TS_NAME, progress
)
return len(rows_to_update)
result = yield self.db_pool.runInteraction(
self.EVENT_ORIGIN_SERVER_TS_NAME, reindex_search_txn
)
if not result:
yield self.db_pool.updates._end_background_update(
self.EVENT_ORIGIN_SERVER_TS_NAME
)
return result
@defer.inlineCallbacks
def _cleanup_extremities_bg_update(self, progress, batch_size):
"""Background update to clean out extremities that should have been
deleted previously.
Mainly used to deal with the aftermath of #5269.
"""
# This works by first copying all existing forward extremities into the
# `_extremities_to_check` table at start up, and then checking whether
# each event in that table has any descendants that are not
# soft-failed/rejected. If it does, then we delete that event
# from the forward extremities table.
#
# For efficiency, we do this in batches by recursively pulling out all
# descendants of a batch until we find the non soft-failed/rejected
# events, i.e. the set of descendants whose chain of prev events back
# to the batch of extremities are all soft-failed or rejected.
# Typically, we won't find any such events as extremities will rarely
# have any descendants, but if they do then we should delete those
# extremities.
def _cleanup_extremities_bg_update_txn(txn):
# The set of extremity event IDs that we're checking this round
original_set = set()
# A dict[str, set[str]] of event ID to their prev events.
graph = {}
# The set of descendants of the original set that are not rejected
# nor soft-failed. Ancestors of these events should be removed
# from the forward extremities table.
non_rejected_leaves = set()
# Set of event IDs that have been soft failed, and for which we
# should check if they have descendants which haven't been soft
# failed.
soft_failed_events_to_lookup = set()
# First, we get `batch_size` events from the table, pulling out
# their successor events, if any, and the successor events'
# rejection status.
txn.execute(
"""SELECT prev_event_id, event_id, internal_metadata,
rejections.event_id IS NOT NULL, events.outlier
FROM (
SELECT event_id AS prev_event_id
FROM _extremities_to_check
LIMIT ?
) AS f
LEFT JOIN event_edges USING (prev_event_id)
LEFT JOIN events USING (event_id)
LEFT JOIN event_json USING (event_id)
LEFT JOIN rejections USING (event_id)
""",
(batch_size,),
)
for prev_event_id, event_id, metadata, rejected, outlier in txn:
original_set.add(prev_event_id)
if not event_id or outlier:
# Common case where the forward extremity doesn't have any
# descendants.
continue
graph.setdefault(event_id, set()).add(prev_event_id)
soft_failed = False
if metadata:
soft_failed = db_to_json(metadata).get("soft_failed")
if soft_failed or rejected:
soft_failed_events_to_lookup.add(event_id)
else:
non_rejected_leaves.add(event_id)
# Now we recursively check all the soft-failed descendants we
# found above in the same way, until we have nothing left to
# check.
while soft_failed_events_to_lookup:
# We only want to do 100 at a time, so we split the given list
# into two.
batch = list(soft_failed_events_to_lookup)
to_check, to_defer = batch[:100], batch[100:]
soft_failed_events_to_lookup = set(to_defer)
sql = """SELECT prev_event_id, event_id, internal_metadata,
rejections.event_id IS NOT NULL
FROM event_edges
INNER JOIN events USING (event_id)
INNER JOIN event_json USING (event_id)
LEFT JOIN rejections USING (event_id)
WHERE
NOT events.outlier
AND
"""
clause, args = make_in_list_sql_clause(
self.database_engine, "prev_event_id", to_check
)
txn.execute(sql + clause, list(args))
for prev_event_id, event_id, metadata, rejected in txn:
if event_id in graph:
# Already handled this event previously, but we still
# want to record the edge.
graph[event_id].add(prev_event_id)
continue
graph[event_id] = {prev_event_id}
soft_failed = db_to_json(metadata).get("soft_failed")
if soft_failed or rejected:
soft_failed_events_to_lookup.add(event_id)
else:
non_rejected_leaves.add(event_id)
# We have a set of non-soft-failed descendants, so we recurse up
# the graph to find all ancestors and add them to the set of event
# IDs that we can delete from forward extremities table.
to_delete = set()
while non_rejected_leaves:
event_id = non_rejected_leaves.pop()
prev_event_ids = graph.get(event_id, set())
non_rejected_leaves.update(prev_event_ids)
to_delete.update(prev_event_ids)
to_delete.intersection_update(original_set)
deleted = self.db_pool.simple_delete_many_txn(
txn=txn,
table="event_forward_extremities",
column="event_id",
iterable=to_delete,
keyvalues={},
)
logger.info(
"Deleted %d forward extremities of %d checked, to clean up #5269",
deleted,
len(original_set),
)
if deleted:
# We now need to invalidate the caches of these rooms
rows = self.db_pool.simple_select_many_txn(
txn,
table="events",
column="event_id",
iterable=to_delete,
keyvalues={},
retcols=("room_id",),
)
room_ids = {row["room_id"] for row in rows}
for room_id in room_ids:
txn.call_after(
self.get_latest_event_ids_in_room.invalidate, (room_id,)
)
self.db_pool.simple_delete_many_txn(
txn=txn,
table="_extremities_to_check",
column="event_id",
iterable=original_set,
keyvalues={},
)
return len(original_set)
num_handled = yield self.db_pool.runInteraction(
"_cleanup_extremities_bg_update", _cleanup_extremities_bg_update_txn
)
if not num_handled:
yield self.db_pool.updates._end_background_update(
self.DELETE_SOFT_FAILED_EXTREMITIES
)
def _drop_table_txn(txn):
txn.execute("DROP TABLE _extremities_to_check")
yield self.db_pool.runInteraction(
"_cleanup_extremities_bg_update_drop_table", _drop_table_txn
)
return num_handled
@defer.inlineCallbacks
def _redactions_received_ts(self, progress, batch_size):
"""Handles filling out the `received_ts` column in redactions.
"""
last_event_id = progress.get("last_event_id", "")
def _redactions_received_ts_txn(txn):
# Fetch the set of event IDs that we want to update
sql = """
SELECT event_id FROM redactions
WHERE event_id > ?
ORDER BY event_id ASC
LIMIT ?
"""
txn.execute(sql, (last_event_id, batch_size))
rows = txn.fetchall()
if not rows:
return 0
(upper_event_id,) = rows[-1]
# Update the redactions with the received_ts.
#
# Note: Not all events have an associated received_ts, so we
# fall back to using origin_server_ts. If we for some reason don't
# have an origin_server_ts, let's just use the current timestamp.
#
# We don't want to leave it null, as then we'll never try and
# censor those redactions.
sql = """
UPDATE redactions
SET received_ts = (
SELECT COALESCE(received_ts, origin_server_ts, ?) FROM events
WHERE events.event_id = redactions.event_id
)
WHERE ? <= event_id AND event_id <= ?
"""
txn.execute(sql, (self._clock.time_msec(), last_event_id, upper_event_id))
self.db_pool.updates._background_update_progress_txn(
txn, "redactions_received_ts", {"last_event_id": upper_event_id}
)
return len(rows)
count = yield self.db_pool.runInteraction(
"_redactions_received_ts", _redactions_received_ts_txn
)
if not count:
yield self.db_pool.updates._end_background_update("redactions_received_ts")
return count
@defer.inlineCallbacks
def _event_fix_redactions_bytes(self, progress, batch_size):
"""Undoes hex encoded censored redacted event JSON.
"""
def _event_fix_redactions_bytes_txn(txn):
# This update is quite fast due to the new index.
txn.execute(
"""
UPDATE event_json
SET
json = convert_from(json::bytea, 'utf8')
FROM redactions
WHERE
redactions.have_censored
AND event_json.event_id = redactions.redacts
AND json NOT LIKE '{%';
"""
)
txn.execute("DROP INDEX redactions_censored_redacts")
yield self.db_pool.runInteraction(
"_event_fix_redactions_bytes", _event_fix_redactions_bytes_txn
)
yield self.db_pool.updates._end_background_update("event_fix_redactions_bytes")
return 1
@defer.inlineCallbacks
def _event_store_labels(self, progress, batch_size):
"""Background update handler which will store labels for existing events."""
last_event_id = progress.get("last_event_id", "")
def _event_store_labels_txn(txn):
txn.execute(
"""
SELECT event_id, json FROM event_json
LEFT JOIN event_labels USING (event_id)
WHERE event_id > ? AND label IS NULL
ORDER BY event_id LIMIT ?
""",
(last_event_id, batch_size),
)
results = list(txn)
nbrows = 0
last_row_event_id = ""
for (event_id, event_json_raw) in results:
try:
event_json = db_to_json(event_json_raw)
self.db_pool.simple_insert_many_txn(
txn=txn,
table="event_labels",
values=[
{
"event_id": event_id,
"label": label,
"room_id": event_json["room_id"],
"topological_ordering": event_json["depth"],
}
for label in event_json["content"].get(
EventContentFields.LABELS, []
)
if isinstance(label, str)
],
)
except Exception as e:
logger.warning(
"Unable to load event %s (no labels will be imported): %s",
event_id,
e,
)
nbrows += 1
last_row_event_id = event_id
self.db_pool.updates._background_update_progress_txn(
txn, "event_store_labels", {"last_event_id": last_row_event_id}
)
return nbrows
num_rows = yield self.db_pool.runInteraction(
desc="event_store_labels", func=_event_store_labels_txn
)
if not num_rows:
yield self.db_pool.updates._end_background_update("event_store_labels")
return num_rows
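
The ancestor walk at the end of _cleanup_extremities_bg_update_txn is the subtle part of this background update. Here is a self-contained sketch of the same walk with invented event IDs; it is illustrative only, not Synapse code.

def extremities_to_delete(graph, non_rejected_leaves, original_set):
    """graph maps event_id -> set of prev_event_ids (the edges we collected)."""
    to_delete = set()
    leaves = set(non_rejected_leaves)
    while leaves:
        event_id = leaves.pop()
        prev_event_ids = graph.get(event_id, set())
        leaves.update(prev_event_ids)
        to_delete.update(prev_event_ids)
    # Only rows that were in the forward extremities table to begin with
    # are candidates for deletion.
    return to_delete & original_set

# "A" has a live descendant "C" (via soft-failed "B"), so it is deleted;
# "X" has no descendants at all, so it is kept.
print(extremities_to_delete(
    graph={"B": {"A"}, "C": {"B"}},
    non_rejected_leaves={"C"},
    original_set={"A", "X"},
))  # {'A'}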

File diff suppressed because it is too large

@@ -0,0 +1,74 @@
# -*- coding: utf-8 -*-
# Copyright 2015, 2016 OpenMarket Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from canonicaljson import encode_canonical_json
from synapse.api.errors import Codes, SynapseError
from synapse.storage._base import SQLBaseStore, db_to_json
from synapse.util.caches.descriptors import cachedInlineCallbacks
class FilteringStore(SQLBaseStore):
@cachedInlineCallbacks(num_args=2)
def get_user_filter(self, user_localpart, filter_id):
# filter_id is BIGINT UNSIGNED, so if it isn't a number, fail
# with a coherent error message rather than 500 M_UNKNOWN.
try:
int(filter_id)
except ValueError:
raise SynapseError(400, "Invalid filter ID", Codes.INVALID_PARAM)
def_json = yield self.db_pool.simple_select_one_onecol(
table="user_filters",
keyvalues={"user_id": user_localpart, "filter_id": filter_id},
retcol="filter_json",
allow_none=False,
desc="get_user_filter",
)
return db_to_json(def_json)
def add_user_filter(self, user_localpart, user_filter):
def_json = encode_canonical_json(user_filter)
# Need an atomic transaction to SELECT the maximal ID so far then
# INSERT a new one
def _do_txn(txn):
sql = (
"SELECT filter_id FROM user_filters "
"WHERE user_id = ? AND filter_json = ?"
)
txn.execute(sql, (user_localpart, bytearray(def_json)))
filter_id_response = txn.fetchone()
if filter_id_response is not None:
return filter_id_response[0]
sql = "SELECT MAX(filter_id) FROM user_filters WHERE user_id = ?"
txn.execute(sql, (user_localpart,))
max_id = txn.fetchone()[0]
if max_id is None:
filter_id = 0
else:
filter_id = max_id + 1
sql = (
"INSERT INTO user_filters (user_id, filter_id, filter_json)"
"VALUES(?, ?, ?)"
)
txn.execute(sql, (user_localpart, filter_id, bytearray(def_json)))
return filter_id
return self.db_pool.runInteraction("add_user_filter", _do_txn)
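
A minimal sketch of the allocate-or-reuse logic in _do_txn above, with an in-memory sqlite3 table standing in for user_filters (names and sample filters are illustrative only): an identical filter body returns the existing ID, anything new gets MAX(filter_id) + 1.

import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute(
    "CREATE TABLE user_filters (user_id TEXT, filter_id INTEGER, filter_json BLOB)"
)

def add_user_filter(user_localpart, def_json):
    with conn:  # commit on success; stands in for the store's transaction
        row = conn.execute(
            "SELECT filter_id FROM user_filters WHERE user_id = ? AND filter_json = ?",
            (user_localpart, def_json),
        ).fetchone()
        if row is not None:
            return row[0]
        (max_id,) = conn.execute(
            "SELECT MAX(filter_id) FROM user_filters WHERE user_id = ?",
            (user_localpart,),
        ).fetchone()
        filter_id = 0 if max_id is None else max_id + 1
        conn.execute(
            "INSERT INTO user_filters (user_id, filter_id, filter_json) VALUES (?, ?, ?)",
            (user_localpart, filter_id, def_json),
        )
        return filter_id

print(add_user_filter("alice", b'{"room": {}}'))      # 0
print(add_user_filter("alice", b'{"presence": {}}'))  # 1
print(add_user_filter("alice", b'{"room": {}}'))      # 0, reused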

File diff suppressed because it is too large

@@ -0,0 +1,210 @@
# -*- coding: utf-8 -*-
# Copyright 2014-2016 OpenMarket Ltd
# Copyright 2019 New Vector Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import itertools
import logging
from signedjson.key import decode_verify_key_bytes
from synapse.storage._base import SQLBaseStore
from synapse.storage.keys import FetchKeyResult
from synapse.util.caches.descriptors import cached, cachedList
from synapse.util.iterutils import batch_iter
logger = logging.getLogger(__name__)
db_binary_type = memoryview
class KeyStore(SQLBaseStore):
"""Persistence for signature verification keys
"""
@cached()
def _get_server_verify_key(self, server_name_and_key_id):
raise NotImplementedError()
@cachedList(
cached_method_name="_get_server_verify_key", list_name="server_name_and_key_ids"
)
def get_server_verify_keys(self, server_name_and_key_ids):
"""
Args:
server_name_and_key_ids (iterable[Tuple[str, str]]):
iterable of (server_name, key-id) tuples to fetch keys for
Returns:
Deferred: resolves to dict[Tuple[str, str], FetchKeyResult|None]:
map from (server_name, key_id) -> FetchKeyResult, or None if the key is
unknown
"""
keys = {}
def _get_keys(txn, batch):
"""Processes a batch of keys to fetch, and adds the result to `keys`."""
# batch_iter always returns tuples so it's safe to do len(batch)
sql = (
"SELECT server_name, key_id, verify_key, ts_valid_until_ms "
"FROM server_signature_keys WHERE 1=0"
) + " OR (server_name=? AND key_id=?)" * len(batch)
txn.execute(sql, tuple(itertools.chain.from_iterable(batch)))
for row in txn:
server_name, key_id, key_bytes, ts_valid_until_ms = row
if ts_valid_until_ms is None:
# Old keys may be stored with a ts_valid_until_ms of null,
# in which case we treat this as if it was set to `0`, i.e.
# it won't match key requests that define a minimum
# `ts_valid_until_ms`.
ts_valid_until_ms = 0
res = FetchKeyResult(
verify_key=decode_verify_key_bytes(key_id, bytes(key_bytes)),
valid_until_ts=ts_valid_until_ms,
)
keys[(server_name, key_id)] = res
def _txn(txn):
for batch in batch_iter(server_name_and_key_ids, 50):
_get_keys(txn, batch)
return keys
return self.db_pool.runInteraction("get_server_verify_keys", _txn)
def store_server_verify_keys(self, from_server, ts_added_ms, verify_keys):
"""Stores NACL verification keys for remote servers.
Args:
from_server (str): Where the verification keys were looked up
ts_added_ms (int): The time to record that the key was added
verify_keys (iterable[tuple[str, str, FetchKeyResult]]):
keys to be stored. Each entry is a triplet of
(server_name, key_id, key).
"""
key_values = []
value_values = []
invalidations = []
for server_name, key_id, fetch_result in verify_keys:
key_values.append((server_name, key_id))
value_values.append(
(
from_server,
ts_added_ms,
fetch_result.valid_until_ts,
db_binary_type(fetch_result.verify_key.encode()),
)
)
# invalidate takes a tuple corresponding to the params of
# _get_server_verify_key. _get_server_verify_key only takes one
# param, which is itself the 2-tuple (server_name, key_id).
invalidations.append((server_name, key_id))
def _invalidate(res):
f = self._get_server_verify_key.invalidate
for i in invalidations:
f((i,))
return res
return self.db_pool.runInteraction(
"store_server_verify_keys",
self.db_pool.simple_upsert_many_txn,
table="server_signature_keys",
key_names=("server_name", "key_id"),
key_values=key_values,
value_names=(
"from_server",
"ts_added_ms",
"ts_valid_until_ms",
"verify_key",
),
value_values=value_values,
).addCallback(_invalidate)
def store_server_keys_json(
self, server_name, key_id, from_server, ts_now_ms, ts_expires_ms, key_json_bytes
):
"""Stores the JSON bytes for a set of keys from a server
The JSON should be signed by the originating server, the intermediate
server, and by this server. Updates the value for the
(server_name, key_id, from_server) triplet if one already existed.
Args:
server_name (str): The name of the server.
key_id (str): The identifier of the key this JSON is for.
from_server (str): The server this JSON was fetched from.
ts_now_ms (int): The time now in milliseconds.
ts_expires_ms (int): The time when this JSON stops being valid.
key_json_bytes (bytes): The encoded JSON.
"""
return self.db_pool.simple_upsert(
table="server_keys_json",
keyvalues={
"server_name": server_name,
"key_id": key_id,
"from_server": from_server,
},
values={
"server_name": server_name,
"key_id": key_id,
"from_server": from_server,
"ts_added_ms": ts_now_ms,
"ts_valid_until_ms": ts_expires_ms,
"key_json": db_binary_type(key_json_bytes),
},
desc="store_server_keys_json",
)
def get_server_keys_json(self, server_keys):
"""Retrive the key json for a list of server_keys and key ids.
If no keys are found for a given server, key_id and source then
that server, key_id, and source triplet entry will be an empty list.
The JSON is returned as a byte array so that it can be efficiently
used in an HTTP response.
Args:
server_keys (list): List of (server_name, key_id, source) triplets.
Returns:
Deferred[dict[Tuple[str, str, str|None], list[dict]]]:
Dict mapping (server_name, key_id, source) triplets to lists of dicts
"""
def _get_server_keys_json_txn(txn):
results = {}
for server_name, key_id, from_server in server_keys:
keyvalues = {"server_name": server_name}
if key_id is not None:
keyvalues["key_id"] = key_id
if from_server is not None:
keyvalues["from_server"] = from_server
rows = self.db_pool.simple_select_list_txn(
txn,
"server_keys_json",
keyvalues=keyvalues,
retcols=(
"key_id",
"from_server",
"ts_added_ms",
"ts_valid_until_ms",
"key_json",
),
)
results[(server_name, key_id, from_server)] = rows
return results
return self.db_pool.runInteraction(
"get_server_keys_json", _get_server_keys_json_txn
)
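
The batched lookup in _get_keys builds its SQL by appending one OR clause per (server_name, key_id) pair to a WHERE 1=0 base. Here is a short sketch of just that construction, with made-up server names; it is not part of the store.

import itertools

def build_batch_query(batch):
    sql = (
        "SELECT server_name, key_id, verify_key, ts_valid_until_ms "
        "FROM server_signature_keys WHERE 1=0"
    ) + " OR (server_name=? AND key_id=?)" * len(batch)
    # Flatten the pairs into the positional bind arguments.
    args = tuple(itertools.chain.from_iterable(batch))
    return sql, args

sql, args = build_batch_query([("example.org", "ed25519:a1"), ("matrix.org", "ed25519:b2")])
print(args)  # ('example.org', 'ed25519:a1', 'matrix.org', 'ed25519:b2')
print(sql.count("OR (server_name=? AND key_id=?)"))  # 2, one clause per pair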


@@ -0,0 +1,398 @@
# -*- coding: utf-8 -*-
# Copyright 2014-2016 OpenMarket Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from synapse.storage._base import SQLBaseStore
from synapse.storage.database import DatabasePool
class MediaRepositoryBackgroundUpdateStore(SQLBaseStore):
def __init__(self, database: DatabasePool, db_conn, hs):
super(MediaRepositoryBackgroundUpdateStore, self).__init__(
database, db_conn, hs
)
self.db_pool.updates.register_background_index_update(
update_name="local_media_repository_url_idx",
index_name="local_media_repository_url_idx",
table="local_media_repository",
columns=["created_ts"],
where_clause="url_cache IS NOT NULL",
)
class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore):
"""Persistence for attachments and avatars"""
def __init__(self, database: DatabasePool, db_conn, hs):
super(MediaRepositoryStore, self).__init__(database, db_conn, hs)
def get_local_media(self, media_id):
"""Get the metadata for a local piece of media
Returns:
None if the media_id doesn't exist.
"""
return self.db_pool.simple_select_one(
"local_media_repository",
{"media_id": media_id},
(
"media_type",
"media_length",
"upload_name",
"created_ts",
"quarantined_by",
"url_cache",
),
allow_none=True,
desc="get_local_media",
)
def store_local_media(
self,
media_id,
media_type,
time_now_ms,
upload_name,
media_length,
user_id,
url_cache=None,
):
return self.db_pool.simple_insert(
"local_media_repository",
{
"media_id": media_id,
"media_type": media_type,
"created_ts": time_now_ms,
"upload_name": upload_name,
"media_length": media_length,
"user_id": user_id.to_string(),
"url_cache": url_cache,
},
desc="store_local_media",
)
def mark_local_media_as_safe(self, media_id: str):
"""Mark a local media as safe from quarantining."""
return self.db_pool.simple_update_one(
table="local_media_repository",
keyvalues={"media_id": media_id},
updatevalues={"safe_from_quarantine": True},
desc="mark_local_media_as_safe",
)
def get_url_cache(self, url, ts):
"""Get the media_id and ts for a cached URL as of the given timestamp
Returns:
None if the URL isn't cached.
"""
def get_url_cache_txn(txn):
# get the most recently cached result (relative to the given ts)
sql = (
"SELECT response_code, etag, expires_ts, og, media_id, download_ts"
" FROM local_media_repository_url_cache"
" WHERE url = ? AND download_ts <= ?"
" ORDER BY download_ts DESC LIMIT 1"
)
txn.execute(sql, (url, ts))
row = txn.fetchone()
if not row:
# ...or if we've requested a timestamp older than the oldest
# copy in the cache, return the oldest copy (if any)
sql = (
"SELECT response_code, etag, expires_ts, og, media_id, download_ts"
" FROM local_media_repository_url_cache"
" WHERE url = ? AND download_ts > ?"
" ORDER BY download_ts ASC LIMIT 1"
)
txn.execute(sql, (url, ts))
row = txn.fetchone()
if not row:
return None
return dict(
zip(
(
"response_code",
"etag",
"expires_ts",
"og",
"media_id",
"download_ts",
),
row,
)
)
return self.db_pool.runInteraction("get_url_cache", get_url_cache_txn)
def store_url_cache(
self, url, response_code, etag, expires_ts, og, media_id, download_ts
):
return self.db_pool.simple_insert(
"local_media_repository_url_cache",
{
"url": url,
"response_code": response_code,
"etag": etag,
"expires_ts": expires_ts,
"og": og,
"media_id": media_id,
"download_ts": download_ts,
},
desc="store_url_cache",
)
def get_local_media_thumbnails(self, media_id):
return self.db_pool.simple_select_list(
"local_media_repository_thumbnails",
{"media_id": media_id},
(
"thumbnail_width",
"thumbnail_height",
"thumbnail_method",
"thumbnail_type",
"thumbnail_length",
),
desc="get_local_media_thumbnails",
)
def store_local_thumbnail(
self,
media_id,
thumbnail_width,
thumbnail_height,
thumbnail_type,
thumbnail_method,
thumbnail_length,
):
return self.db_pool.simple_insert(
"local_media_repository_thumbnails",
{
"media_id": media_id,
"thumbnail_width": thumbnail_width,
"thumbnail_height": thumbnail_height,
"thumbnail_method": thumbnail_method,
"thumbnail_type": thumbnail_type,
"thumbnail_length": thumbnail_length,
},
desc="store_local_thumbnail",
)
def get_cached_remote_media(self, origin, media_id):
return self.db_pool.simple_select_one(
"remote_media_cache",
{"media_origin": origin, "media_id": media_id},
(
"media_type",
"media_length",
"upload_name",
"created_ts",
"filesystem_id",
"quarantined_by",
),
allow_none=True,
desc="get_cached_remote_media",
)
def store_cached_remote_media(
self,
origin,
media_id,
media_type,
media_length,
time_now_ms,
upload_name,
filesystem_id,
):
return self.db_pool.simple_insert(
"remote_media_cache",
{
"media_origin": origin,
"media_id": media_id,
"media_type": media_type,
"media_length": media_length,
"created_ts": time_now_ms,
"upload_name": upload_name,
"filesystem_id": filesystem_id,
"last_access_ts": time_now_ms,
},
desc="store_cached_remote_media",
)
def update_cached_last_access_time(self, local_media, remote_media, time_ms):
"""Updates the last access time of the given media
Args:
local_media (iterable[str]): Set of media_ids
remote_media (iterable[(str, str)]): Set of (server_name, media_id)
time_ms: Current time in milliseconds
"""
def update_cache_txn(txn):
sql = (
"UPDATE remote_media_cache SET last_access_ts = ?"
" WHERE media_origin = ? AND media_id = ?"
)
txn.executemany(
sql,
(
(time_ms, media_origin, media_id)
for media_origin, media_id in remote_media
),
)
sql = (
"UPDATE local_media_repository SET last_access_ts = ?"
" WHERE media_id = ?"
)
txn.executemany(sql, ((time_ms, media_id) for media_id in local_media))
return self.db_pool.runInteraction(
"update_cached_last_access_time", update_cache_txn
)
def get_remote_media_thumbnails(self, origin, media_id):
return self.db_pool.simple_select_list(
"remote_media_cache_thumbnails",
{"media_origin": origin, "media_id": media_id},
(
"thumbnail_width",
"thumbnail_height",
"thumbnail_method",
"thumbnail_type",
"thumbnail_length",
"filesystem_id",
),
desc="get_remote_media_thumbnails",
)
def store_remote_media_thumbnail(
self,
origin,
media_id,
filesystem_id,
thumbnail_width,
thumbnail_height,
thumbnail_type,
thumbnail_method,
thumbnail_length,
):
return self.db_pool.simple_insert(
"remote_media_cache_thumbnails",
{
"media_origin": origin,
"media_id": media_id,
"thumbnail_width": thumbnail_width,
"thumbnail_height": thumbnail_height,
"thumbnail_method": thumbnail_method,
"thumbnail_type": thumbnail_type,
"thumbnail_length": thumbnail_length,
"filesystem_id": filesystem_id,
},
desc="store_remote_media_thumbnail",
)
def get_remote_media_before(self, before_ts):
sql = (
"SELECT media_origin, media_id, filesystem_id"
" FROM remote_media_cache"
" WHERE last_access_ts < ?"
)
return self.db_pool.execute(
"get_remote_media_before", self.db_pool.cursor_to_dict, sql, before_ts
)
def delete_remote_media(self, media_origin, media_id):
def delete_remote_media_txn(txn):
self.db_pool.simple_delete_txn(
txn,
"remote_media_cache",
keyvalues={"media_origin": media_origin, "media_id": media_id},
)
self.db_pool.simple_delete_txn(
txn,
"remote_media_cache_thumbnails",
keyvalues={"media_origin": media_origin, "media_id": media_id},
)
return self.db_pool.runInteraction(
"delete_remote_media", delete_remote_media_txn
)
def get_expired_url_cache(self, now_ts):
sql = (
"SELECT media_id FROM local_media_repository_url_cache"
" WHERE expires_ts < ?"
" ORDER BY expires_ts ASC"
" LIMIT 500"
)
def _get_expired_url_cache_txn(txn):
txn.execute(sql, (now_ts,))
return [row[0] for row in txn]
return self.db_pool.runInteraction(
"get_expired_url_cache", _get_expired_url_cache_txn
)
async def delete_url_cache(self, media_ids):
if len(media_ids) == 0:
return
sql = "DELETE FROM local_media_repository_url_cache WHERE media_id = ?"
def _delete_url_cache_txn(txn):
txn.executemany(sql, [(media_id,) for media_id in media_ids])
return await self.db_pool.runInteraction(
"delete_url_cache", _delete_url_cache_txn
)
def get_url_cache_media_before(self, before_ts):
sql = (
"SELECT media_id FROM local_media_repository"
" WHERE created_ts < ? AND url_cache IS NOT NULL"
" ORDER BY created_ts ASC"
" LIMIT 500"
)
def _get_url_cache_media_before_txn(txn):
txn.execute(sql, (before_ts,))
return [row[0] for row in txn]
return self.db_pool.runInteraction(
"get_url_cache_media_before", _get_url_cache_media_before_txn
)
async def delete_url_cache_media(self, media_ids):
if len(media_ids) == 0:
return
def _delete_url_cache_media_txn(txn):
sql = "DELETE FROM local_media_repository WHERE media_id = ?"
txn.executemany(sql, [(media_id,) for media_id in media_ids])
sql = "DELETE FROM local_media_repository_thumbnails WHERE media_id = ?"
txn.executemany(sql, [(media_id,) for media_id in media_ids])
return await self.db_pool.runInteraction(
"delete_url_cache_media", _delete_url_cache_media_txn
)
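
The two-step lookup in get_url_cache above, shown in isolation against an in-memory sqlite3 table with hypothetical rows: prefer the newest copy downloaded at or before the requested timestamp, and only fall back to the oldest later copy. This is an illustrative sketch, not part of the store.

import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE url_cache (url TEXT, media_id TEXT, download_ts INTEGER)")
conn.executemany(
    "INSERT INTO url_cache VALUES (?, ?, ?)",
    [("http://example.com/", "media_old", 100), ("http://example.com/", "media_new", 200)],
)

def lookup(url, ts):
    # Most recent copy downloaded at or before `ts`...
    row = conn.execute(
        "SELECT media_id, download_ts FROM url_cache"
        " WHERE url = ? AND download_ts <= ?"
        " ORDER BY download_ts DESC LIMIT 1",
        (url, ts),
    ).fetchone()
    if not row:
        # ...or, failing that, the oldest copy downloaded after `ts`.
        row = conn.execute(
            "SELECT media_id, download_ts FROM url_cache"
            " WHERE url = ? AND download_ts > ?"
            " ORDER BY download_ts ASC LIMIT 1",
            (url, ts),
        ).fetchone()
    return row

print(lookup("http://example.com/", 250))  # ('media_new', 200)
print(lookup("http://example.com/", 50))   # ('media_old', 100), via the fallback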


@@ -0,0 +1,130 @@
# -*- coding: utf-8 -*-
# Copyright 2020 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import typing
from collections import Counter
from twisted.internet import defer
from synapse.metrics import BucketCollector
from synapse.metrics.background_process_metrics import run_as_background_process
from synapse.storage._base import SQLBaseStore
from synapse.storage.database import DatabasePool
from synapse.storage.databases.main.event_push_actions import (
EventPushActionsWorkerStore,
)
class ServerMetricsStore(EventPushActionsWorkerStore, SQLBaseStore):
"""Functions to pull various metrics from the DB, for e.g. phone home
stats and prometheus metrics.
"""
def __init__(self, database: DatabasePool, db_conn, hs):
super().__init__(database, db_conn, hs)
# Collect metrics on the number of forward extremities that exist.
# Counter of number of extremities to count
self._current_forward_extremities_amount = (
Counter()
) # type: typing.Counter[int]
BucketCollector(
"synapse_forward_extremities",
lambda: self._current_forward_extremities_amount,
buckets=[1, 2, 3, 5, 7, 10, 15, 20, 50, 100, 200, 500, "+Inf"],
)
# Read the extrems every 60 minutes
def read_forward_extremities():
# run as a background process to make sure that the database transactions
# have a logcontext to report to
return run_as_background_process(
"read_forward_extremities", self._read_forward_extremities
)
hs.get_clock().looping_call(read_forward_extremities, 60 * 60 * 1000)
async def _read_forward_extremities(self):
def fetch(txn):
txn.execute(
"""
select count(*) c from event_forward_extremities
group by room_id
"""
)
return txn.fetchall()
res = await self.db_pool.runInteraction("read_forward_extremities", fetch)
self._current_forward_extremities_amount = Counter([x[0] for x in res])
@defer.inlineCallbacks
def count_daily_messages(self):
"""
Returns an estimate of the number of messages sent in the last day.
If it has been significantly less or more than one day since the last
call to this function, it will return None.
"""
def _count_messages(txn):
sql = """
SELECT COALESCE(COUNT(*), 0) FROM events
WHERE type = 'm.room.message'
AND stream_ordering > ?
"""
txn.execute(sql, (self.stream_ordering_day_ago,))
(count,) = txn.fetchone()
return count
ret = yield self.db_pool.runInteraction("count_messages", _count_messages)
return ret
@defer.inlineCallbacks
def count_daily_sent_messages(self):
def _count_messages(txn):
# This is good enough as if you have silly characters in your own
# hostname then that's your own fault.
like_clause = "%:" + self.hs.hostname
sql = """
SELECT COALESCE(COUNT(*), 0) FROM events
WHERE type = 'm.room.message'
AND sender LIKE ?
AND stream_ordering > ?
"""
txn.execute(sql, (like_clause, self.stream_ordering_day_ago))
(count,) = txn.fetchone()
return count
ret = yield self.db_pool.runInteraction(
"count_daily_sent_messages", _count_messages
)
return ret
@defer.inlineCallbacks
def count_daily_active_rooms(self):
def _count(txn):
sql = """
SELECT COALESCE(COUNT(DISTINCT room_id), 0) FROM events
WHERE type = 'm.room.message'
AND stream_ordering > ?
"""
txn.execute(sql, (self.stream_ordering_day_ago,))
(count,) = txn.fetchone()
return count
ret = yield self.db_pool.runInteraction("count_daily_active_rooms", _count)
return ret
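
For reference, this is the shape of the Counter that _read_forward_extremities builds (values invented): the per-room extremity counts become a mapping of extremities-per-room to number-of-rooms, which the BucketCollector above renders as a histogram.

from collections import Counter

# Hypothetical result of the GROUP BY room_id query: one count per room.
res = [(1,), (1,), (1,), (3,), (7,)]
current_forward_extremities_amount = Counter([x[0] for x in res])
print(current_forward_extremities_amount)  # Counter({1: 3, 3: 1, 7: 1})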


@@ -0,0 +1,361 @@
# -*- coding: utf-8 -*-
# Copyright 2018 New Vector
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from typing import List
from twisted.internet import defer
from synapse.storage._base import SQLBaseStore
from synapse.storage.database import DatabasePool, make_in_list_sql_clause
from synapse.util.caches.descriptors import cached
logger = logging.getLogger(__name__)
# Number of msec of granularity to store the monthly_active_user timestamp
# This means it is not necessary to update the table on every request
LAST_SEEN_GRANULARITY = 60 * 60 * 1000
class MonthlyActiveUsersWorkerStore(SQLBaseStore):
def __init__(self, database: DatabasePool, db_conn, hs):
super(MonthlyActiveUsersWorkerStore, self).__init__(database, db_conn, hs)
self._clock = hs.get_clock()
self.hs = hs
@cached(num_args=0)
def get_monthly_active_count(self):
"""Generates current count of monthly active users
Returns:
Deferred[int]: Number of current monthly active users
"""
def _count_users(txn):
sql = "SELECT COALESCE(count(*), 0) FROM monthly_active_users"
txn.execute(sql)
(count,) = txn.fetchone()
return count
return self.db_pool.runInteraction("count_users", _count_users)
@cached(num_args=0)
def get_monthly_active_count_by_service(self):
"""Generates current count of monthly active users broken down by service.
A service is typically an appservice but also includes native matrix users.
Since the `monthly_active_users` table is populated from the `user_ips` table,
`config.track_appservice_user_ips` must be set to `true` for this
method to return anything other than native matrix users.
Returns:
Deferred[dict]: dict that includes a mapping between app_service_id
and the number of occurrences.
"""
def _count_users_by_service(txn):
sql = """
SELECT COALESCE(appservice_id, 'native'), COALESCE(count(*), 0)
FROM monthly_active_users
LEFT JOIN users ON monthly_active_users.user_id=users.name
GROUP BY appservice_id;
"""
txn.execute(sql)
result = txn.fetchall()
return dict(result)
return self.db_pool.runInteraction(
"count_users_by_service", _count_users_by_service
)
async def get_registered_reserved_users(self) -> List[str]:
"""Of the reserved threepids defined in config, retrieve those that are associated
with registered users
Returns:
User IDs of actual users that are reserved
"""
users = []
for tp in self.hs.config.mau_limits_reserved_threepids[
: self.hs.config.max_mau_value
]:
user_id = await self.hs.get_datastore().get_user_id_by_threepid(
tp["medium"], tp["address"]
)
if user_id:
users.append(user_id)
return users
@cached(num_args=1)
def user_last_seen_monthly_active(self, user_id):
"""
Checks if a given user is part of the monthly active user group
Arguments:
user_id (str): user to check
Return:
Deferred[int]: timestamp of when the user was last seen, None if never seen
"""
return self.db_pool.simple_select_one_onecol(
table="monthly_active_users",
keyvalues={"user_id": user_id},
retcol="timestamp",
allow_none=True,
desc="user_last_seen_monthly_active",
)
class MonthlyActiveUsersStore(MonthlyActiveUsersWorkerStore):
def __init__(self, database: DatabasePool, db_conn, hs):
super(MonthlyActiveUsersStore, self).__init__(database, db_conn, hs)
self._limit_usage_by_mau = hs.config.limit_usage_by_mau
self._mau_stats_only = hs.config.mau_stats_only
self._max_mau_value = hs.config.max_mau_value
# Do not add more reserved users than the total allowable number
# cur = LoggingTransaction(
self.db_pool.new_transaction(
db_conn,
"initialise_mau_threepids",
[],
[],
self._initialise_reserved_users,
hs.config.mau_limits_reserved_threepids[: self._max_mau_value],
)
def _initialise_reserved_users(self, txn, threepids):
"""Ensures that reserved threepids are accounted for in the MAU table, should
be called on start up.
Args:
txn (cursor):
threepids (list[dict]): List of threepid dicts to reserve
"""
# XXX what is this function trying to achieve? It upserts into
# monthly_active_users for each *registered* reserved mau user, but why?
#
# - shouldn't there already be an entry for each reserved user (at least
# if they have been active recently)?
#
# - if it's important that the timestamp is kept up to date, why do we only
# run this at startup?
for tp in threepids:
user_id = self.get_user_id_by_threepid_txn(txn, tp["medium"], tp["address"])
if user_id:
is_support = self.is_support_user_txn(txn, user_id)
if not is_support:
# We do this manually here to avoid hitting #6791
self.db_pool.simple_upsert_txn(
txn,
table="monthly_active_users",
keyvalues={"user_id": user_id},
values={"timestamp": int(self._clock.time_msec())},
)
else:
logger.warning("mau limit reserved threepid %s not found in db", tp)
async def reap_monthly_active_users(self):
"""Cleans out monthly active user table to ensure that no stale
entries exist.
"""
def _reap_users(txn, reserved_users):
"""
Args:
reserved_users (tuple): reserved users to preserve
"""
thirty_days_ago = int(self._clock.time_msec()) - (1000 * 60 * 60 * 24 * 30)
in_clause, in_clause_args = make_in_list_sql_clause(
self.database_engine, "user_id", reserved_users
)
txn.execute(
"DELETE FROM monthly_active_users WHERE timestamp < ? AND NOT %s"
% (in_clause,),
[thirty_days_ago] + in_clause_args,
)
if self._limit_usage_by_mau:
# If MAU user count still exceeds the MAU threshold, then delete on
# a least recently active basis.
# Note it is not possible to write this query using OFFSET due to
# incompatibilities in how sqlite and postgres support the feature.
# Sqlite requires 'LIMIT -1 OFFSET ?' (the LIMIT must be present),
# while Postgres does not require 'LIMIT', but also does not support
# negative LIMIT values. So there is no way to write it that both can
# support.
# Limit must be >= 0 for postgres
num_of_non_reserved_users_to_remove = max(
self._max_mau_value - len(reserved_users), 0
)
# It is important to filter reserved users twice to guard
# against the case where the reserved user is present in the
# SELECT, meaning that a legitimate mau is deleted.
sql = """
DELETE FROM monthly_active_users
WHERE user_id NOT IN (
SELECT user_id FROM monthly_active_users
WHERE NOT %s
ORDER BY timestamp DESC
LIMIT ?
)
AND NOT %s
""" % (
in_clause,
in_clause,
)
query_args = (
in_clause_args
+ [num_of_non_reserved_users_to_remove]
+ in_clause_args
)
txn.execute(sql, query_args)
# It seems poor to invalidate the whole cache. Postgres supports
# 'Returning' which would allow me to invalidate only the
# specific users, but sqlite has no way to do this and instead
# I would need to SELECT and then DELETE, which without locking
# is racy.
# Have resolved to invalidate the whole cache for now and do
# something about it if and when the perf becomes significant
self._invalidate_all_cache_and_stream(
txn, self.user_last_seen_monthly_active
)
self._invalidate_cache_and_stream(txn, self.get_monthly_active_count, ())
reserved_users = await self.get_registered_reserved_users()
await self.db_pool.runInteraction(
"reap_monthly_active_users", _reap_users, reserved_users
)
@defer.inlineCallbacks
def upsert_monthly_active_user(self, user_id):
"""Updates or inserts the user into the monthly active user table, which
is used to track the current MAU usage of the server
Args:
user_id (str): user to add/update
Returns:
Deferred
"""
# Support users are never to be included in MAU stats. Note I can't easily call this
# from upsert_monthly_active_user_txn because then I need a _txn form of
# is_support_user which is complicated because I want to cache the result.
# Therefore I call it here and ignore the case where
# upsert_monthly_active_user_txn is called directly from
# _initialise_reserved_users reasoning that it would be very strange to
# include a support user in this context.
is_support = yield self.is_support_user(user_id)
if is_support:
return
yield self.db_pool.runInteraction(
"upsert_monthly_active_user", self.upsert_monthly_active_user_txn, user_id
)
def upsert_monthly_active_user_txn(self, txn, user_id):
"""Updates or inserts monthly active user member
We consciously do not call is_support_txn from this method because it
is not possible to cache the response. is_support_txn will be false in
almost all cases, so it seems reasonable to call it only for
upsert_monthly_active_user and to call is_support_txn manually
for cases where upsert_monthly_active_user_txn is called directly,
like _initialise_reserved_users
In short, don't call this method with support users. (Support users
should not appear in the MAU stats).
Args:
txn (cursor):
user_id (str): user to add/update
Returns:
bool: True if a new entry was created, False if an
existing one was updated.
"""
# Am consciously deciding to lock the table on the basis that it ought
# never to be a big table, and alternative approaches (batching multiple
# upserts into a single txn) introduced a lot of extra complexity.
# See https://github.com/matrix-org/synapse/issues/3854 for more
is_insert = self.db_pool.simple_upsert_txn(
txn,
table="monthly_active_users",
keyvalues={"user_id": user_id},
values={"timestamp": int(self._clock.time_msec())},
)
self._invalidate_cache_and_stream(txn, self.get_monthly_active_count, ())
self._invalidate_cache_and_stream(
txn, self.get_monthly_active_count_by_service, ()
)
self._invalidate_cache_and_stream(
txn, self.user_last_seen_monthly_active, (user_id,)
)
return is_insert
@defer.inlineCallbacks
def populate_monthly_active_users(self, user_id):
"""Checks on the state of monthly active user limits and optionally
add the user to the monthly active tables
Args:
user_id(str): the user_id to query
"""
if self._limit_usage_by_mau or self._mau_stats_only:
# Trial users and guests should not be included as part of MAU group
is_guest = yield self.is_guest(user_id)
if is_guest:
return
is_trial = yield self.is_trial_user(user_id)
if is_trial:
return
last_seen_timestamp = yield self.user_last_seen_monthly_active(user_id)
now = self.hs.get_clock().time_msec()
# We want to reduce the total number of db writes, and are happy
# to trade accuracy of timestamp in order to lighten load. This means
# we always insert new users (where the MAU threshold has not been reached),
# but only update if we have not previously seen the user for
# LAST_SEEN_GRANULARITY ms.
if last_seen_timestamp is None:
# In the case where mau_stats_only is True and limit_usage_by_mau is
# False, there is no point in checking get_monthly_active_count - it
# adds no value and will break the logic if max_mau_value is exceeded.
if not self._limit_usage_by_mau:
yield self.upsert_monthly_active_user(user_id)
else:
count = yield self.get_monthly_active_count()
if count < self._max_mau_value:
yield self.upsert_monthly_active_user(user_id)
elif now - last_seen_timestamp > LAST_SEEN_GRANULARITY:
yield self.upsert_monthly_active_user(user_id)
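
The write-coalescing rule in populate_monthly_active_users can be condensed into a small standalone function (argument names here are assumptions, not the store's API): new users are inserted subject to the MAU cap, existing users are only refreshed once their timestamp is more than LAST_SEEN_GRANULARITY stale.

LAST_SEEN_GRANULARITY = 60 * 60 * 1000  # one hour, as above

def should_upsert(last_seen_ts, now_ms, limit_usage_by_mau, current_count, max_mau_value):
    if last_seen_ts is None:
        # Never seen before: insert, unless the MAU cap is enforced and full.
        return (not limit_usage_by_mau) or current_count < max_mau_value
    # Seen before: only write again once the stored timestamp is stale.
    return now_ms - last_seen_ts > LAST_SEEN_GRANULARITY

now = 10 * 60 * 60 * 1000
print(should_upsert(None, now, True, 4, 5))                      # True: new user, under the cap
print(should_upsert(now - 5 * 60 * 1000, now, True, 4, 5))       # False: refreshed 5 minutes ago
print(should_upsert(now - 2 * 60 * 60 * 1000, now, True, 5, 5))  # True: two hours stale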


@@ -0,0 +1,33 @@
from synapse.storage._base import SQLBaseStore
class OpenIdStore(SQLBaseStore):
def insert_open_id_token(self, token, ts_valid_until_ms, user_id):
return self.db_pool.simple_insert(
table="open_id_tokens",
values={
"token": token,
"ts_valid_until_ms": ts_valid_until_ms,
"user_id": user_id,
},
desc="insert_open_id_token",
)
def get_user_id_for_open_id_token(self, token, ts_now_ms):
def get_user_id_for_token_txn(txn):
sql = (
"SELECT user_id FROM open_id_tokens"
" WHERE token = ? AND ? <= ts_valid_until_ms"
)
txn.execute(sql, (token, ts_now_ms))
rows = txn.fetchall()
if not rows:
return None
else:
return rows[0][0]
return self.db_pool.runInteraction(
"get_user_id_for_token", get_user_id_for_token_txn
)
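
A quick illustration of the validity check in get_user_id_for_open_id_token, using an in-memory sqlite3 table and a made-up token (illustrative only): lookups only succeed while the current time is at or before ts_valid_until_ms.

import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute(
    "CREATE TABLE open_id_tokens (token TEXT, ts_valid_until_ms INTEGER, user_id TEXT)"
)
conn.execute("INSERT INTO open_id_tokens VALUES (?, ?, ?)", ("abc123", 5000, "@alice:hs"))

def get_user_id_for_open_id_token(token, ts_now_ms):
    rows = conn.execute(
        "SELECT user_id FROM open_id_tokens WHERE token = ? AND ? <= ts_valid_until_ms",
        (token, ts_now_ms),
    ).fetchall()
    return rows[0][0] if rows else None

print(get_user_id_for_open_id_token("abc123", 4000))  # @alice:hs
print(get_user_id_for_open_id_token("abc123", 6000))  # None: token has expired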


@@ -0,0 +1,186 @@
# -*- coding: utf-8 -*-
# Copyright 2014-2016 OpenMarket Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import List, Tuple
from twisted.internet import defer
from synapse.storage._base import SQLBaseStore, make_in_list_sql_clause
from synapse.storage.presence import UserPresenceState
from synapse.util.caches.descriptors import cached, cachedList
from synapse.util.iterutils import batch_iter
class PresenceStore(SQLBaseStore):
@defer.inlineCallbacks
def update_presence(self, presence_states):
stream_ordering_manager = self._presence_id_gen.get_next_mult(
len(presence_states)
)
with stream_ordering_manager as stream_orderings:
yield self.db_pool.runInteraction(
"update_presence",
self._update_presence_txn,
stream_orderings,
presence_states,
)
return stream_orderings[-1], self._presence_id_gen.get_current_token()
def _update_presence_txn(self, txn, stream_orderings, presence_states):
for stream_id, state in zip(stream_orderings, presence_states):
txn.call_after(
self.presence_stream_cache.entity_has_changed, state.user_id, stream_id
)
txn.call_after(self._get_presence_for_user.invalidate, (state.user_id,))
# Actually insert new rows
self.db_pool.simple_insert_many_txn(
txn,
table="presence_stream",
values=[
{
"stream_id": stream_id,
"user_id": state.user_id,
"state": state.state,
"last_active_ts": state.last_active_ts,
"last_federation_update_ts": state.last_federation_update_ts,
"last_user_sync_ts": state.last_user_sync_ts,
"status_msg": state.status_msg,
"currently_active": state.currently_active,
}
for stream_id, state in zip(stream_orderings, presence_states)
],
)
# Delete old rows to stop database from getting really big
sql = "DELETE FROM presence_stream WHERE stream_id < ? AND "
for states in batch_iter(presence_states, 50):
clause, args = make_in_list_sql_clause(
self.database_engine, "user_id", [s.user_id for s in states]
)
txn.execute(sql + clause, [stream_id] + list(args))
async def get_all_presence_updates(
self, instance_name: str, last_id: int, current_id: int, limit: int
) -> Tuple[List[Tuple[int, list]], int, bool]:
"""Get updates for presence replication stream.
Args:
instance_name: The writer we want to fetch updates from. Unused
here since there is only ever one writer.
last_id: The token to fetch updates from. Exclusive.
current_id: The token to fetch updates up to. Inclusive.
limit: The requested limit for the number of rows to return. The
function may return more or fewer rows.
Returns:
A tuple consisting of: the updates, a token to use to fetch
subsequent updates, and whether we returned fewer rows than exist
between the requested tokens due to the limit.
The token returned can be used in a subsequent call to this
function to get further updates.
The updates are a list of 2-tuples of stream ID and the row data
"""
if last_id == current_id:
return [], current_id, False
def get_all_presence_updates_txn(txn):
sql = """
SELECT stream_id, user_id, state, last_active_ts,
last_federation_update_ts, last_user_sync_ts,
status_msg,
currently_active
FROM presence_stream
WHERE ? < stream_id AND stream_id <= ?
ORDER BY stream_id ASC
LIMIT ?
"""
txn.execute(sql, (last_id, current_id, limit))
updates = [(row[0], row[1:]) for row in txn]
upper_bound = current_id
limited = False
if len(updates) >= limit:
upper_bound = updates[-1][0]
limited = True
return updates, upper_bound, limited
return await self.db_pool.runInteraction(
"get_all_presence_updates", get_all_presence_updates_txn
)
@cached()
def _get_presence_for_user(self, user_id):
raise NotImplementedError()
@cachedList(
cached_method_name="_get_presence_for_user",
list_name="user_ids",
num_args=1,
inlineCallbacks=True,
)
def get_presence_for_users(self, user_ids):
rows = yield self.db_pool.simple_select_many_batch(
table="presence_stream",
column="user_id",
iterable=user_ids,
keyvalues={},
retcols=(
"user_id",
"state",
"last_active_ts",
"last_federation_update_ts",
"last_user_sync_ts",
"status_msg",
"currently_active",
),
desc="get_presence_for_users",
)
for row in rows:
row["currently_active"] = bool(row["currently_active"])
return {row["user_id"]: UserPresenceState(**row) for row in rows}
def get_current_presence_token(self):
return self._presence_id_gen.get_current_token()
def allow_presence_visible(self, observed_localpart, observer_userid):
return self.db_pool.simple_insert(
table="presence_allow_inbound",
values={
"observed_user_id": observed_localpart,
"observer_user_id": observer_userid,
},
desc="allow_presence_visible",
or_ignore=True,
)
def disallow_presence_visible(self, observed_localpart, observer_userid):
return self.db_pool.simple_delete_one(
table="presence_allow_inbound",
keyvalues={
"observed_user_id": observed_localpart,
"observer_user_id": observer_userid,
},
desc="disallow_presence_visible",
)
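
The limit handling in get_all_presence_updates_txn follows the usual replication-stream pagination pattern; here is a plain-Python sketch with invented rows (not Synapse code).

def paginate(rows, current_id, limit):
    updates = [(row[0], row[1:]) for row in rows]
    upper_bound = current_id
    limited = False
    if len(updates) >= limit:
        # We may have hit the limit, so tell the caller to resume from the
        # stream ID of the last row we actually returned.
        upper_bound = updates[-1][0]
        limited = True
    return updates, upper_bound, limited

rows = [(11, "@a:hs", "online"), (12, "@b:hs", "unavailable"), (13, "@c:hs", "offline")]
print(paginate(rows, current_id=20, limit=3))   # (..., 13, True): truncated, resume from 13
print(paginate(rows, current_id=20, limit=10))  # (..., 20, False): complete up to the current token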


@@ -0,0 +1,178 @@
# -*- coding: utf-8 -*-
# Copyright 2014-2016 OpenMarket Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from twisted.internet import defer
from synapse.api.errors import StoreError
from synapse.storage._base import SQLBaseStore
from synapse.storage.databases.main.roommember import ProfileInfo
class ProfileWorkerStore(SQLBaseStore):
@defer.inlineCallbacks
def get_profileinfo(self, user_localpart):
try:
profile = yield self.db_pool.simple_select_one(
table="profiles",
keyvalues={"user_id": user_localpart},
retcols=("displayname", "avatar_url"),
desc="get_profileinfo",
)
except StoreError as e:
if e.code == 404:
# no match
return ProfileInfo(None, None)
else:
raise
return ProfileInfo(
avatar_url=profile["avatar_url"], display_name=profile["displayname"]
)
def get_profile_displayname(self, user_localpart):
return self.db_pool.simple_select_one_onecol(
table="profiles",
keyvalues={"user_id": user_localpart},
retcol="displayname",
desc="get_profile_displayname",
)
def get_profile_avatar_url(self, user_localpart):
return self.db_pool.simple_select_one_onecol(
table="profiles",
keyvalues={"user_id": user_localpart},
retcol="avatar_url",
desc="get_profile_avatar_url",
)
def get_from_remote_profile_cache(self, user_id):
return self.db_pool.simple_select_one(
table="remote_profile_cache",
keyvalues={"user_id": user_id},
retcols=("displayname", "avatar_url"),
allow_none=True,
desc="get_from_remote_profile_cache",
)
def create_profile(self, user_localpart):
return self.db_pool.simple_insert(
table="profiles", values={"user_id": user_localpart}, desc="create_profile"
)
def set_profile_displayname(self, user_localpart, new_displayname):
return self.db_pool.simple_update_one(
table="profiles",
keyvalues={"user_id": user_localpart},
updatevalues={"displayname": new_displayname},
desc="set_profile_displayname",
)
def set_profile_avatar_url(self, user_localpart, new_avatar_url):
return self.db_pool.simple_update_one(
table="profiles",
keyvalues={"user_id": user_localpart},
updatevalues={"avatar_url": new_avatar_url},
desc="set_profile_avatar_url",
)
class ProfileStore(ProfileWorkerStore):
def add_remote_profile_cache(self, user_id, displayname, avatar_url):
"""Ensure we are caching the remote user's profiles.
This should only be called when `is_subscribed_remote_profile_for_user`
would return true for the user.
"""
return self.db_pool.simple_upsert(
table="remote_profile_cache",
keyvalues={"user_id": user_id},
values={
"displayname": displayname,
"avatar_url": avatar_url,
"last_check": self._clock.time_msec(),
},
desc="add_remote_profile_cache",
)
def update_remote_profile_cache(self, user_id, displayname, avatar_url):
return self.db_pool.simple_update(
table="remote_profile_cache",
keyvalues={"user_id": user_id},
updatevalues={
"displayname": displayname,
"avatar_url": avatar_url,
"last_check": self._clock.time_msec(),
},
desc="update_remote_profile_cache",
)
@defer.inlineCallbacks
def maybe_delete_remote_profile_cache(self, user_id):
"""Check if we still care about the remote user's profile, and if we
don't then remove their profile from the cache
"""
subscribed = yield self.is_subscribed_remote_profile_for_user(user_id)
if not subscribed:
yield self.db_pool.simple_delete(
table="remote_profile_cache",
keyvalues={"user_id": user_id},
desc="delete_remote_profile_cache",
)
def get_remote_profile_cache_entries_that_expire(self, last_checked):
"""Get all users who haven't been checked since `last_checked`
"""
def _get_remote_profile_cache_entries_that_expire_txn(txn):
sql = """
SELECT user_id, displayname, avatar_url
FROM remote_profile_cache
WHERE last_check < ?
"""
txn.execute(sql, (last_checked,))
return self.db_pool.cursor_to_dict(txn)
return self.db_pool.runInteraction(
"get_remote_profile_cache_entries_that_expire",
_get_remote_profile_cache_entries_that_expire_txn,
)
@defer.inlineCallbacks
def is_subscribed_remote_profile_for_user(self, user_id):
"""Check whether we are interested in a remote user's profile.
"""
res = yield self.db_pool.simple_select_one_onecol(
table="group_users",
keyvalues={"user_id": user_id},
retcol="user_id",
allow_none=True,
desc="should_update_remote_profile_cache_for_user",
)
if res:
return True
res = yield self.db_pool.simple_select_one_onecol(
table="group_invites",
keyvalues={"user_id": user_id},
retcol="user_id",
allow_none=True,
desc="should_update_remote_profile_cache_for_user",
)
if res:
return True

View file

@ -0,0 +1,400 @@
# -*- coding: utf-8 -*-
# Copyright 2020 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from typing import Any, Tuple
from synapse.api.errors import SynapseError
from synapse.storage._base import SQLBaseStore
from synapse.storage.databases.main.state import StateGroupWorkerStore
from synapse.types import RoomStreamToken
logger = logging.getLogger(__name__)
class PurgeEventsStore(StateGroupWorkerStore, SQLBaseStore):
def purge_history(self, room_id, token, delete_local_events):
"""Deletes room history before a certain point
Args:
room_id (str):
token (str): A topological token to delete events before
delete_local_events (bool):
if True, we will delete local events as well as remote ones
(instead of just marking them as outliers and deleting their
state groups).
Returns:
Deferred[set[int]]: The set of state groups that are referenced by
deleted events.
"""
return self.db_pool.runInteraction(
"purge_history",
self._purge_history_txn,
room_id,
token,
delete_local_events,
)
def _purge_history_txn(self, txn, room_id, token_str, delete_local_events):
token = RoomStreamToken.parse(token_str)
# Tables that should be pruned:
# event_auth
# event_backward_extremities
# event_edges
# event_forward_extremities
# event_json
# event_push_actions
# event_reference_hashes
# event_relations
# event_search
# event_to_state_groups
# events
# rejections
# room_depth
# state_groups
# state_groups_state
# we will build a temporary table listing the events so that we don't
# have to keep shovelling the list back and forth across the
# connection. Annoyingly the python sqlite driver commits the
# transaction on CREATE, so let's do this first.
#
# furthermore, we might already have the table from a previous (failed)
# purge attempt, so let's drop the table first.
txn.execute("DROP TABLE IF EXISTS events_to_purge")
txn.execute(
"CREATE TEMPORARY TABLE events_to_purge ("
" event_id TEXT NOT NULL,"
" should_delete BOOLEAN NOT NULL"
")"
)
# First ensure that we're not about to delete all the forward extremities
txn.execute(
"SELECT e.event_id, e.depth FROM events as e "
"INNER JOIN event_forward_extremities as f "
"ON e.event_id = f.event_id "
"AND e.room_id = f.room_id "
"WHERE f.room_id = ?",
(room_id,),
)
rows = txn.fetchall()
max_depth = max(row[1] for row in rows)
if max_depth < token.topological:
# We need to ensure we don't delete all the events from the database
# otherwise we wouldn't be able to send any events (due to not
# having any backward extremities)
raise SynapseError(
400, "topological_ordering is greater than forward extremeties"
)
logger.info("[purge] looking for events to delete")
should_delete_expr = "state_key IS NULL"
should_delete_params = () # type: Tuple[Any, ...]
if not delete_local_events:
should_delete_expr += " AND event_id NOT LIKE ?"
# We include the parameter twice since we use the expression twice
should_delete_params += ("%:" + self.hs.hostname, "%:" + self.hs.hostname)
should_delete_params += (room_id, token.topological)
# Note that we insert events that are outliers and aren't going to be
# deleted, as nothing will happen to them.
txn.execute(
"INSERT INTO events_to_purge"
" SELECT event_id, %s"
" FROM events AS e LEFT JOIN state_events USING (event_id)"
" WHERE (NOT outlier OR (%s)) AND e.room_id = ? AND topological_ordering < ?"
% (should_delete_expr, should_delete_expr),
should_delete_params,
)
# We create the indices *after* insertion as that's a lot faster.
# create an index on should_delete because later we'll be looking for
# the should_delete / shouldn't_delete subsets
txn.execute(
"CREATE INDEX events_to_purge_should_delete"
" ON events_to_purge(should_delete)"
)
# We do joins against events_to_purge for e.g. calculating state
# groups to purge, etc., so lets make an index.
txn.execute("CREATE INDEX events_to_purge_id ON events_to_purge(event_id)")
txn.execute("SELECT event_id, should_delete FROM events_to_purge")
event_rows = txn.fetchall()
logger.info(
"[purge] found %i events before cutoff, of which %i can be deleted",
len(event_rows),
sum(1 for e in event_rows if e[1]),
)
logger.info("[purge] Finding new backward extremities")
# We calculate the new entries for the backward extremities by finding
# events to be purged that are pointed to by events we're not going to
# purge.
txn.execute(
"SELECT DISTINCT e.event_id FROM events_to_purge AS e"
" INNER JOIN event_edges AS ed ON e.event_id = ed.prev_event_id"
" LEFT JOIN events_to_purge AS ep2 ON ed.event_id = ep2.event_id"
" WHERE ep2.event_id IS NULL"
)
new_backwards_extrems = txn.fetchall()
logger.info("[purge] replacing backward extremities: %r", new_backwards_extrems)
txn.execute(
"DELETE FROM event_backward_extremities WHERE room_id = ?", (room_id,)
)
# Update backward extremities
txn.executemany(
"INSERT INTO event_backward_extremities (room_id, event_id)"
" VALUES (?, ?)",
[(room_id, event_id) for event_id, in new_backwards_extrems],
)
logger.info("[purge] finding state groups referenced by deleted events")
# Get all state groups that are referenced by events that are to be
# deleted.
txn.execute(
"""
SELECT DISTINCT state_group FROM events_to_purge
INNER JOIN event_to_state_groups USING (event_id)
"""
)
referenced_state_groups = {sg for sg, in txn}
logger.info(
"[purge] found %i referenced state groups", len(referenced_state_groups)
)
logger.info("[purge] removing events from event_to_state_groups")
txn.execute(
"DELETE FROM event_to_state_groups "
"WHERE event_id IN (SELECT event_id from events_to_purge)"
)
for event_id, _ in event_rows:
txn.call_after(self._get_state_group_for_event.invalidate, (event_id,))
# Delete all remote non-state events
for table in (
"events",
"event_json",
"event_auth",
"event_edges",
"event_forward_extremities",
"event_reference_hashes",
"event_relations",
"event_search",
"rejections",
):
logger.info("[purge] removing events from %s", table)
txn.execute(
"DELETE FROM %s WHERE event_id IN ("
" SELECT event_id FROM events_to_purge WHERE should_delete"
")" % (table,)
)
# event_push_actions lacks an index on event_id, and has one on
# (room_id, event_id) instead.
for table in ("event_push_actions",):
logger.info("[purge] removing events from %s", table)
txn.execute(
"DELETE FROM %s WHERE room_id = ? AND event_id IN ("
" SELECT event_id FROM events_to_purge WHERE should_delete"
")" % (table,),
(room_id,),
)
# Mark all state and own events as outliers
logger.info("[purge] marking remaining events as outliers")
txn.execute(
"UPDATE events SET outlier = ?"
" WHERE event_id IN ("
" SELECT event_id FROM events_to_purge "
" WHERE NOT should_delete"
")",
(True,),
)
# synapse tries to take out an exclusive lock on room_depth whenever it
# persists events (because upsert), and once we run this update, we
# will block that for the rest of our transaction.
#
# So, let's stick it at the end so that we don't block event
# persistence.
#
# We do this by calculating the minimum depth of the backwards
# extremities. However, the events in event_backward_extremities
# are ones we don't have yet so we need to look at the events that
# point to it via event_edges table.
txn.execute(
"""
SELECT COALESCE(MIN(depth), 0)
FROM event_backward_extremities AS eb
INNER JOIN event_edges AS eg ON eg.prev_event_id = eb.event_id
INNER JOIN events AS e ON e.event_id = eg.event_id
WHERE eb.room_id = ?
""",
(room_id,),
)
(min_depth,) = txn.fetchone()
logger.info("[purge] updating room_depth to %d", min_depth)
txn.execute(
"UPDATE room_depth SET min_depth = ? WHERE room_id = ?",
(min_depth, room_id),
)
# finally, drop the temp table. this will commit the txn in sqlite,
# so make sure to keep this actually last.
txn.execute("DROP TABLE events_to_purge")
logger.info("[purge] done")
return referenced_state_groups
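# Illustrative sketch (not from the original code; names are hypothetical): it
# mirrors the backward-extremity query above - a purged event becomes a new
# backward extremity if a surviving event still lists it as a prev_event.
def new_backward_extremities(purged, edges):
    """Return purged event_ids still referenced as prev_event by kept events.

    `purged` is a set of event_ids being removed; `edges` is an iterable of
    (event_id, prev_event_id) pairs, mirroring the event_edges table.
    """
    return {
        prev
        for event_id, prev in edges
        if prev in purged and event_id not in purged
    }

# Purging {$a, $b} from the chain $a <- $b <- $c leaves $b as the new backward
# extremity, because the surviving $c still points at it.
assert new_backward_extremities(
    {"$a", "$b"}, [("$b", "$a"), ("$c", "$b")]
) == {"$b"}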
def purge_room(self, room_id):
"""Deletes all record of a room
Args:
room_id (str)
Returns:
Deferred[List[int]]: The list of state groups to delete.
"""
return self.db_pool.runInteraction("purge_room", self._purge_room_txn, room_id)
def _purge_room_txn(self, txn, room_id):
# First we fetch all the state groups that should be deleted, before
# we delete that information.
txn.execute(
"""
SELECT DISTINCT state_group FROM events
INNER JOIN event_to_state_groups USING(event_id)
WHERE events.room_id = ?
""",
(room_id,),
)
state_groups = [row[0] for row in txn]
# Now we delete tables which lack an index on room_id but have one on event_id
for table in (
"event_auth",
"event_edges",
"event_push_actions_staging",
"event_reference_hashes",
"event_relations",
"event_to_state_groups",
"redactions",
"rejections",
"state_events",
):
logger.info("[purge] removing %s from %s", room_id, table)
txn.execute(
"""
DELETE FROM %s WHERE event_id IN (
SELECT event_id FROM events WHERE room_id=?
)
"""
% (table,),
(room_id,),
)
# and finally, the tables with an index on room_id (or no useful index)
for table in (
"current_state_events",
"event_backward_extremities",
"event_forward_extremities",
"event_json",
"event_push_actions",
"event_search",
"events",
"group_rooms",
"public_room_list_stream",
"receipts_graph",
"receipts_linearized",
"room_aliases",
"room_depth",
"room_memberships",
"room_stats_state",
"room_stats_current",
"room_stats_historical",
"room_stats_earliest_token",
"rooms",
"stream_ordering_to_exterm",
"users_in_public_rooms",
"users_who_share_private_rooms",
# no useful index, but let's clear them anyway
"appservice_room_list",
"e2e_room_keys",
"event_push_summary",
"pusher_throttle",
"group_summary_rooms",
"room_account_data",
"room_tags",
"local_current_membership",
):
logger.info("[purge] removing %s from %s", room_id, table)
txn.execute("DELETE FROM %s WHERE room_id=?" % (table,), (room_id,))
# Other tables we do NOT need to clear out:
#
# - blocked_rooms
# This is important, to make sure that we don't accidentally rejoin a blocked
# room after it was purged
#
# - user_directory
# This has a room_id column, but it is unused
#
# Other tables that we might want to consider clearing out include:
#
# - event_reports
# Given that these are intended for abuse management my initial
# inclination is to leave them in place.
#
# - current_state_delta_stream
# - ex_outlier_stream
# - room_tags_revisions
# The problem with these is that they are largeish and there is no room_id
# index on them. In any case we should be clearing out 'stream' tables
# periodically anyway (#5888)
# TODO: we could probably usefully do a bunch of cache invalidation here
logger.info("[purge] done")
return state_groups

View file

@ -0,0 +1,759 @@
# -*- coding: utf-8 -*-
# Copyright 2014-2016 OpenMarket Ltd
# Copyright 2018 New Vector Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import abc
import logging
from typing import List, Tuple, Union
from canonicaljson import json
from twisted.internet import defer
from synapse.push.baserules import list_with_base_rules
from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker
from synapse.storage._base import SQLBaseStore, db_to_json
from synapse.storage.database import DatabasePool
from synapse.storage.databases.main.appservice import ApplicationServiceWorkerStore
from synapse.storage.databases.main.events_worker import EventsWorkerStore
from synapse.storage.databases.main.pusher import PusherWorkerStore
from synapse.storage.databases.main.receipts import ReceiptsWorkerStore
from synapse.storage.databases.main.roommember import RoomMemberWorkerStore
from synapse.storage.push_rule import InconsistentRuleException, RuleNotFoundException
from synapse.storage.util.id_generators import ChainedIdGenerator
from synapse.util.caches.descriptors import cachedInlineCallbacks, cachedList
from synapse.util.caches.stream_change_cache import StreamChangeCache
logger = logging.getLogger(__name__)
def _load_rules(rawrules, enabled_map):
ruleslist = []
for rawrule in rawrules:
rule = dict(rawrule)
rule["conditions"] = db_to_json(rawrule["conditions"])
rule["actions"] = db_to_json(rawrule["actions"])
rule["default"] = False
ruleslist.append(rule)
# We're going to be mutating this a lot, so do a deep copy
rules = list(list_with_base_rules(ruleslist))
for i, rule in enumerate(rules):
rule_id = rule["rule_id"]
if rule_id in enabled_map:
if rule.get("enabled", True) != bool(enabled_map[rule_id]):
# Rules are cached across users.
rule = dict(rule)
rule["enabled"] = bool(enabled_map[rule_id])
rules[i] = rule
return rules
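# Illustrative sketch (not from the original code; names are hypothetical): the
# override loop above copies a rule dict before flipping "enabled", because the
# base rules are cached and shared across users.
def apply_enabled_overrides(rules, enabled_map):
    out = list(rules)
    for i, rule in enumerate(out):
        enabled = enabled_map.get(rule["rule_id"])
        if enabled is not None and rule.get("enabled", True) != bool(enabled):
            rule = dict(rule)  # copy before mutating the shared base rule
            rule["enabled"] = bool(enabled)
            out[i] = rule
    return out

shared = [{"rule_id": ".m.rule.contains_user_name", "enabled": True}]
per_user = apply_enabled_overrides(shared, {".m.rule.contains_user_name": 0})
assert per_user[0]["enabled"] is False and shared[0]["enabled"] is True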
class PushRulesWorkerStore(
ApplicationServiceWorkerStore,
ReceiptsWorkerStore,
PusherWorkerStore,
RoomMemberWorkerStore,
EventsWorkerStore,
SQLBaseStore,
):
"""This is an abstract base class where subclasses must implement
`get_max_push_rules_stream_id` which can be called in the initializer.
"""
# This ABCMeta metaclass ensures that we cannot be instantiated without
# the abstract methods being implemented.
__metaclass__ = abc.ABCMeta
def __init__(self, database: DatabasePool, db_conn, hs):
super(PushRulesWorkerStore, self).__init__(database, db_conn, hs)
if hs.config.worker.worker_app is None:
self._push_rules_stream_id_gen = ChainedIdGenerator(
self._stream_id_gen, db_conn, "push_rules_stream", "stream_id"
) # type: Union[ChainedIdGenerator, SlavedIdTracker]
else:
self._push_rules_stream_id_gen = SlavedIdTracker(
db_conn, "push_rules_stream", "stream_id"
)
push_rules_prefill, push_rules_id = self.db_pool.get_cache_dict(
db_conn,
"push_rules_stream",
entity_column="user_id",
stream_column="stream_id",
max_value=self.get_max_push_rules_stream_id(),
)
self.push_rules_stream_cache = StreamChangeCache(
"PushRulesStreamChangeCache",
push_rules_id,
prefilled_cache=push_rules_prefill,
)
@abc.abstractmethod
def get_max_push_rules_stream_id(self):
"""Get the position of the push rules stream.
Returns:
int
"""
raise NotImplementedError()
@cachedInlineCallbacks(max_entries=5000)
def get_push_rules_for_user(self, user_id):
rows = yield self.db_pool.simple_select_list(
table="push_rules",
keyvalues={"user_name": user_id},
retcols=(
"user_name",
"rule_id",
"priority_class",
"priority",
"conditions",
"actions",
),
desc="get_push_rules_enabled_for_user",
)
rows.sort(key=lambda row: (-int(row["priority_class"]), -int(row["priority"])))
enabled_map = yield self.get_push_rules_enabled_for_user(user_id)
rules = _load_rules(rows, enabled_map)
return rules
@cachedInlineCallbacks(max_entries=5000)
def get_push_rules_enabled_for_user(self, user_id):
results = yield self.db_pool.simple_select_list(
table="push_rules_enable",
keyvalues={"user_name": user_id},
retcols=("user_name", "rule_id", "enabled"),
desc="get_push_rules_enabled_for_user",
)
return {r["rule_id"]: False if r["enabled"] == 0 else True for r in results}
def have_push_rules_changed_for_user(self, user_id, last_id):
if not self.push_rules_stream_cache.has_entity_changed(user_id, last_id):
return defer.succeed(False)
else:
def have_push_rules_changed_txn(txn):
sql = (
"SELECT COUNT(stream_id) FROM push_rules_stream"
" WHERE user_id = ? AND ? < stream_id"
)
txn.execute(sql, (user_id, last_id))
(count,) = txn.fetchone()
return bool(count)
return self.db_pool.runInteraction(
"have_push_rules_changed", have_push_rules_changed_txn
)
@cachedList(
cached_method_name="get_push_rules_for_user",
list_name="user_ids",
num_args=1,
inlineCallbacks=True,
)
def bulk_get_push_rules(self, user_ids):
if not user_ids:
return {}
results = {user_id: [] for user_id in user_ids}
rows = yield self.db_pool.simple_select_many_batch(
table="push_rules",
column="user_name",
iterable=user_ids,
retcols=("*",),
desc="bulk_get_push_rules",
)
rows.sort(key=lambda row: (-int(row["priority_class"]), -int(row["priority"])))
for row in rows:
results.setdefault(row["user_name"], []).append(row)
enabled_map_by_user = yield self.bulk_get_push_rules_enabled(user_ids)
for user_id, rules in results.items():
results[user_id] = _load_rules(rules, enabled_map_by_user.get(user_id, {}))
return results
@defer.inlineCallbacks
def copy_push_rule_from_room_to_room(self, new_room_id, user_id, rule):
"""Copy a single push rule from one room to another for a specific user.
Args:
new_room_id (str): ID of the new room.
user_id (str): ID of user the push rule belongs to.
rule (Dict): A push rule.
"""
# Create new rule id
rule_id_scope = "/".join(rule["rule_id"].split("/")[:-1])
new_rule_id = rule_id_scope + "/" + new_room_id
# Change room id in each condition
for condition in rule.get("conditions", []):
if condition.get("key") == "room_id":
condition["pattern"] = new_room_id
# Add the rule for the new room
yield self.add_push_rule(
user_id=user_id,
rule_id=new_rule_id,
priority_class=rule["priority_class"],
conditions=rule["conditions"],
actions=rule["actions"],
)
@defer.inlineCallbacks
def copy_push_rules_from_room_to_room_for_user(
self, old_room_id, new_room_id, user_id
):
"""Copy all of the push rules from one room to another for a specific
user.
Args:
old_room_id (str): ID of the old room.
new_room_id (str): ID of the new room.
user_id (str): ID of user to copy push rules for.
"""
# Retrieve push rules for this user
user_push_rules = yield self.get_push_rules_for_user(user_id)
# Get rules relating to the old room and copy them to the new room
for rule in user_push_rules:
conditions = rule.get("conditions", [])
if any(
(c.get("key") == "room_id" and c.get("pattern") == old_room_id)
for c in conditions
):
yield self.copy_push_rule_from_room_to_room(new_room_id, user_id, rule)
@defer.inlineCallbacks
def bulk_get_push_rules_for_room(self, event, context):
state_group = context.state_group
if not state_group:
# If state_group is None it means it has yet to be assigned a
# state group, i.e. we need to make sure that calls with a state_group
# of None don't hit previous cached calls with a None state_group.
# To do this we set the state_group to a new object as object() != object()
state_group = object()
current_state_ids = yield defer.ensureDeferred(context.get_current_state_ids())
result = yield self._bulk_get_push_rules_for_room(
event.room_id, state_group, current_state_ids, event=event
)
return result
@cachedInlineCallbacks(num_args=2, cache_context=True)
def _bulk_get_push_rules_for_room(
self, room_id, state_group, current_state_ids, cache_context, event=None
):
# We don't use `state_group`, it's there so that we can cache based
# on it. However, it's important that it's never None, since two current_state_ids
# with a state_group of None are likely to be different.
# See bulk_get_push_rules_for_room for how we work around this.
assert state_group is not None
# We also will want to generate notifs for other people in the room so
# their unread counts are correct in the event stream, but to avoid
# generating them for bot / AS users etc, we only do so for people who've
# sent a read receipt into the room.
users_in_room = yield self._get_joined_users_from_context(
room_id,
state_group,
current_state_ids,
on_invalidate=cache_context.invalidate,
event=event,
)
# We ignore app service users for now. This is so that we don't fill
# up the `get_if_users_have_pushers` cache with AS entries that we
# know don't have pushers, nor even read receipts.
local_users_in_room = {
u
for u in users_in_room
if self.hs.is_mine_id(u)
and not self.get_if_app_services_interested_in_user(u)
}
# users in the room who have pushers need to get push rules run because
# that's how their pushers work
if_users_with_pushers = yield self.get_if_users_have_pushers(
local_users_in_room, on_invalidate=cache_context.invalidate
)
user_ids = {
uid for uid, have_pusher in if_users_with_pushers.items() if have_pusher
}
users_with_receipts = yield self.get_users_with_read_receipts_in_room(
room_id, on_invalidate=cache_context.invalidate
)
# also run push rules for local users who have sent a read receipt into the room
for uid in users_with_receipts:
if uid in local_users_in_room:
user_ids.add(uid)
rules_by_user = yield self.bulk_get_push_rules(
user_ids, on_invalidate=cache_context.invalidate
)
rules_by_user = {k: v for k, v in rules_by_user.items() if v is not None}
return rules_by_user
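# Illustrative sketch (not from the original code; names are hypothetical): why a
# fresh object() stands in for a missing state_group above. Distinct object()
# sentinels never compare equal, so two "no state group yet" lookups can never
# share a cache entry, whereas a shared None key would.
_cache = {}

def cached_lookup(room_id, state_group, compute):
    key = (room_id, state_group)
    if key not in _cache:
        _cache[key] = compute()
    return _cache[key]

first = cached_lookup("!room:example.org", object(), lambda: "rules for state A")
second = cached_lookup("!room:example.org", object(), lambda: "rules for state B")
assert first != second  # separate sentinels -> separate cache entries

stale_a = cached_lookup("!room:example.org", None, lambda: "rules for state A")
stale_b = cached_lookup("!room:example.org", None, lambda: "rules for state B")
assert stale_a == stale_b  # a None key would wrongly reuse the first result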
@cachedList(
cached_method_name="get_push_rules_enabled_for_user",
list_name="user_ids",
num_args=1,
inlineCallbacks=True,
)
def bulk_get_push_rules_enabled(self, user_ids):
if not user_ids:
return {}
results = {user_id: {} for user_id in user_ids}
rows = yield self.db_pool.simple_select_many_batch(
table="push_rules_enable",
column="user_name",
iterable=user_ids,
retcols=("user_name", "rule_id", "enabled"),
desc="bulk_get_push_rules_enabled",
)
for row in rows:
enabled = bool(row["enabled"])
results.setdefault(row["user_name"], {})[row["rule_id"]] = enabled
return results
async def get_all_push_rule_updates(
self, instance_name: str, last_id: int, current_id: int, limit: int
) -> Tuple[List[Tuple[int, tuple]], int, bool]:
"""Get updates for push_rules replication stream.
Args:
instance_name: The writer we want to fetch updates from. Unused
here since there is only ever one writer.
last_id: The token to fetch updates from. Exclusive.
current_id: The token to fetch updates up to. Inclusive.
limit: The requested limit for the number of rows to return. The
function may return more or fewer rows.
Returns:
A tuple consisting of: the updates, a token to use to fetch
subsequent updates, and whether we returned fewer rows than exist
between the requested tokens due to the limit.
The token returned can be used in a subsequent call to this
function to get further updates.
The updates are a list of 2-tuples of stream ID and the row data
"""
if last_id == current_id:
return [], current_id, False
def get_all_push_rule_updates_txn(txn):
sql = """
SELECT stream_id, user_id
FROM push_rules_stream
WHERE ? < stream_id AND stream_id <= ?
ORDER BY stream_id ASC
LIMIT ?
"""
txn.execute(sql, (last_id, current_id, limit))
updates = [(stream_id, (user_id,)) for stream_id, user_id in txn]
limited = False
upper_bound = current_id
if len(updates) == limit:
limited = True
upper_bound = updates[-1][0]
return updates, upper_bound, limited
return await self.db_pool.runInteraction(
"get_all_push_rule_updates", get_all_push_rule_updates_txn
)
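# Illustrative sketch (not from the original code; names are hypothetical): the
# paging contract described in the docstring above, in pure Python. last_id is
# exclusive, current_id is inclusive, and when the limit is hit the returned
# token is the last stream id actually returned, so the caller can page on.
def page_updates(rows, last_id, current_id, limit):
    updates = [
        (sid, data) for sid, data in rows if last_id < sid <= current_id
    ][:limit]
    limited = len(updates) == limit
    upper_bound = updates[-1][0] if limited else current_id
    return updates, upper_bound, limited

rows = [(i, ("@user%d:example.org" % i,)) for i in range(1, 8)]
updates, token, limited = page_updates(rows, last_id=2, current_id=7, limit=3)
assert [sid for sid, _ in updates] == [3, 4, 5] and token == 5 and limited
# Passing token (5) back as last_id fetches the remaining rows, 6 and 7.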
class PushRuleStore(PushRulesWorkerStore):
@defer.inlineCallbacks
def add_push_rule(
self,
user_id,
rule_id,
priority_class,
conditions,
actions,
before=None,
after=None,
):
conditions_json = json.dumps(conditions)
actions_json = json.dumps(actions)
with self._push_rules_stream_id_gen.get_next() as ids:
stream_id, event_stream_ordering = ids
if before or after:
yield self.db_pool.runInteraction(
"_add_push_rule_relative_txn",
self._add_push_rule_relative_txn,
stream_id,
event_stream_ordering,
user_id,
rule_id,
priority_class,
conditions_json,
actions_json,
before,
after,
)
else:
yield self.db_pool.runInteraction(
"_add_push_rule_highest_priority_txn",
self._add_push_rule_highest_priority_txn,
stream_id,
event_stream_ordering,
user_id,
rule_id,
priority_class,
conditions_json,
actions_json,
)
def _add_push_rule_relative_txn(
self,
txn,
stream_id,
event_stream_ordering,
user_id,
rule_id,
priority_class,
conditions_json,
actions_json,
before,
after,
):
# Lock the table since otherwise we'll have annoying races between the
# SELECT here and the UPSERT below.
self.database_engine.lock_table(txn, "push_rules")
relative_to_rule = before or after
res = self.db_pool.simple_select_one_txn(
txn,
table="push_rules",
keyvalues={"user_name": user_id, "rule_id": relative_to_rule},
retcols=["priority_class", "priority"],
allow_none=True,
)
if not res:
raise RuleNotFoundException(
"before/after rule not found: %s" % (relative_to_rule,)
)
base_priority_class = res["priority_class"]
base_rule_priority = res["priority"]
if base_priority_class != priority_class:
raise InconsistentRuleException(
"Given priority class does not match class of relative rule"
)
if before:
# Higher priority rules are executed first, so adding a rule before
# a rule means giving it a higher priority than that rule.
new_rule_priority = base_rule_priority + 1
else:
# We increment the priority of the existing rules to make space for
# the new rule. Therefore if we want this rule to appear after
# an existing rule we give it the priority of the existing rule,
# and then increment the priority of the existing rule.
new_rule_priority = base_rule_priority
sql = (
"UPDATE push_rules SET priority = priority + 1"
" WHERE user_name = ? AND priority_class = ? AND priority >= ?"
)
txn.execute(sql, (user_id, priority_class, new_rule_priority))
self._upsert_push_rule_txn(
txn,
stream_id,
event_stream_ordering,
user_id,
rule_id,
priority_class,
new_rule_priority,
conditions_json,
actions_json,
)
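# Illustrative sketch (not from the original code; names are hypothetical): the
# before/after priority arithmetic above, worked through with small numbers.
# Higher priority runs first; "before" means one above the reference rule,
# "after" means taking the reference rule's slot and bumping it (and everything
# above it) up by one.
def insert_relative(priorities, base_rule, new_rule, before):
    new_priority = priorities[base_rule] + (1 if before else 0)
    bumped = {
        rid: p + 1 if p >= new_priority else p for rid, p in priorities.items()
    }
    bumped[new_rule] = new_priority
    return bumped

rules = {"low": 0, "base": 1, "high": 2}
assert insert_relative(rules, "base", "new", before=True) == {
    "low": 0, "base": 1, "new": 2, "high": 3,
}
assert insert_relative(rules, "base", "new", before=False) == {
    "low": 0, "new": 1, "base": 2, "high": 3,
}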
def _add_push_rule_highest_priority_txn(
self,
txn,
stream_id,
event_stream_ordering,
user_id,
rule_id,
priority_class,
conditions_json,
actions_json,
):
# Lock the table since otherwise we'll have annoying races between the
# SELECT here and the UPSERT below.
self.database_engine.lock_table(txn, "push_rules")
# find the highest priority rule in that class
sql = (
"SELECT COUNT(*), MAX(priority) FROM push_rules"
" WHERE user_name = ? and priority_class = ?"
)
txn.execute(sql, (user_id, priority_class))
res = txn.fetchall()
(how_many, highest_prio) = res[0]
new_prio = 0
if how_many > 0:
new_prio = highest_prio + 1
self._upsert_push_rule_txn(
txn,
stream_id,
event_stream_ordering,
user_id,
rule_id,
priority_class,
new_prio,
conditions_json,
actions_json,
)
def _upsert_push_rule_txn(
self,
txn,
stream_id,
event_stream_ordering,
user_id,
rule_id,
priority_class,
priority,
conditions_json,
actions_json,
update_stream=True,
):
"""Specialised version of simple_upsert_txn that picks a push_rule_id
using the _push_rule_id_gen if it needs to insert the rule. It assumes
that the "push_rules" table is locked"""
sql = (
"UPDATE push_rules"
" SET priority_class = ?, priority = ?, conditions = ?, actions = ?"
" WHERE user_name = ? AND rule_id = ?"
)
txn.execute(
sql,
(priority_class, priority, conditions_json, actions_json, user_id, rule_id),
)
if txn.rowcount == 0:
# We didn't update a row with the given rule_id so insert one
push_rule_id = self._push_rule_id_gen.get_next()
self.db_pool.simple_insert_txn(
txn,
table="push_rules",
values={
"id": push_rule_id,
"user_name": user_id,
"rule_id": rule_id,
"priority_class": priority_class,
"priority": priority,
"conditions": conditions_json,
"actions": actions_json,
},
)
if update_stream:
self._insert_push_rules_update_txn(
txn,
stream_id,
event_stream_ordering,
user_id,
rule_id,
op="ADD",
data={
"priority_class": priority_class,
"priority": priority,
"conditions": conditions_json,
"actions": actions_json,
},
)
@defer.inlineCallbacks
def delete_push_rule(self, user_id, rule_id):
"""
Delete a push rule. Args specify the row to be deleted and can be
any of the columns in the push_rule table, but below are the
standard ones
Args:
user_id (str): The matrix ID of the push rule owner
rule_id (str): The rule_id of the rule to be deleted
"""
def delete_push_rule_txn(txn, stream_id, event_stream_ordering):
self.db_pool.simple_delete_one_txn(
txn, "push_rules", {"user_name": user_id, "rule_id": rule_id}
)
self._insert_push_rules_update_txn(
txn, stream_id, event_stream_ordering, user_id, rule_id, op="DELETE"
)
with self._push_rules_stream_id_gen.get_next() as ids:
stream_id, event_stream_ordering = ids
yield self.db_pool.runInteraction(
"delete_push_rule",
delete_push_rule_txn,
stream_id,
event_stream_ordering,
)
@defer.inlineCallbacks
def set_push_rule_enabled(self, user_id, rule_id, enabled):
with self._push_rules_stream_id_gen.get_next() as ids:
stream_id, event_stream_ordering = ids
yield self.db_pool.runInteraction(
"_set_push_rule_enabled_txn",
self._set_push_rule_enabled_txn,
stream_id,
event_stream_ordering,
user_id,
rule_id,
enabled,
)
def _set_push_rule_enabled_txn(
self, txn, stream_id, event_stream_ordering, user_id, rule_id, enabled
):
new_id = self._push_rules_enable_id_gen.get_next()
self.db_pool.simple_upsert_txn(
txn,
"push_rules_enable",
{"user_name": user_id, "rule_id": rule_id},
{"enabled": 1 if enabled else 0},
{"id": new_id},
)
self._insert_push_rules_update_txn(
txn,
stream_id,
event_stream_ordering,
user_id,
rule_id,
op="ENABLE" if enabled else "DISABLE",
)
@defer.inlineCallbacks
def set_push_rule_actions(self, user_id, rule_id, actions, is_default_rule):
actions_json = json.dumps(actions)
def set_push_rule_actions_txn(txn, stream_id, event_stream_ordering):
if is_default_rule:
# Add a dummy rule to the rules table with the user specified
# actions.
priority_class = -1
priority = 1
self._upsert_push_rule_txn(
txn,
stream_id,
event_stream_ordering,
user_id,
rule_id,
priority_class,
priority,
"[]",
actions_json,
update_stream=False,
)
else:
self.db_pool.simple_update_one_txn(
txn,
"push_rules",
{"user_name": user_id, "rule_id": rule_id},
{"actions": actions_json},
)
self._insert_push_rules_update_txn(
txn,
stream_id,
event_stream_ordering,
user_id,
rule_id,
op="ACTIONS",
data={"actions": actions_json},
)
with self._push_rules_stream_id_gen.get_next() as ids:
stream_id, event_stream_ordering = ids
yield self.db_pool.runInteraction(
"set_push_rule_actions",
set_push_rule_actions_txn,
stream_id,
event_stream_ordering,
)
def _insert_push_rules_update_txn(
self, txn, stream_id, event_stream_ordering, user_id, rule_id, op, data=None
):
values = {
"stream_id": stream_id,
"event_stream_ordering": event_stream_ordering,
"user_id": user_id,
"rule_id": rule_id,
"op": op,
}
if data is not None:
values.update(data)
self.db_pool.simple_insert_txn(txn, "push_rules_stream", values=values)
txn.call_after(self.get_push_rules_for_user.invalidate, (user_id,))
txn.call_after(self.get_push_rules_enabled_for_user.invalidate, (user_id,))
txn.call_after(
self.push_rules_stream_cache.entity_has_changed, user_id, stream_id
)
def get_push_rules_stream_token(self):
"""Get the position of the push rules stream.
Returns a pair of a stream id for the push_rules stream and the
room stream ordering it corresponds to."""
return self._push_rules_stream_id_gen.get_current_token()
def get_max_push_rules_stream_id(self):
return self.get_push_rules_stream_token()[0]

View file

@ -0,0 +1,356 @@
# -*- coding: utf-8 -*-
# Copyright 2014-2016 OpenMarket Ltd
# Copyright 2018 New Vector Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from typing import Iterable, Iterator, List, Tuple
from canonicaljson import encode_canonical_json
from twisted.internet import defer
from synapse.storage._base import SQLBaseStore, db_to_json
from synapse.util.caches.descriptors import cachedInlineCallbacks, cachedList
logger = logging.getLogger(__name__)
class PusherWorkerStore(SQLBaseStore):
def _decode_pushers_rows(self, rows: Iterable[dict]) -> Iterator[dict]:
"""JSON-decode the data in the rows returned from the `pushers` table
Drops any rows whose data cannot be decoded
"""
for r in rows:
dataJson = r["data"]
try:
r["data"] = db_to_json(dataJson)
except Exception as e:
logger.warning(
"Invalid JSON in data for pusher %d: %s, %s",
r["id"],
dataJson,
e.args[0],
)
continue
yield r
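# Illustrative sketch (not from the original code; names are hypothetical): the
# same "decode or drop" generator pattern with plain json, showing that one
# corrupt pusher row is skipped rather than failing the whole query.
import json

def decode_rows(rows):
    for row in rows:
        try:
            row["data"] = json.loads(row["data"])
        except Exception:
            continue  # drop the undecodable row
        yield row

sample = [
    {"id": 1, "data": '{"url": "https://push.example.org"}'},
    {"id": 2, "data": "not json"},
]
assert [r["id"] for r in decode_rows(sample)] == [1]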
@defer.inlineCallbacks
def user_has_pusher(self, user_id):
ret = yield self.db_pool.simple_select_one_onecol(
"pushers", {"user_name": user_id}, "id", allow_none=True
)
return ret is not None
def get_pushers_by_app_id_and_pushkey(self, app_id, pushkey):
return self.get_pushers_by({"app_id": app_id, "pushkey": pushkey})
def get_pushers_by_user_id(self, user_id):
return self.get_pushers_by({"user_name": user_id})
@defer.inlineCallbacks
def get_pushers_by(self, keyvalues):
ret = yield self.db_pool.simple_select_list(
"pushers",
keyvalues,
[
"id",
"user_name",
"access_token",
"profile_tag",
"kind",
"app_id",
"app_display_name",
"device_display_name",
"pushkey",
"ts",
"lang",
"data",
"last_stream_ordering",
"last_success",
"failing_since",
],
desc="get_pushers_by",
)
return self._decode_pushers_rows(ret)
@defer.inlineCallbacks
def get_all_pushers(self):
def get_pushers(txn):
txn.execute("SELECT * FROM pushers")
rows = self.db_pool.cursor_to_dict(txn)
return self._decode_pushers_rows(rows)
rows = yield self.db_pool.runInteraction("get_all_pushers", get_pushers)
return rows
async def get_all_updated_pushers_rows(
self, instance_name: str, last_id: int, current_id: int, limit: int
) -> Tuple[List[Tuple[int, tuple]], int, bool]:
"""Get updates for pushers replication stream.
Args:
instance_name: The writer we want to fetch updates from. Unused
here since there is only ever one writer.
last_id: The token to fetch updates from. Exclusive.
current_id: The token to fetch updates up to. Inclusive.
limit: The requested limit for the number of rows to return. The
function may return more or fewer rows.
Returns:
A tuple consisting of: the updates, a token to use to fetch
subsequent updates, and whether we returned fewer rows than exist
between the requested tokens due to the limit.
The token returned can be used in a subsequent call to this
function to get further updates.
The updates are a list of 2-tuples of stream ID and the row data
"""
if last_id == current_id:
return [], current_id, False
def get_all_updated_pushers_rows_txn(txn):
sql = """
SELECT id, user_name, app_id, pushkey
FROM pushers
WHERE ? < id AND id <= ?
ORDER BY id ASC LIMIT ?
"""
txn.execute(sql, (last_id, current_id, limit))
updates = [
(stream_id, (user_name, app_id, pushkey, False))
for stream_id, user_name, app_id, pushkey in txn
]
sql = """
SELECT stream_id, user_id, app_id, pushkey
FROM deleted_pushers
WHERE ? < stream_id AND stream_id <= ?
ORDER BY stream_id ASC LIMIT ?
"""
txn.execute(sql, (last_id, current_id, limit))
updates.extend(
(stream_id, (user_name, app_id, pushkey, True))
for stream_id, user_name, app_id, pushkey in txn
)
updates.sort() # Sort so that they're ordered by stream id
limited = False
upper_bound = current_id
if len(updates) >= limit:
limited = True
upper_bound = updates[-1][0]
return updates, upper_bound, limited
return await self.db_pool.runInteraction(
"get_all_updated_pushers_rows", get_all_updated_pushers_rows_txn
)
@cachedInlineCallbacks(num_args=1, max_entries=15000)
def get_if_user_has_pusher(self, user_id):
# This only exists for the cachedList decorator
raise NotImplementedError()
@cachedList(
cached_method_name="get_if_user_has_pusher",
list_name="user_ids",
num_args=1,
inlineCallbacks=True,
)
def get_if_users_have_pushers(self, user_ids):
rows = yield self.db_pool.simple_select_many_batch(
table="pushers",
column="user_name",
iterable=user_ids,
retcols=["user_name"],
desc="get_if_users_have_pushers",
)
result = {user_id: False for user_id in user_ids}
result.update({r["user_name"]: True for r in rows})
return result
@defer.inlineCallbacks
def update_pusher_last_stream_ordering(
self, app_id, pushkey, user_id, last_stream_ordering
):
yield self.db_pool.simple_update_one(
"pushers",
{"app_id": app_id, "pushkey": pushkey, "user_name": user_id},
{"last_stream_ordering": last_stream_ordering},
desc="update_pusher_last_stream_ordering",
)
@defer.inlineCallbacks
def update_pusher_last_stream_ordering_and_success(
self, app_id, pushkey, user_id, last_stream_ordering, last_success
):
"""Update the last stream ordering position we've processed up to for
the given pusher.
Args:
app_id (str)
pushkey (str)
last_stream_ordering (int)
last_success (int)
Returns:
Deferred[bool]: True if the pusher still exists; False if it has been deleted.
"""
updated = yield self.db_pool.simple_update(
table="pushers",
keyvalues={"app_id": app_id, "pushkey": pushkey, "user_name": user_id},
updatevalues={
"last_stream_ordering": last_stream_ordering,
"last_success": last_success,
},
desc="update_pusher_last_stream_ordering_and_success",
)
return bool(updated)
@defer.inlineCallbacks
def update_pusher_failing_since(self, app_id, pushkey, user_id, failing_since):
yield self.db_pool.simple_update(
table="pushers",
keyvalues={"app_id": app_id, "pushkey": pushkey, "user_name": user_id},
updatevalues={"failing_since": failing_since},
desc="update_pusher_failing_since",
)
@defer.inlineCallbacks
def get_throttle_params_by_room(self, pusher_id):
res = yield self.db_pool.simple_select_list(
"pusher_throttle",
{"pusher": pusher_id},
["room_id", "last_sent_ts", "throttle_ms"],
desc="get_throttle_params_by_room",
)
params_by_room = {}
for row in res:
params_by_room[row["room_id"]] = {
"last_sent_ts": row["last_sent_ts"],
"throttle_ms": row["throttle_ms"],
}
return params_by_room
@defer.inlineCallbacks
def set_throttle_params(self, pusher_id, room_id, params):
# no need to lock because `pusher_throttle` has a primary key on
# (pusher, room_id) so simple_upsert will retry
yield self.db_pool.simple_upsert(
"pusher_throttle",
{"pusher": pusher_id, "room_id": room_id},
params,
desc="set_throttle_params",
lock=False,
)
class PusherStore(PusherWorkerStore):
def get_pushers_stream_token(self):
return self._pushers_id_gen.get_current_token()
@defer.inlineCallbacks
def add_pusher(
self,
user_id,
access_token,
kind,
app_id,
app_display_name,
device_display_name,
pushkey,
pushkey_ts,
lang,
data,
last_stream_ordering,
profile_tag="",
):
with self._pushers_id_gen.get_next() as stream_id:
# no need to lock because `pushers` has a unique key on
# (app_id, pushkey, user_name) so simple_upsert will retry
yield self.db_pool.simple_upsert(
table="pushers",
keyvalues={"app_id": app_id, "pushkey": pushkey, "user_name": user_id},
values={
"access_token": access_token,
"kind": kind,
"app_display_name": app_display_name,
"device_display_name": device_display_name,
"ts": pushkey_ts,
"lang": lang,
"data": bytearray(encode_canonical_json(data)),
"last_stream_ordering": last_stream_ordering,
"profile_tag": profile_tag,
"id": stream_id,
},
desc="add_pusher",
lock=False,
)
user_has_pusher = self.get_if_user_has_pusher.cache.get(
(user_id,), None, update_metrics=False
)
if user_has_pusher is not True:
# invalidate, since the user might not have had a pusher before
yield self.db_pool.runInteraction(
"add_pusher",
self._invalidate_cache_and_stream,
self.get_if_user_has_pusher,
(user_id,),
)
@defer.inlineCallbacks
def delete_pusher_by_app_id_pushkey_user_id(self, app_id, pushkey, user_id):
def delete_pusher_txn(txn, stream_id):
self._invalidate_cache_and_stream(
txn, self.get_if_user_has_pusher, (user_id,)
)
self.db_pool.simple_delete_one_txn(
txn,
"pushers",
{"app_id": app_id, "pushkey": pushkey, "user_name": user_id},
)
# it's possible for us to end up with duplicate rows for
# (app_id, pushkey, user_id) at different stream_ids, but that
# doesn't really matter.
self.db_pool.simple_insert_txn(
txn,
table="deleted_pushers",
values={
"stream_id": stream_id,
"app_id": app_id,
"pushkey": pushkey,
"user_id": user_id,
},
)
with self._pushers_id_gen.get_next() as stream_id:
yield self.db_pool.runInteraction(
"delete_pusher", delete_pusher_txn, stream_id
)

View file

@ -0,0 +1,591 @@
# -*- coding: utf-8 -*-
# Copyright 2014-2016 OpenMarket Ltd
# Copyright 2018 New Vector Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import abc
import logging
from typing import List, Tuple
from canonicaljson import json
from twisted.internet import defer
from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause
from synapse.storage.database import DatabasePool
from synapse.storage.util.id_generators import StreamIdGenerator
from synapse.util.async_helpers import ObservableDeferred
from synapse.util.caches.descriptors import cached, cachedInlineCallbacks, cachedList
from synapse.util.caches.stream_change_cache import StreamChangeCache
logger = logging.getLogger(__name__)
class ReceiptsWorkerStore(SQLBaseStore):
"""This is an abstract base class where subclasses must implement
`get_max_receipt_stream_id` which can be called in the initializer.
"""
# This ABCMeta metaclass ensures that we cannot be instantiated without
# the abstract methods being implemented.
__metaclass__ = abc.ABCMeta
def __init__(self, database: DatabasePool, db_conn, hs):
super(ReceiptsWorkerStore, self).__init__(database, db_conn, hs)
self._receipts_stream_cache = StreamChangeCache(
"ReceiptsRoomChangeCache", self.get_max_receipt_stream_id()
)
@abc.abstractmethod
def get_max_receipt_stream_id(self):
"""Get the current max stream ID for receipts stream
Returns:
int
"""
raise NotImplementedError()
@cachedInlineCallbacks()
def get_users_with_read_receipts_in_room(self, room_id):
receipts = yield self.get_receipts_for_room(room_id, "m.read")
return {r["user_id"] for r in receipts}
@cached(num_args=2)
def get_receipts_for_room(self, room_id, receipt_type):
return self.db_pool.simple_select_list(
table="receipts_linearized",
keyvalues={"room_id": room_id, "receipt_type": receipt_type},
retcols=("user_id", "event_id"),
desc="get_receipts_for_room",
)
@cached(num_args=3)
def get_last_receipt_event_id_for_user(self, user_id, room_id, receipt_type):
return self.db_pool.simple_select_one_onecol(
table="receipts_linearized",
keyvalues={
"room_id": room_id,
"receipt_type": receipt_type,
"user_id": user_id,
},
retcol="event_id",
desc="get_own_receipt_for_user",
allow_none=True,
)
@cachedInlineCallbacks(num_args=2)
def get_receipts_for_user(self, user_id, receipt_type):
rows = yield self.db_pool.simple_select_list(
table="receipts_linearized",
keyvalues={"user_id": user_id, "receipt_type": receipt_type},
retcols=("room_id", "event_id"),
desc="get_receipts_for_user",
)
return {row["room_id"]: row["event_id"] for row in rows}
@defer.inlineCallbacks
def get_receipts_for_user_with_orderings(self, user_id, receipt_type):
def f(txn):
sql = (
"SELECT rl.room_id, rl.event_id,"
" e.topological_ordering, e.stream_ordering"
" FROM receipts_linearized AS rl"
" INNER JOIN events AS e USING (room_id, event_id)"
" WHERE rl.room_id = e.room_id"
" AND rl.event_id = e.event_id"
" AND user_id = ?"
)
txn.execute(sql, (user_id,))
return txn.fetchall()
rows = yield self.db_pool.runInteraction(
"get_receipts_for_user_with_orderings", f
)
return {
row[0]: {
"event_id": row[1],
"topological_ordering": row[2],
"stream_ordering": row[3],
}
for row in rows
}
@defer.inlineCallbacks
def get_linearized_receipts_for_rooms(self, room_ids, to_key, from_key=None):
"""Get receipts for multiple rooms for sending to clients.
Args:
room_ids (list): List of room_ids.
to_key (int): Max stream id to fetch receipts up to.
from_key (int): Min stream id to fetch receipts from. None fetches
from the start.
Returns:
list: A list of receipts.
"""
room_ids = set(room_ids)
if from_key is not None:
# Only ask the database about rooms where there have been new
# receipts added since `from_key`
room_ids = yield self._receipts_stream_cache.get_entities_changed(
room_ids, from_key
)
results = yield self._get_linearized_receipts_for_rooms(
room_ids, to_key, from_key=from_key
)
return [ev for res in results.values() for ev in res]
def get_linearized_receipts_for_room(self, room_id, to_key, from_key=None):
"""Get receipts for a single room for sending to clients.
Args:
room_ids (str): The room id.
to_key (int): Max stream id to fetch receipts up to.
from_key (int): Min stream id to fetch receipts from. None fetches
from the start.
Returns:
Deferred[list]: A list of receipts.
"""
if from_key is not None:
# Check the cache first to see if any new receipts have been added
# since `from_key`. If not we can no-op.
if not self._receipts_stream_cache.has_entity_changed(room_id, from_key):
return defer.succeed([])
return self._get_linearized_receipts_for_room(room_id, to_key, from_key)
@cachedInlineCallbacks(num_args=3, tree=True)
def _get_linearized_receipts_for_room(self, room_id, to_key, from_key=None):
"""See get_linearized_receipts_for_room
"""
def f(txn):
if from_key:
sql = (
"SELECT * FROM receipts_linearized WHERE"
" room_id = ? AND stream_id > ? AND stream_id <= ?"
)
txn.execute(sql, (room_id, from_key, to_key))
else:
sql = (
"SELECT * FROM receipts_linearized WHERE"
" room_id = ? AND stream_id <= ?"
)
txn.execute(sql, (room_id, to_key))
rows = self.db_pool.cursor_to_dict(txn)
return rows
rows = yield self.db_pool.runInteraction("get_linearized_receipts_for_room", f)
if not rows:
return []
content = {}
for row in rows:
content.setdefault(row["event_id"], {}).setdefault(row["receipt_type"], {})[
row["user_id"]
] = db_to_json(row["data"])
return [{"type": "m.receipt", "room_id": room_id, "content": content}]
@cachedList(
cached_method_name="_get_linearized_receipts_for_room",
list_name="room_ids",
num_args=3,
inlineCallbacks=True,
)
def _get_linearized_receipts_for_rooms(self, room_ids, to_key, from_key=None):
if not room_ids:
return {}
def f(txn):
if from_key:
sql = """
SELECT * FROM receipts_linearized WHERE
stream_id > ? AND stream_id <= ? AND
"""
clause, args = make_in_list_sql_clause(
self.database_engine, "room_id", room_ids
)
txn.execute(sql + clause, [from_key, to_key] + list(args))
else:
sql = """
SELECT * FROM receipts_linearized WHERE
stream_id <= ? AND
"""
clause, args = make_in_list_sql_clause(
self.database_engine, "room_id", room_ids
)
txn.execute(sql + clause, [to_key] + list(args))
return self.db_pool.cursor_to_dict(txn)
txn_results = yield self.db_pool.runInteraction(
"_get_linearized_receipts_for_rooms", f
)
results = {}
for row in txn_results:
# We want a single event per room, since we want to batch the
# receipts by room, event and type.
room_event = results.setdefault(
row["room_id"],
{"type": "m.receipt", "room_id": row["room_id"], "content": {}},
)
# The content is of the form:
# {"$foo:bar": { "read": { "@user:host": <receipt> }, .. }, .. }
event_entry = room_event["content"].setdefault(row["event_id"], {})
receipt_type = event_entry.setdefault(row["receipt_type"], {})
receipt_type[row["user_id"]] = db_to_json(row["data"])
results = {
room_id: [results[room_id]] if room_id in results else []
for room_id in room_ids
}
return results
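# Illustrative sketch (not from the original code; names are hypothetical): the
# nesting built above, i.e. one m.receipt event per room whose content maps
# event_id -> receipt_type -> user_id -> receipt data.
def batch_receipts(rows):
    by_room = {}
    for row in rows:
        event = by_room.setdefault(
            row["room_id"],
            {"type": "m.receipt", "room_id": row["room_id"], "content": {}},
        )
        per_event = event["content"].setdefault(row["event_id"], {})
        per_type = per_event.setdefault(row["receipt_type"], {})
        per_type[row["user_id"]] = row["data"]
    return by_room

sample = [
    {"room_id": "!r:example.org", "event_id": "$e1", "receipt_type": "m.read",
     "user_id": "@alice:example.org", "data": {"ts": 1}},
    {"room_id": "!r:example.org", "event_id": "$e1", "receipt_type": "m.read",
     "user_id": "@bob:example.org", "data": {"ts": 2}},
]
assert batch_receipts(sample)["!r:example.org"]["content"] == {
    "$e1": {
        "m.read": {"@alice:example.org": {"ts": 1}, "@bob:example.org": {"ts": 2}}
    }
}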
def get_users_sent_receipts_between(self, last_id: int, current_id: int):
"""Get all users who sent receipts between `last_id` exclusive and
`current_id` inclusive.
Returns:
Deferred[List[str]]
"""
if last_id == current_id:
return defer.succeed([])
def _get_users_sent_receipts_between_txn(txn):
sql = """
SELECT DISTINCT user_id FROM receipts_linearized
WHERE ? < stream_id AND stream_id <= ?
"""
txn.execute(sql, (last_id, current_id))
return [r[0] for r in txn]
return self.db_pool.runInteraction(
"get_users_sent_receipts_between", _get_users_sent_receipts_between_txn
)
async def get_all_updated_receipts(
self, instance_name: str, last_id: int, current_id: int, limit: int
) -> Tuple[List[Tuple[int, list]], int, bool]:
"""Get updates for receipts replication stream.
Args:
instance_name: The writer we want to fetch updates from. Unused
here since there is only ever one writer.
last_id: The token to fetch updates from. Exclusive.
current_id: The token to fetch updates up to. Inclusive.
limit: The requested limit for the number of rows to return. The
function may return more or fewer rows.
Returns:
A tuple consisting of: the updates, a token to use to fetch
subsequent updates, and whether we returned fewer rows than exist
between the requested tokens due to the limit.
The token returned can be used in a subsequent call to this
function to get further updates.
The updates are a list of 2-tuples of stream ID and the row data
"""
if last_id == current_id:
return [], current_id, False
def get_all_updated_receipts_txn(txn):
sql = """
SELECT stream_id, room_id, receipt_type, user_id, event_id, data
FROM receipts_linearized
WHERE ? < stream_id AND stream_id <= ?
ORDER BY stream_id ASC
LIMIT ?
"""
txn.execute(sql, (last_id, current_id, limit))
updates = [(r[0], r[1:5] + (db_to_json(r[5]),)) for r in txn]
limited = False
upper_bound = current_id
if len(updates) == limit:
limited = True
upper_bound = updates[-1][0]
return updates, upper_bound, limited
return await self.db_pool.runInteraction(
"get_all_updated_receipts", get_all_updated_receipts_txn
)
def _invalidate_get_users_with_receipts_in_room(
self, room_id, receipt_type, user_id
):
if receipt_type != "m.read":
return
# Returns either an ObservableDeferred or the raw result
res = self.get_users_with_read_receipts_in_room.cache.get(
room_id, None, update_metrics=False
)
# first handle the ObservableDeferred case
if isinstance(res, ObservableDeferred):
if res.has_called():
res = res.get_result()
else:
res = None
if res and user_id in res:
# We'd only be adding to the set, so no point invalidating if the
# user is already there
return
self.get_users_with_read_receipts_in_room.invalidate((room_id,))
class ReceiptsStore(ReceiptsWorkerStore):
def __init__(self, database: DatabasePool, db_conn, hs):
# We instantiate this first as the ReceiptsWorkerStore constructor
# needs to be able to call get_max_receipt_stream_id
self._receipts_id_gen = StreamIdGenerator(
db_conn, "receipts_linearized", "stream_id"
)
super(ReceiptsStore, self).__init__(database, db_conn, hs)
def get_max_receipt_stream_id(self):
return self._receipts_id_gen.get_current_token()
def insert_linearized_receipt_txn(
self, txn, room_id, receipt_type, user_id, event_id, data, stream_id
):
"""Inserts a read-receipt into the database if it's newer than the current RR
Returns: int|None
None if the RR is older than the current RR
otherwise, the rx timestamp of the event that the RR corresponds to
(or 0 if the event is unknown)
"""
res = self.db_pool.simple_select_one_txn(
txn,
table="events",
retcols=["stream_ordering", "received_ts"],
keyvalues={"event_id": event_id},
allow_none=True,
)
stream_ordering = int(res["stream_ordering"]) if res else None
rx_ts = res["received_ts"] if res else 0
# We don't want to clobber receipts for more recent events, so we
# have to compare orderings of existing receipts
if stream_ordering is not None:
sql = (
"SELECT stream_ordering, event_id FROM events"
" INNER JOIN receipts_linearized as r USING (event_id, room_id)"
" WHERE r.room_id = ? AND r.receipt_type = ? AND r.user_id = ?"
)
txn.execute(sql, (room_id, receipt_type, user_id))
for so, eid in txn:
if int(so) >= stream_ordering:
logger.debug(
"Ignoring new receipt for %s in favour of existing "
"one for later event %s",
event_id,
eid,
)
return None
txn.call_after(self.get_receipts_for_room.invalidate, (room_id, receipt_type))
txn.call_after(
self._invalidate_get_users_with_receipts_in_room,
room_id,
receipt_type,
user_id,
)
txn.call_after(self.get_receipts_for_user.invalidate, (user_id, receipt_type))
# FIXME: This shouldn't invalidate the whole cache
txn.call_after(
self._get_linearized_receipts_for_room.invalidate_many, (room_id,)
)
txn.call_after(
self._receipts_stream_cache.entity_has_changed, room_id, stream_id
)
txn.call_after(
self.get_last_receipt_event_id_for_user.invalidate,
(user_id, room_id, receipt_type),
)
self.db_pool.simple_upsert_txn(
txn,
table="receipts_linearized",
keyvalues={
"room_id": room_id,
"receipt_type": receipt_type,
"user_id": user_id,
},
values={
"stream_id": stream_id,
"event_id": event_id,
"data": json.dumps(data),
},
# receipts_linearized has a unique constraint on
# (user_id, room_id, receipt_type), so no need to lock
lock=False,
)
if receipt_type == "m.read" and stream_ordering is not None:
self._remove_old_push_actions_before_txn(
txn, room_id=room_id, user_id=user_id, stream_ordering=stream_ordering
)
return rx_ts
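# Illustrative sketch (not from the original code; names are hypothetical): the
# "don't clobber" rule above in miniature. A read receipt only moves forward;
# it is ignored if any existing receipt for the same (room, receipt type, user)
# already points at an event with an equal or later stream ordering.
def should_replace_receipt(new_ordering, existing_orderings):
    return all(new_ordering > so for so in existing_orderings)

assert should_replace_receipt(10, [7, 9]) is True
assert should_replace_receipt(10, [7, 12]) is False  # a newer receipt exists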
@defer.inlineCallbacks
def insert_receipt(self, room_id, receipt_type, user_id, event_ids, data):
"""Insert a receipt, either from local client or remote server.
Automatically does conversion between linearized and graph
representations.
"""
if not event_ids:
return
if len(event_ids) == 1:
linearized_event_id = event_ids[0]
else:
# we need to map the points in the graph to linearized form.
# TODO: Make this better.
def graph_to_linear(txn):
clause, args = make_in_list_sql_clause(
self.database_engine, "event_id", event_ids
)
sql = """
SELECT event_id FROM events WHERE room_id = ? AND stream_ordering IN (
SELECT max(stream_ordering) FROM events WHERE %s
)
""" % (
clause,
)
txn.execute(sql, [room_id] + list(args))
rows = txn.fetchall()
if rows:
return rows[0][0]
else:
raise RuntimeError("Unrecognized event_ids: %r" % (event_ids,))
linearized_event_id = yield self.db_pool.runInteraction(
"insert_receipt_conv", graph_to_linear
)
stream_id_manager = self._receipts_id_gen.get_next()
with stream_id_manager as stream_id:
event_ts = yield self.db_pool.runInteraction(
"insert_linearized_receipt",
self.insert_linearized_receipt_txn,
room_id,
receipt_type,
user_id,
linearized_event_id,
data,
stream_id=stream_id,
)
if event_ts is None:
return None
now = self._clock.time_msec()
logger.debug(
"RR for event %s in %s (%i ms old)",
linearized_event_id,
room_id,
now - event_ts,
)
yield self.insert_graph_receipt(room_id, receipt_type, user_id, event_ids, data)
max_persisted_id = self._receipts_id_gen.get_current_token()
return stream_id, max_persisted_id
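# A hedged usage sketch of insert_receipt above; `store`, the caller name and
# the "ts" payload are assumptions for illustration only.
from twisted.internet import defer

@defer.inlineCallbacks
def mark_event_as_read(store, room_id, user_id, event_id):
    # Returns (stream_id, max_persisted_id), or None if the receipt was older
    # than the one already recorded for this user, room and receipt type.
    result = yield store.insert_receipt(
        room_id, "m.read", user_id, [event_id], data={"ts": 1596000000000}
    )
    return result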
def insert_graph_receipt(self, room_id, receipt_type, user_id, event_ids, data):
return self.db_pool.runInteraction(
"insert_graph_receipt",
self.insert_graph_receipt_txn,
room_id,
receipt_type,
user_id,
event_ids,
data,
)
def insert_graph_receipt_txn(
self, txn, room_id, receipt_type, user_id, event_ids, data
):
txn.call_after(self.get_receipts_for_room.invalidate, (room_id, receipt_type))
txn.call_after(
self._invalidate_get_users_with_receipts_in_room,
room_id,
receipt_type,
user_id,
)
txn.call_after(self.get_receipts_for_user.invalidate, (user_id, receipt_type))
# FIXME: This shouldn't invalidate the whole cache
txn.call_after(
self._get_linearized_receipts_for_room.invalidate_many, (room_id,)
)
self.db_pool.simple_delete_txn(
txn,
table="receipts_graph",
keyvalues={
"room_id": room_id,
"receipt_type": receipt_type,
"user_id": user_id,
},
)
self.db_pool.simple_insert_txn(
txn,
table="receipts_graph",
values={
"room_id": room_id,
"receipt_type": receipt_type,
"user_id": user_id,
"event_ids": json.dumps(event_ids),
"data": json.dumps(data),
},
)

File diff suppressed because it is too large

View file

@ -0,0 +1,31 @@
# -*- coding: utf-8 -*-
# Copyright 2014-2016 OpenMarket Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from synapse.storage._base import SQLBaseStore
logger = logging.getLogger(__name__)
class RejectionsStore(SQLBaseStore):
def get_rejection_reason(self, event_id):
return self.db_pool.simple_select_one_onecol(
table="rejections",
retcol="reason",
keyvalues={"event_id": event_id},
allow_none=True,
desc="get_rejection_reason",
)
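A brief usage sketch of the store above; `store` is assumed to be a homeserver datastore that mixes in RejectionsStore, and the helper name is illustrative.

from twisted.internet import defer

@defer.inlineCallbacks
def was_rejected(store, event_id):
    # get_rejection_reason returns None (allow_none=True) when the event was
    # never rejected, otherwise the reason recorded in the rejections table.
    reason = yield store.get_rejection_reason(event_id)
    return reason is not None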

View file

@ -0,0 +1,327 @@
# -*- coding: utf-8 -*-
# Copyright 2019 New Vector Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import attr
from synapse.api.constants import RelationTypes
from synapse.storage._base import SQLBaseStore
from synapse.storage.databases.main.stream import generate_pagination_where_clause
from synapse.storage.relations import (
AggregationPaginationToken,
PaginationChunk,
RelationPaginationToken,
)
from synapse.util.caches.descriptors import cached, cachedInlineCallbacks
logger = logging.getLogger(__name__)
class RelationsWorkerStore(SQLBaseStore):
@cached(tree=True)
def get_relations_for_event(
self,
event_id,
relation_type=None,
event_type=None,
aggregation_key=None,
limit=5,
direction="b",
from_token=None,
to_token=None,
):
"""Get a list of relations for an event, ordered by topological ordering.
Args:
event_id (str): Fetch events that relate to this event ID.
relation_type (str|None): Only fetch events with this relation
type, if given.
event_type (str|None): Only fetch events with this event type, if
given.
aggregation_key (str|None): Only fetch events with this aggregation
key, if given.
limit (int): Only fetch the most recent `limit` events.
direction (str): Whether to fetch the most recent first (`"b"`) or
the oldest first (`"f"`).
from_token (RelationPaginationToken|None): Fetch rows from the given
token, or from the start if None.
to_token (RelationPaginationToken|None): Fetch rows up to the given
token, or up to the end if None.
Returns:
Deferred[PaginationChunk]: List of event IDs that match relations
requested. The rows are of the form `{"event_id": "..."}`.
"""
where_clause = ["relates_to_id = ?"]
where_args = [event_id]
if relation_type is not None:
where_clause.append("relation_type = ?")
where_args.append(relation_type)
if event_type is not None:
where_clause.append("type = ?")
where_args.append(event_type)
if aggregation_key:
where_clause.append("aggregation_key = ?")
where_args.append(aggregation_key)
pagination_clause = generate_pagination_where_clause(
direction=direction,
column_names=("topological_ordering", "stream_ordering"),
from_token=attr.astuple(from_token) if from_token else None,
to_token=attr.astuple(to_token) if to_token else None,
engine=self.database_engine,
)
if pagination_clause:
where_clause.append(pagination_clause)
if direction == "b":
order = "DESC"
else:
order = "ASC"
sql = """
SELECT event_id, topological_ordering, stream_ordering
FROM event_relations
INNER JOIN events USING (event_id)
WHERE %s
ORDER BY topological_ordering %s, stream_ordering %s
LIMIT ?
""" % (
" AND ".join(where_clause),
order,
order,
)
def _get_recent_references_for_event_txn(txn):
txn.execute(sql, where_args + [limit + 1])
last_topo_id = None
last_stream_id = None
events = []
for row in txn:
events.append({"event_id": row[0]})
last_topo_id = row[1]
last_stream_id = row[2]
next_batch = None
if len(events) > limit and last_topo_id and last_stream_id:
next_batch = RelationPaginationToken(last_topo_id, last_stream_id)
return PaginationChunk(
chunk=list(events[:limit]), next_batch=next_batch, prev_batch=from_token
)
return self.db_pool.runInteraction(
"get_recent_references_for_event", _get_recent_references_for_event_txn
)
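# A hedged example of calling get_relations_for_event above to page through
# reactions; `store` and the caller are illustrative, not part of this module.
from twisted.internet import defer
from synapse.api.constants import RelationTypes

@defer.inlineCallbacks
def get_latest_reactions(store, event_id):
    # Fetch up to 10 m.reaction annotations of `event_id`, newest first; the
    # result is a PaginationChunk whose .chunk is a list of {"event_id": ...}.
    chunk = yield store.get_relations_for_event(
        event_id,
        relation_type=RelationTypes.ANNOTATION,
        event_type="m.reaction",
        limit=10,
        direction="b",
    )
    return chunk.chunk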
@cached(tree=True)
def get_aggregation_groups_for_event(
self,
event_id,
event_type=None,
limit=5,
direction="b",
from_token=None,
to_token=None,
):
"""Get a list of annotations on the event, grouped by event type and
aggregation key, sorted by count.
This is used e.g. to get which reactions have happened on an event and
how many of each there are.
Args:
event_id (str): Fetch events that relate to this event ID.
event_type (str|None): Only fetch events with this event type, if
given.
limit (int): Only fetch the `limit` groups.
direction (str): Whether to fetch the highest count first (`"b"`) or
the lowest count first (`"f"`).
from_token (AggregationPaginationToken|None): Fetch rows from the
given token, or from the start if None.
to_token (AggregationPaginationToken|None): Fetch rows up to the
given token, or up to the end if None.
Returns:
Deferred[PaginationChunk]: List of groups of annotations that
match. Each row is a dict with `type`, `key` and `count` fields.
"""
where_clause = ["relates_to_id = ?", "relation_type = ?"]
where_args = [event_id, RelationTypes.ANNOTATION]
if event_type:
where_clause.append("type = ?")
where_args.append(event_type)
having_clause = generate_pagination_where_clause(
direction=direction,
column_names=("COUNT(*)", "MAX(stream_ordering)"),
from_token=attr.astuple(from_token) if from_token else None,
to_token=attr.astuple(to_token) if to_token else None,
engine=self.database_engine,
)
if direction == "b":
order = "DESC"
else:
order = "ASC"
if having_clause:
having_clause = "HAVING " + having_clause
else:
having_clause = ""
sql = """
SELECT type, aggregation_key, COUNT(DISTINCT sender), MAX(stream_ordering)
FROM event_relations
INNER JOIN events USING (event_id)
WHERE {where_clause}
GROUP BY relation_type, type, aggregation_key
{having_clause}
ORDER BY COUNT(*) {order}, MAX(stream_ordering) {order}
LIMIT ?
""".format(
where_clause=" AND ".join(where_clause),
order=order,
having_clause=having_clause,
)
def _get_aggregation_groups_for_event_txn(txn):
txn.execute(sql, where_args + [limit + 1])
next_batch = None
events = []
for row in txn:
events.append({"type": row[0], "key": row[1], "count": row[2]})
next_batch = AggregationPaginationToken(row[2], row[3])
if len(events) <= limit:
next_batch = None
return PaginationChunk(
chunk=list(events[:limit]), next_batch=next_batch, prev_batch=from_token
)
return self.db_pool.runInteraction(
"get_aggregation_groups_for_event", _get_aggregation_groups_for_event_txn
)
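# A hedged sketch of using get_aggregation_groups_for_event above to read
# reaction counts; `store` and the caller are assumptions for illustration.
from twisted.internet import defer

@defer.inlineCallbacks
def get_reaction_counts(store, event_id):
    # Each returned row looks like {"type": "m.reaction", "key": "👍", "count": 3},
    # ordered with the highest count first (direction defaults to "b").
    chunk = yield store.get_aggregation_groups_for_event(event_id, limit=5)
    return chunk.chunk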
@cachedInlineCallbacks()
def get_applicable_edit(self, event_id):
"""Get the most recent edit (if any) that has happened for the given
event.
Correctly handles checking whether edits were allowed to happen.
Args:
event_id (str): The original event ID
Returns:
Deferred[EventBase|None]: Returns the most recent edit, if any.
"""
# We only allow edits for `m.room.message` events that have the same sender
# and event type. We can't assert these things during regular event auth so
# we have to do the checks post hoc.
# Fetches latest edit that has the same type and sender as the
# original, and is an `m.room.message`.
sql = """
SELECT edit.event_id FROM events AS edit
INNER JOIN event_relations USING (event_id)
INNER JOIN events AS original ON
original.event_id = relates_to_id
AND edit.type = original.type
AND edit.sender = original.sender
WHERE
relates_to_id = ?
AND relation_type = ?
AND edit.type = 'm.room.message'
ORDER by edit.origin_server_ts DESC, edit.event_id DESC
LIMIT 1
"""
def _get_applicable_edit_txn(txn):
txn.execute(sql, (event_id, RelationTypes.REPLACE))
row = txn.fetchone()
if row:
return row[0]
edit_id = yield self.db_pool.runInteraction(
"get_applicable_edit", _get_applicable_edit_txn
)
if not edit_id:
return
edit_event = yield self.get_event(edit_id, allow_none=True)
return edit_event
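# A hedged sketch of how a caller might use get_applicable_edit above;
# `store` and `original_event` are assumptions for illustration.
from twisted.internet import defer

@defer.inlineCallbacks
def resolve_display_event(store, original_event):
    # Prefer the most recent valid m.replace edit, falling back to the
    # original event when nothing has edited it.
    edit = yield store.get_applicable_edit(original_event.event_id)
    return edit if edit is not None else original_event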
def has_user_annotated_event(self, parent_id, event_type, aggregation_key, sender):
"""Check if a user has already annotated an event with the same key
(e.g. already liked an event).
Args:
parent_id (str): The event being annotated
event_type (str): The event type of the annotation
aggregation_key (str): The aggregation key of the annotation
sender (str): The sender of the annotation
Returns:
Deferred[bool]
"""
sql = """
SELECT 1 FROM event_relations
INNER JOIN events USING (event_id)
WHERE
relates_to_id = ?
AND relation_type = ?
AND type = ?
AND sender = ?
AND aggregation_key = ?
LIMIT 1;
"""
def _get_if_user_has_annotated_event(txn):
txn.execute(
sql,
(
parent_id,
RelationTypes.ANNOTATION,
event_type,
sender,
aggregation_key,
),
)
return bool(txn.fetchone())
return self.db_pool.runInteraction(
"get_if_user_has_annotated_event", _get_if_user_has_annotated_event
)
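# A hedged sketch of a duplicate-annotation guard built on the method above;
# `store`, the caller and the reaction key are illustrative.
from twisted.internet import defer

@defer.inlineCallbacks
def may_send_reaction(store, parent_id, user_id):
    # has_user_annotated_event mirrors the SQL above: True if this sender has
    # already annotated the parent event with the same type and key.
    already = yield store.has_user_annotated_event(
        parent_id, "m.reaction", "👍", user_id
    )
    return not already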
class RelationsStore(RelationsWorkerStore):
pass

File diff suppressed because it is too large

File diff suppressed because it is too large

View file

@ -0,0 +1,63 @@
/* Copyright 2015, 2016 OpenMarket Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
CREATE TABLE IF NOT EXISTS rejections(
event_id TEXT NOT NULL,
reason TEXT NOT NULL,
last_check TEXT NOT NULL,
UNIQUE (event_id)
);
-- Push notification endpoints that users have configured
CREATE TABLE IF NOT EXISTS pushers (
id INTEGER PRIMARY KEY AUTOINCREMENT,
user_name TEXT NOT NULL,
profile_tag VARCHAR(32) NOT NULL,
kind VARCHAR(8) NOT NULL,
app_id VARCHAR(64) NOT NULL,
app_display_name VARCHAR(64) NOT NULL,
device_display_name VARCHAR(128) NOT NULL,
pushkey VARBINARY(512) NOT NULL,
ts BIGINT UNSIGNED NOT NULL,
lang VARCHAR(8),
data LONGBLOB,
last_token TEXT,
last_success BIGINT UNSIGNED,
failing_since BIGINT UNSIGNED,
UNIQUE (app_id, pushkey)
);
CREATE TABLE IF NOT EXISTS push_rules (
id INTEGER PRIMARY KEY AUTOINCREMENT,
user_name TEXT NOT NULL,
rule_id TEXT NOT NULL,
priority_class TINYINT NOT NULL,
priority INTEGER NOT NULL DEFAULT 0,
conditions TEXT NOT NULL,
actions TEXT NOT NULL,
UNIQUE(user_name, rule_id)
);
CREATE INDEX IF NOT EXISTS push_rules_user_name on push_rules (user_name);
CREATE TABLE IF NOT EXISTS user_filters(
user_id TEXT,
filter_id BIGINT UNSIGNED,
filter_json LONGBLOB
);
CREATE INDEX IF NOT EXISTS user_filters_by_user_id_filter_id ON user_filters(
user_id, filter_id
);

View file

@ -0,0 +1,19 @@
/* Copyright 2015, 2016 OpenMarket Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* We used to create tables called application_services and
* application_services_regex, but these are no longer used and are removed in
* delta 54.
*/

View file

@ -0,0 +1,23 @@
/* Copyright 2015, 2016 OpenMarket Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
CREATE TABLE IF NOT EXISTS push_rules_enable (
id INTEGER PRIMARY KEY AUTOINCREMENT,
user_name TEXT NOT NULL,
rule_id TEXT NOT NULL,
enabled TINYINT,
UNIQUE(user_name, rule_id)
);
CREATE INDEX IF NOT EXISTS push_rules_enable_user_name on push_rules_enable (user_name);

View file

@ -0,0 +1,31 @@
/* Copyright 2015, 2016 OpenMarket Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
CREATE TABLE IF NOT EXISTS application_services_state(
as_id TEXT PRIMARY KEY,
state VARCHAR(5),
last_txn INTEGER
);
CREATE TABLE IF NOT EXISTS application_services_txns(
as_id TEXT NOT NULL,
txn_id INTEGER NOT NULL,
event_ids TEXT NOT NULL,
UNIQUE(as_id, txn_id)
);
CREATE INDEX IF NOT EXISTS application_services_txns_id ON application_services_txns (
as_id
);

View file

@ -0,0 +1,2 @@
CREATE INDEX IF NOT EXISTS presence_list_user_id ON presence_list (user_id);

View file

@ -0,0 +1,24 @@
-- Drop, copy & recreate pushers table to change unique key
-- Also add access_token column at the same time
CREATE TABLE IF NOT EXISTS pushers2 (
id BIGINT PRIMARY KEY,
user_name TEXT NOT NULL,
access_token BIGINT DEFAULT NULL,
profile_tag VARCHAR(32) NOT NULL,
kind VARCHAR(8) NOT NULL,
app_id VARCHAR(64) NOT NULL,
app_display_name VARCHAR(64) NOT NULL,
device_display_name VARCHAR(128) NOT NULL,
pushkey bytea NOT NULL,
ts BIGINT NOT NULL,
lang VARCHAR(8),
data bytea,
last_token TEXT,
last_success BIGINT,
failing_since BIGINT,
UNIQUE (app_id, pushkey)
);
INSERT INTO pushers2 (id, user_name, profile_tag, kind, app_id, app_display_name, device_display_name, pushkey, ts, lang, data, last_token, last_success, failing_since)
SELECT id, user_name, profile_tag, kind, app_id, app_display_name, device_display_name, pushkey, ts, lang, data, last_token, last_success, failing_since FROM pushers;
DROP TABLE pushers;
ALTER TABLE pushers2 RENAME TO pushers;

View file

@ -0,0 +1,4 @@
CREATE INDEX events_order ON events (topological_ordering, stream_ordering);
CREATE INDEX events_order_room ON events (
room_id, topological_ordering, stream_ordering
);

View file

@ -0,0 +1,2 @@
CREATE INDEX IF NOT EXISTS remote_media_cache_thumbnails_media_id
ON remote_media_cache_thumbnails (media_id);

View file

@ -0,0 +1,9 @@
DELETE FROM event_to_state_groups WHERE state_group not in (
SELECT MAX(state_group) FROM event_to_state_groups GROUP BY event_id
);
DELETE FROM event_to_state_groups WHERE rowid not in (
SELECT MIN(rowid) FROM event_to_state_groups GROUP BY event_id
);

View file

@ -0,0 +1,3 @@
CREATE INDEX IF NOT EXISTS room_aliases_id ON room_aliases(room_id);
CREATE INDEX IF NOT EXISTS room_alias_servers_alias ON room_alias_servers(room_alias);

View file

@ -0,0 +1,72 @@
-- We can use SQLite features here, since other db support was only added in v16
--
DELETE FROM current_state_events WHERE rowid not in (
SELECT MIN(rowid) FROM current_state_events GROUP BY event_id
);
DROP INDEX IF EXISTS current_state_events_event_id;
CREATE UNIQUE INDEX current_state_events_event_id ON current_state_events(event_id);
--
DELETE FROM room_memberships WHERE rowid not in (
SELECT MIN(rowid) FROM room_memberships GROUP BY event_id
);
DROP INDEX IF EXISTS room_memberships_event_id;
CREATE UNIQUE INDEX room_memberships_event_id ON room_memberships(event_id);
--
DELETE FROM topics WHERE rowid not in (
SELECT MIN(rowid) FROM topics GROUP BY event_id
);
DROP INDEX IF EXISTS topics_event_id;
CREATE UNIQUE INDEX topics_event_id ON topics(event_id);
--
DELETE FROM room_names WHERE rowid not in (
SELECT MIN(rowid) FROM room_names GROUP BY event_id
);
DROP INDEX IF EXISTS room_names_id;
CREATE UNIQUE INDEX room_names_id ON room_names(event_id);
--
DELETE FROM presence WHERE rowid not in (
SELECT MIN(rowid) FROM presence GROUP BY user_id
);
DROP INDEX IF EXISTS presence_id;
CREATE UNIQUE INDEX presence_id ON presence(user_id);
--
DELETE FROM presence_allow_inbound WHERE rowid not in (
SELECT MIN(rowid) FROM presence_allow_inbound
GROUP BY observed_user_id, observer_user_id
);
DROP INDEX IF EXISTS presence_allow_inbound_observers;
CREATE UNIQUE INDEX presence_allow_inbound_observers ON presence_allow_inbound(
observed_user_id, observer_user_id
);
--
DELETE FROM presence_list WHERE rowid not in (
SELECT MIN(rowid) FROM presence_list
GROUP BY user_id, observed_user_id
);
DROP INDEX IF EXISTS presence_list_observers;
CREATE UNIQUE INDEX presence_list_observers ON presence_list(
user_id, observed_user_id
);
--
DELETE FROM room_aliases WHERE rowid not in (
SELECT MIN(rowid) FROM room_aliases GROUP BY room_alias
);
DROP INDEX IF EXISTS room_aliases_id;
CREATE INDEX room_aliases_id ON room_aliases(room_id);
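The statements above all use the same SQLite-specific dedup pattern: keep the row with the smallest rowid in each group, delete the rest, then add a unique index. A toy standalone illustration of that pattern (hypothetical table and data; sqlite3 used purely for demonstration):

import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE t (event_id TEXT, payload TEXT)")
conn.executemany(
    "INSERT INTO t VALUES (?, ?)",
    [("$a", "first"), ("$a", "duplicate"), ("$b", "only")],
)
# Keep only the earliest row (MIN(rowid)) per event_id, as in the deltas above.
conn.execute(
    "DELETE FROM t WHERE rowid NOT IN (SELECT MIN(rowid) FROM t GROUP BY event_id)"
)
conn.execute("CREATE UNIQUE INDEX t_event_id ON t(event_id)")
assert conn.execute("SELECT COUNT(*) FROM t").fetchone() == (2,)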

View file

@ -0,0 +1,56 @@
-- Convert `access_tokens`.user from rowids to user strings.
-- MUST BE DONE BEFORE REMOVING ID COLUMN FROM USERS TABLE BELOW
CREATE TABLE IF NOT EXISTS new_access_tokens(
id BIGINT UNSIGNED PRIMARY KEY,
user_id TEXT NOT NULL,
device_id TEXT,
token TEXT NOT NULL,
last_used BIGINT UNSIGNED,
UNIQUE(token)
);
INSERT INTO new_access_tokens
SELECT a.id, u.name, a.device_id, a.token, a.last_used
FROM access_tokens as a
INNER JOIN users as u ON u.id = a.user_id;
DROP TABLE access_tokens;
ALTER TABLE new_access_tokens RENAME TO access_tokens;
-- Remove ID column from `users` table
CREATE TABLE IF NOT EXISTS new_users(
name TEXT,
password_hash TEXT,
creation_ts BIGINT UNSIGNED,
admin BOOL DEFAULT 0 NOT NULL,
UNIQUE(name)
);
INSERT INTO new_users SELECT name, password_hash, creation_ts, admin FROM users;
DROP TABLE users;
ALTER TABLE new_users RENAME TO users;
-- Remove UNIQUE constraint from `user_ips` table
CREATE TABLE IF NOT EXISTS new_user_ips (
user_id TEXT NOT NULL,
access_token TEXT NOT NULL,
device_id TEXT,
ip TEXT NOT NULL,
user_agent TEXT NOT NULL,
last_seen BIGINT UNSIGNED NOT NULL
);
INSERT INTO new_user_ips
SELECT user, access_token, device_id, ip, user_agent, last_seen FROM user_ips;
DROP TABLE user_ips;
ALTER TABLE new_user_ips RENAME TO user_ips;
CREATE INDEX IF NOT EXISTS user_ips_user ON user_ips(user_id);
CREATE INDEX IF NOT EXISTS user_ips_user_ip ON user_ips(user_id, access_token, ip);

View file

@ -0,0 +1,18 @@
/* Copyright 2015, 2016 OpenMarket Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
DROP INDEX IF EXISTS sent_transaction_dest;
DROP INDEX IF EXISTS sent_transaction_sent;
DROP INDEX IF EXISTS user_ips_user;

View file

@ -0,0 +1,24 @@
/* Copyright 2015, 2016 OpenMarket Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
CREATE TABLE IF NOT EXISTS server_keys_json (
server_name TEXT, -- Server name.
key_id TEXT, -- Requested key id.
from_server TEXT, -- Which server the keys were fetched from.
ts_added_ms INTEGER, -- When the keys were fetched
ts_valid_until_ms INTEGER, -- When this version of the keys expires.
key_json bytea, -- JSON certificate for the remote server.
CONSTRAINT uniqueness UNIQUE (server_name, key_id, from_server)
);

View file

@ -0,0 +1,9 @@
CREATE TABLE user_threepids (
user_id TEXT NOT NULL,
medium TEXT NOT NULL,
address TEXT NOT NULL,
validated_at BIGINT NOT NULL,
added_at BIGINT NOT NULL,
CONSTRAINT user_medium_address UNIQUE (user_id, medium, address)
);
CREATE INDEX user_threepids_user_id ON user_threepids(user_id);

View file

@ -0,0 +1,32 @@
/* Copyright 2015, 2016 OpenMarket Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
CREATE TABLE IF NOT EXISTS new_server_keys_json (
server_name TEXT NOT NULL, -- Server name.
key_id TEXT NOT NULL, -- Requested key id.
from_server TEXT NOT NULL, -- Which server the keys were fetched from.
ts_added_ms BIGINT NOT NULL, -- When the keys were fetched
ts_valid_until_ms BIGINT NOT NULL, -- When this version of the keys expires.
key_json bytea NOT NULL, -- JSON certificate for the remote server.
CONSTRAINT server_keys_json_uniqueness UNIQUE (server_name, key_id, from_server)
);
INSERT INTO new_server_keys_json
SELECT server_name, key_id, from_server, ts_added_ms, ts_valid_until_ms, key_json FROM server_keys_json;
DROP TABLE server_keys_json;
ALTER TABLE new_server_keys_json RENAME TO server_keys_json;

View file

@ -0,0 +1,19 @@
/* Copyright 2015, 2016 OpenMarket Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
CREATE INDEX events_order_topo_stream_room ON events(
topological_ordering, stream_ordering, room_id
);

View file

@ -0,0 +1 @@
SELECT 1;

View file

@ -0,0 +1,88 @@
# Copyright 2015, 2016 OpenMarket Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
The main purpose of this upgrade is to change the unique key on the
pushers table again (it was missed when the v16 full schema was
made), but it also changes the pushkey and data columns to text.
When selecting a bytea column into a text column, Postgres inserts
the hex-encoded data, and there's no portable way of getting the
UTF-8 bytes, so we have to do the conversion in Python.
"""
import logging
logger = logging.getLogger(__name__)
def run_create(cur, database_engine, *args, **kwargs):
logger.info("Porting pushers table...")
cur.execute(
"""
CREATE TABLE IF NOT EXISTS pushers2 (
id BIGINT PRIMARY KEY,
user_name TEXT NOT NULL,
access_token BIGINT DEFAULT NULL,
profile_tag VARCHAR(32) NOT NULL,
kind VARCHAR(8) NOT NULL,
app_id VARCHAR(64) NOT NULL,
app_display_name VARCHAR(64) NOT NULL,
device_display_name VARCHAR(128) NOT NULL,
pushkey TEXT NOT NULL,
ts BIGINT NOT NULL,
lang VARCHAR(8),
data TEXT,
last_token TEXT,
last_success BIGINT,
failing_since BIGINT,
UNIQUE (app_id, pushkey, user_name)
)
"""
)
cur.execute(
"""SELECT
id, user_name, access_token, profile_tag, kind,
app_id, app_display_name, device_display_name,
pushkey, ts, lang, data, last_token, last_success,
failing_since
FROM pushers
"""
)
count = 0
for row in cur.fetchall():
row = list(row)
row[8] = bytes(row[8]).decode("utf-8")
row[11] = bytes(row[11]).decode("utf-8")
cur.execute(
database_engine.convert_param_style(
"""
INSERT into pushers2 (
id, user_name, access_token, profile_tag, kind,
app_id, app_display_name, device_display_name,
pushkey, ts, lang, data, last_token, last_success,
failing_since
) values (%s)"""
% (",".join(["?" for _ in range(len(row))]))
),
row,
)
count += 1
cur.execute("DROP TABLE pushers")
cur.execute("ALTER TABLE pushers2 RENAME TO pushers")
logger.info("Moved %d pushers to new table", count)
def run_upgrade(*args, **kwargs):
pass
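A standalone illustration (assumptions only, not part of the migration) of the decode step in the loop above: with psycopg2 on Python 3, a bytea column typically comes back as a memoryview, so the value is turned into bytes and decoded as UTF-8 in Python rather than cast in SQL.

raw = memoryview(b"com.example.pushkey")  # stand-in for a bytea value from the driver
as_text = bytes(raw).decode("utf-8")
assert as_text == "com.example.pushkey"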

View file

@ -0,0 +1,34 @@
/* Copyright 2015, 2016 OpenMarket Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
CREATE TABLE IF NOT EXISTS e2e_device_keys_json (
user_id TEXT NOT NULL, -- The user these keys are for.
device_id TEXT NOT NULL, -- Which of the user's devices these keys are for.
ts_added_ms BIGINT NOT NULL, -- When the keys were uploaded.
key_json TEXT NOT NULL, -- The keys for the device as a JSON blob.
CONSTRAINT e2e_device_keys_json_uniqueness UNIQUE (user_id, device_id)
);
CREATE TABLE IF NOT EXISTS e2e_one_time_keys_json (
user_id TEXT NOT NULL, -- The user this one-time key is for.
device_id TEXT NOT NULL, -- The device this one-time key is for.
algorithm TEXT NOT NULL, -- Which algorithm this one-time key is for.
key_id TEXT NOT NULL, -- An id for suppressing duplicate uploads.
ts_added_ms BIGINT NOT NULL, -- When this key was uploaded.
key_json TEXT NOT NULL, -- The key as a JSON blob.
CONSTRAINT e2e_one_time_keys_json_uniqueness UNIQUE (user_id, device_id, algorithm, key_id)
);

View file

@ -0,0 +1,38 @@
/* Copyright 2015, 2016 OpenMarket Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
CREATE TABLE IF NOT EXISTS receipts_graph(
room_id TEXT NOT NULL,
receipt_type TEXT NOT NULL,
user_id TEXT NOT NULL,
event_ids TEXT NOT NULL,
data TEXT NOT NULL,
CONSTRAINT receipts_graph_uniqueness UNIQUE (room_id, receipt_type, user_id)
);
CREATE TABLE IF NOT EXISTS receipts_linearized (
stream_id BIGINT NOT NULL,
room_id TEXT NOT NULL,
receipt_type TEXT NOT NULL,
user_id TEXT NOT NULL,
event_id TEXT NOT NULL,
data TEXT NOT NULL,
CONSTRAINT receipts_linearized_uniqueness UNIQUE (room_id, receipt_type, user_id)
);
CREATE INDEX receipts_linearized_id ON receipts_linearized(
stream_id
);

View file

@ -0,0 +1,22 @@
/* Copyright 2015, 2016 OpenMarket Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/** Using CREATE INDEX directly is deprecated in favour of using a background
* update; see synapse/storage/schema/delta/33/access_tokens_device_index.sql
* and synapse/storage/registration.py for an example using
* "access_tokens_device_index" **/
CREATE INDEX receipts_linearized_room_stream ON receipts_linearized(
room_id, stream_id
);

View file

@ -0,0 +1,19 @@
CREATE TABLE IF NOT EXISTS user_threepids2 (
user_id TEXT NOT NULL,
medium TEXT NOT NULL,
address TEXT NOT NULL,
validated_at BIGINT NOT NULL,
added_at BIGINT NOT NULL,
CONSTRAINT medium_address UNIQUE (medium, address)
);
INSERT INTO user_threepids2
SELECT * FROM user_threepids WHERE added_at IN (
SELECT max(added_at) FROM user_threepids GROUP BY medium, address
)
;
DROP TABLE user_threepids;
ALTER TABLE user_threepids2 RENAME TO user_threepids;
CREATE INDEX user_threepids_user_id ON user_threepids(user_id);

View file

@ -0,0 +1,18 @@
/* Copyright 2019 New Vector Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* We used to create a table called stats_reporting, but this is no longer
* used and is removed in delta 54.
*/

View file

@ -0,0 +1,80 @@
# Copyright 2015, 2016 OpenMarket Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import logging
from synapse.storage.engines import PostgresEngine, Sqlite3Engine
from synapse.storage.prepare_database import get_statements
logger = logging.getLogger(__name__)
POSTGRES_TABLE = """
CREATE TABLE IF NOT EXISTS event_search (
event_id TEXT,
room_id TEXT,
sender TEXT,
key TEXT,
vector tsvector
);
CREATE INDEX event_search_fts_idx ON event_search USING gin(vector);
CREATE INDEX event_search_ev_idx ON event_search(event_id);
CREATE INDEX event_search_ev_ridx ON event_search(room_id);
"""
SQLITE_TABLE = (
"CREATE VIRTUAL TABLE event_search"
" USING fts4 ( event_id, room_id, sender, key, value )"
)
def run_create(cur, database_engine, *args, **kwargs):
if isinstance(database_engine, PostgresEngine):
for statement in get_statements(POSTGRES_TABLE.splitlines()):
cur.execute(statement)
elif isinstance(database_engine, Sqlite3Engine):
cur.execute(SQLITE_TABLE)
else:
raise Exception("Unrecognized database engine")
cur.execute("SELECT MIN(stream_ordering) FROM events")
rows = cur.fetchall()
min_stream_id = rows[0][0]
cur.execute("SELECT MAX(stream_ordering) FROM events")
rows = cur.fetchall()
max_stream_id = rows[0][0]
if min_stream_id is not None and max_stream_id is not None:
progress = {
"target_min_stream_id_inclusive": min_stream_id,
"max_stream_id_exclusive": max_stream_id + 1,
"rows_inserted": 0,
}
progress_json = json.dumps(progress)
sql = (
"INSERT into background_updates (update_name, progress_json)"
" VALUES (?, ?)"
)
sql = database_engine.convert_param_style(sql)
cur.execute(sql, ("event_search", progress_json))
def run_upgrade(*args, **kwargs):
pass
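The script above follows a pattern used by several deltas in this commit: create the (empty) table synchronously, then queue a named row in background_updates so the data is back-filled asynchronously. A rough sketch of that idea in isolation, using sqlite3, a simplified background_updates table and made-up stream bounds:

import json
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE background_updates (update_name TEXT, progress_json TEXT)")
progress = {
    "target_min_stream_id_inclusive": 1,  # illustrative bounds, not real data
    "max_stream_id_exclusive": 10001,
    "rows_inserted": 0,
}
conn.execute(
    "INSERT INTO background_updates (update_name, progress_json) VALUES (?, ?)",
    ("event_search", json.dumps(progress)),
)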

View file

@ -0,0 +1,25 @@
/* Copyright 2015, 2016 OpenMarket Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* This is a manual index of guest_access content of state events,
* so that we can join on them in SELECT statements.
*/
CREATE TABLE IF NOT EXISTS guest_access(
event_id TEXT NOT NULL,
room_id TEXT NOT NULL,
guest_access TEXT NOT NULL,
UNIQUE (event_id)
);

View file

@ -0,0 +1,25 @@
/* Copyright 2015, 2016 OpenMarket Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* This is a manual index of history_visibility content of state events,
* so that we can join on them in SELECT statements.
*/
CREATE TABLE IF NOT EXISTS history_visibility(
event_id TEXT NOT NULL,
room_id TEXT NOT NULL,
history_visibility TEXT NOT NULL,
UNIQUE (event_id)
);

View file

@ -0,0 +1,38 @@
/* Copyright 2015, 2016 OpenMarket Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
CREATE TABLE IF NOT EXISTS room_tags(
user_id TEXT NOT NULL,
room_id TEXT NOT NULL,
tag TEXT NOT NULL, -- The name of the tag.
content TEXT NOT NULL, -- The JSON content of the tag.
CONSTRAINT room_tag_uniqueness UNIQUE (user_id, room_id, tag)
);
CREATE TABLE IF NOT EXISTS room_tags_revisions (
user_id TEXT NOT NULL,
room_id TEXT NOT NULL,
stream_id BIGINT NOT NULL, -- The current version of the room tags.
CONSTRAINT room_tag_revisions_uniqueness UNIQUE (user_id, room_id)
);
CREATE TABLE IF NOT EXISTS private_user_data_max_stream_id(
Lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE, -- Makes sure this table only has one row.
stream_id BIGINT NOT NULL,
CHECK (Lock='X')
);
INSERT INTO private_user_data_max_stream_id (stream_id) VALUES (0);
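The Lock column above is a standard single-row-table trick: every row must have Lock = 'X', and Lock is UNIQUE, so any second insert fails. A toy demonstration with a hypothetical table name, using sqlite3 for illustration:

import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute(
    "CREATE TABLE one_row_only("
    " Lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE,"
    " stream_id BIGINT NOT NULL,"
    " CHECK (Lock='X'))"
)
conn.execute("INSERT INTO one_row_only (stream_id) VALUES (0)")
try:
    conn.execute("INSERT INTO one_row_only (stream_id) VALUES (1)")  # second row
except sqlite3.IntegrityError:
    pass  # rejected: the UNIQUE constraint on Lock permits only a single row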

View file

@ -0,0 +1,17 @@
/* Copyright 2015, 2016 OpenMarket Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
ALTER TABLE private_user_data_max_stream_id RENAME TO account_data_max_stream_id;

View file

@ -0,0 +1,36 @@
/* Copyright 2015, 2016 OpenMarket Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
CREATE TABLE IF NOT EXISTS account_data(
user_id TEXT NOT NULL,
account_data_type TEXT NOT NULL, -- The type of the account_data.
stream_id BIGINT NOT NULL, -- The version of the account_data.
content TEXT NOT NULL, -- The JSON content of the account_data
CONSTRAINT account_data_uniqueness UNIQUE (user_id, account_data_type)
);
CREATE TABLE IF NOT EXISTS room_account_data(
user_id TEXT NOT NULL,
room_id TEXT NOT NULL,
account_data_type TEXT NOT NULL, -- The type of the account_data.
stream_id BIGINT NOT NULL, -- The version of the account_data.
content TEXT NOT NULL, -- The JSON content of the account_data
CONSTRAINT room_account_data_uniqueness UNIQUE (user_id, room_id, account_data_type)
);
CREATE INDEX account_data_stream_id on account_data(user_id, stream_id);
CREATE INDEX room_account_data_stream_id on room_account_data(user_id, stream_id);

View file

@ -0,0 +1,26 @@
/* Copyright 2015, 2016 OpenMarket Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Keeps track of what rooms users have left and don't want to be able to
* access again.
*
* If all users on this server have left a room, we can delete the room
* entirely.
*
* This column should always contain either 0 or 1.
*/
ALTER TABLE room_memberships ADD COLUMN forgotten INTEGER DEFAULT 0;

View file

@ -0,0 +1,59 @@
# Copyright 2015, 2016 OpenMarket Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import logging
from synapse.storage.prepare_database import get_statements
logger = logging.getLogger(__name__)
ALTER_TABLE = (
"ALTER TABLE events ADD COLUMN origin_server_ts BIGINT;"
"CREATE INDEX events_ts ON events(origin_server_ts, stream_ordering);"
)
def run_create(cur, database_engine, *args, **kwargs):
for statement in get_statements(ALTER_TABLE.splitlines()):
cur.execute(statement)
cur.execute("SELECT MIN(stream_ordering) FROM events")
rows = cur.fetchall()
min_stream_id = rows[0][0]
cur.execute("SELECT MAX(stream_ordering) FROM events")
rows = cur.fetchall()
max_stream_id = rows[0][0]
if min_stream_id is not None and max_stream_id is not None:
progress = {
"target_min_stream_id_inclusive": min_stream_id,
"max_stream_id_exclusive": max_stream_id + 1,
"rows_inserted": 0,
}
progress_json = json.dumps(progress)
sql = (
"INSERT into background_updates (update_name, progress_json)"
" VALUES (?, ?)"
)
sql = database_engine.convert_param_style(sql)
cur.execute(sql, ("event_origin_server_ts", progress_json))
def run_upgrade(*args, **kwargs):
pass

View file

@ -0,0 +1,27 @@
/* Copyright 2015 OpenMarket Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
CREATE TABLE IF NOT EXISTS event_push_actions(
room_id TEXT NOT NULL,
event_id TEXT NOT NULL,
user_id TEXT NOT NULL,
profile_tag VARCHAR(32),
actions TEXT NOT NULL,
CONSTRAINT event_id_user_id_profile_tag_uniqueness UNIQUE (room_id, event_id, user_id, profile_tag)
);
CREATE INDEX event_push_actions_room_id_event_id_user_id_profile_tag on event_push_actions(room_id, event_id, user_id, profile_tag);
CREATE INDEX event_push_actions_room_id_user_id on event_push_actions(room_id, user_id);

View file

@ -0,0 +1,20 @@
/* Copyright 2016 OpenMarket Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/** Using CREATE INDEX directly is deprecated in favour of using a background
* update; see synapse/storage/schema/delta/33/access_tokens_device_index.sql
* and synapse/storage/registration.py for an example using
* "access_tokens_device_index" **/
CREATE INDEX events_room_stream on events(room_id, stream_ordering);

View file

@ -0,0 +1,20 @@
/* Copyright 2016 OpenMarket Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/** Using CREATE INDEX directly is deprecated in favour of using a background
* update; see synapse/storage/schema/delta/33/access_tokens_device_index.sql
* and synapse/storage/registration.py for an example using
* "access_tokens_device_index" **/
CREATE INDEX public_room_index on rooms(is_public);

View file

@ -0,0 +1,22 @@
/* Copyright 2015, 2016 OpenMarket Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/** Using CREATE INDEX directly is deprecated in favour of using a background
* update; see synapse/storage/schema/delta/33/access_tokens_device_index.sql
* and synapse/storage/registration.py for an example using
* "access_tokens_device_index" **/
CREATE INDEX receipts_linearized_user ON receipts_linearized(
user_id
);

View file

@ -0,0 +1,21 @@
/* Copyright 2016 OpenMarket Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Stores the timestamp when a user upgraded from a guest to a full user, if
* that happened.
*/
ALTER TABLE users ADD COLUMN upgrade_ts BIGINT;

View file

@ -0,0 +1,22 @@
/* Copyright 2016 OpenMarket Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
ALTER TABLE users ADD is_guest SMALLINT DEFAULT 0 NOT NULL;
/*
* NB: any guest users created between 27 and 28 will be incorrectly
* marked as not guests. We don't bother to fill these in correctly because
* guest access is not really complete in 27 anyway, so it's very
* unlikely that there will be any guest users created.
*/

View file

@ -0,0 +1,35 @@
/* Copyright 2016 OpenMarket Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
ALTER TABLE event_push_actions ADD COLUMN topological_ordering BIGINT;
ALTER TABLE event_push_actions ADD COLUMN stream_ordering BIGINT;
ALTER TABLE event_push_actions ADD COLUMN notif SMALLINT;
ALTER TABLE event_push_actions ADD COLUMN highlight SMALLINT;
UPDATE event_push_actions SET stream_ordering = (
SELECT stream_ordering FROM events WHERE event_id = event_push_actions.event_id
), topological_ordering = (
SELECT topological_ordering FROM events WHERE event_id = event_push_actions.event_id
);
UPDATE event_push_actions SET notif = 1, highlight = 0;
/** Using CREATE INDEX directly is deprecated in favour of using a background
* update; see synapse/storage/schema/delta/33/access_tokens_device_index.sql
* and synapse/storage/registration.py for an example using
* "access_tokens_device_index" **/
CREATE INDEX event_push_actions_rm_tokens on event_push_actions(
user_id, room_id, topological_ordering, stream_ordering
);

View file

@ -0,0 +1,16 @@
/* Copyright 2016 OpenMarket Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
ALTER TABLE room_aliases ADD COLUMN creator TEXT;

View file

@ -0,0 +1,67 @@
# Copyright 2016 OpenMarket Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from synapse.config.appservice import load_appservices
logger = logging.getLogger(__name__)
def run_create(cur, database_engine, *args, **kwargs):
# NULL indicates user was not registered by an appservice.
try:
cur.execute("ALTER TABLE users ADD COLUMN appservice_id TEXT")
except Exception:
# Maybe we already added the column? Hope so...
pass
def run_upgrade(cur, database_engine, config, *args, **kwargs):
cur.execute("SELECT name FROM users")
rows = cur.fetchall()
config_files = []
try:
config_files = config.app_service_config_files
except AttributeError:
logger.warning("Could not get app_service_config_files from config")
pass
appservices = load_appservices(config.server_name, config_files)
owned = {}
for row in rows:
user_id = row[0]
for appservice in appservices:
if appservice.is_exclusive_user(user_id):
if user_id in owned.keys():
logger.error(
"user_id %s was owned by more than one application"
" service (IDs %s and %s); assigning arbitrarily to %s"
% (user_id, owned[user_id], appservice.id, owned[user_id])
)
owned.setdefault(appservice.id, []).append(user_id)
for as_id, user_ids in owned.items():
n = 100
user_chunks = (user_ids[i : i + n] for i in range(0, len(user_ids), n))
for chunk in user_chunks:
cur.execute(
database_engine.convert_param_style(
"UPDATE users SET appservice_id = ? WHERE name IN (%s)"
% (",".join("?" for _ in chunk),)
),
[as_id] + chunk,
)
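The UPDATE loop above batches users into chunks of 100 and builds one placeholder per user ID, which keeps each statement's parameter list bounded. A minimal standalone sketch of that chunking pattern (hypothetical table, data and appservice ID; sqlite3 purely for illustration):

import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE users (name TEXT, appservice_id TEXT)")
user_ids = ["@user%d:example.com" % i for i in range(250)]
conn.executemany("INSERT INTO users (name) VALUES (?)", [(u,) for u in user_ids])

n = 100
for chunk in (user_ids[i : i + n] for i in range(0, len(user_ids), n)):
    conn.execute(
        "UPDATE users SET appservice_id = ? WHERE name IN (%s)"
        % (",".join("?" for _ in chunk),),
        ["irc_bridge"] + chunk,
    )
assert conn.execute(
    "SELECT COUNT(*) FROM users WHERE appservice_id IS NOT NULL"
).fetchone() == (250,)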

View file

@ -0,0 +1,25 @@
/* Copyright 2016 OpenMarket Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
CREATE TABLE IF NOT EXISTS deleted_pushers(
stream_id BIGINT NOT NULL,
app_id TEXT NOT NULL,
pushkey TEXT NOT NULL,
user_id TEXT NOT NULL,
/* We only track the most recent delete for each app_id, pushkey and user_id. */
UNIQUE (app_id, pushkey, user_id)
);
CREATE INDEX deleted_pushers_stream_id ON deleted_pushers (stream_id);

View file

@ -0,0 +1,30 @@
/* Copyright 2016 OpenMarket Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
CREATE TABLE presence_stream(
stream_id BIGINT,
user_id TEXT,
state TEXT,
last_active_ts BIGINT,
last_federation_update_ts BIGINT,
last_user_sync_ts BIGINT,
status_msg TEXT,
currently_active BOOLEAN
);
CREATE INDEX presence_stream_id ON presence_stream(stream_id, user_id);
CREATE INDEX presence_stream_user_id ON presence_stream(user_id);
CREATE INDEX presence_stream_state ON presence_stream(state);

View file

@ -0,0 +1,23 @@
/* Copyright 2016 OpenMarket Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* This release removes the restriction that published rooms must have an alias,
* so we go back and ensure the only 'public' rooms are ones with an alias.
* We use (1 = 0) and (1 = 1) so that it works in both postgres and sqlite
*/
UPDATE rooms SET is_public = (1 = 0) WHERE is_public = (1 = 1) AND room_id not in (
SELECT room_id FROM room_aliases
);

View file

@ -0,0 +1,38 @@
/* Copyright 2016 OpenMarket Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
CREATE TABLE push_rules_stream(
stream_id BIGINT NOT NULL,
event_stream_ordering BIGINT NOT NULL,
user_id TEXT NOT NULL,
rule_id TEXT NOT NULL,
op TEXT NOT NULL, -- One of "ENABLE", "DISABLE", "ACTIONS", "ADD", "DELETE"
priority_class SMALLINT,
priority INTEGER,
conditions TEXT,
actions TEXT
);
-- The extra data for each operation is:
-- * ENABLE, DISABLE, DELETE: []
-- * ACTIONS: ["actions"]
-- * ADD: ["priority_class", "priority", "actions", "conditions"]
-- Index for replication queries.
CREATE INDEX push_rules_stream_id ON push_rules_stream(stream_id);
-- Index for /sync queries.
CREATE INDEX push_rules_stream_user_stream_id on push_rules_stream(user_id, stream_id);

View file

@ -0,0 +1,24 @@
/* Copyright 2016 OpenMarket Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-- Stores guest account access tokens generated for unbound 3pids.
CREATE TABLE threepid_guest_access_tokens(
medium TEXT, -- The medium of the 3pid. Must be "email".
address TEXT, -- The 3pid address.
guest_access_token TEXT, -- The access token for a guest user for this 3pid.
first_inviter TEXT -- User ID of the first user to invite this 3pid to a room.
);
CREATE UNIQUE INDEX threepid_guest_access_tokens_index ON threepid_guest_access_tokens(medium, address);
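
A hedged sketch, not part of this delta, of how the unique (medium, address) index supports looking up a previously issued guest token for a hypothetical address.

-- Illustrative only.
SELECT guest_access_token, first_inviter
FROM threepid_guest_access_tokens
WHERE medium = 'email' AND address = 'alice@example.com';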

View file

@ -0,0 +1,42 @@
/* Copyright 2016 OpenMarket Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
CREATE TABLE local_invites(
stream_id BIGINT NOT NULL,
inviter TEXT NOT NULL,
invitee TEXT NOT NULL,
event_id TEXT NOT NULL,
room_id TEXT NOT NULL,
locally_rejected TEXT,
replaced_by TEXT
);
-- Insert all invites for local users into the new `local_invites` table
INSERT INTO local_invites SELECT
stream_ordering as stream_id,
sender as inviter,
state_key as invitee,
event_id,
room_id,
NULL as locally_rejected,
NULL as replaced_by
FROM events
NATURAL JOIN current_state_events
NATURAL JOIN room_memberships
WHERE membership = 'invite' AND state_key IN (SELECT name FROM users);
CREATE INDEX local_invites_id ON local_invites(stream_id);
CREATE INDEX local_invites_for_user_idx ON local_invites(invitee, locally_rejected, replaced_by, room_id);
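
Not part of this delta: the kind of query the local_invites_for_user_idx index is shaped for, i.e. invites to a (hypothetical) local user that have been neither rejected locally nor replaced by a later membership event.

-- Illustrative only.
SELECT room_id, inviter, event_id
FROM local_invites
WHERE invitee = '@alice:example.com'
  AND locally_rejected IS NULL
  AND replaced_by IS NULL;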

View file

@ -0,0 +1,27 @@
/* Copyright 2016 OpenMarket Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
CREATE TABLE local_media_repository_url_cache(
url TEXT, -- the URL being cached
response_code INTEGER, -- the HTTP response code of this download attempt
etag TEXT, -- the etag header of this response
expires INTEGER, -- the number of ms this response was valid for
og TEXT, -- cache of the OG metadata of this URL as JSON
media_id TEXT, -- the media_id, if any, of the URL's content in the repo
download_ts BIGINT -- the timestamp of this download attempt
);
CREATE INDEX local_media_repository_url_cache_by_url_download_ts
ON local_media_repository_url_cache(url, download_ts);
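
A sketch of a freshness check against the cache, not part of this delta, assuming the expiry time is download_ts + expires and using an illustrative "now" value in milliseconds.

-- Illustrative only.
SELECT og, media_id
FROM local_media_repository_url_cache
WHERE url = 'https://example.com/'
  AND download_ts + expires > 1609459200000  -- current time in ms (illustrative)
ORDER BY download_ts DESC
LIMIT 1;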

View file

@ -0,0 +1,87 @@
# Copyright 2016 OpenMarket Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Change the last_token to last_stream_ordering now that pushers no longer
# listen on an event stream but instead select out of the event_push_actions
# table.
import logging

logger = logging.getLogger(__name__)


def token_to_stream_ordering(token):
    return int(token[1:].split("_")[0])


def run_create(cur, database_engine, *args, **kwargs):
    logger.info("Porting pushers table, delta 31...")
    cur.execute(
        """
        CREATE TABLE IF NOT EXISTS pushers2 (
          id BIGINT PRIMARY KEY,
          user_name TEXT NOT NULL,
          access_token BIGINT DEFAULT NULL,
          profile_tag VARCHAR(32) NOT NULL,
          kind VARCHAR(8) NOT NULL,
          app_id VARCHAR(64) NOT NULL,
          app_display_name VARCHAR(64) NOT NULL,
          device_display_name VARCHAR(128) NOT NULL,
          pushkey TEXT NOT NULL,
          ts BIGINT NOT NULL,
          lang VARCHAR(8),
          data TEXT,
          last_stream_ordering INTEGER,
          last_success BIGINT,
          failing_since BIGINT,
          UNIQUE (app_id, pushkey, user_name)
        )
        """
    )
    cur.execute(
        """SELECT
        id, user_name, access_token, profile_tag, kind,
        app_id, app_display_name, device_display_name,
        pushkey, ts, lang, data, last_token, last_success,
        failing_since
        FROM pushers
        """
    )

    count = 0
    for row in cur.fetchall():
        row = list(row)
        row[12] = token_to_stream_ordering(row[12])
        cur.execute(
            database_engine.convert_param_style(
                """
                INSERT into pushers2 (
                    id, user_name, access_token, profile_tag, kind,
                    app_id, app_display_name, device_display_name,
                    pushkey, ts, lang, data, last_stream_ordering, last_success,
                    failing_since
                ) values (%s)"""
                % (",".join(["?" for _ in range(len(row))]))
            ),
            row,
        )
        count += 1

    cur.execute("DROP TABLE pushers")
    cur.execute("ALTER TABLE pushers2 RENAME TO pushers")
    logger.info("Moved %d pushers to new table", count)


def run_upgrade(cur, database_engine, *args, **kwargs):
    pass
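
Not part of this delta: the script above uses the portable "rebuild and rename" pattern (create a replacement table, copy the rows across, drop the old table, rename the new one). A minimal SQL sketch of the same pattern with hypothetical table and column names; the per-row transformation of last_token (values of the form "s<stream_ordering>_...", as implied by token_to_stream_ordering) is done in Python above rather than in SQL.

-- Illustrative only.
CREATE TABLE example2 (id BIGINT PRIMARY KEY, new_col BIGINT);
INSERT INTO example2 (id, new_col)
    SELECT id, old_col FROM example;
DROP TABLE example;
ALTER TABLE example2 RENAME TO example;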

View file

@ -0,0 +1,22 @@
/* Copyright 2016 OpenMarket Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/** Using CREATE INDEX directly is deprecated in favour of using a background
 * update; see synapse/storage/schema/delta/33/access_tokens_device_index.sql
 * and synapse/storage/registration.py for an example using
 * "access_tokens_device_index". **/
CREATE INDEX event_push_actions_stream_ordering on event_push_actions(
stream_ordering, user_id
);

View file

@ -0,0 +1,64 @@
# Copyright 2016 OpenMarket Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import logging

from synapse.storage.engines import PostgresEngine
from synapse.storage.prepare_database import get_statements

logger = logging.getLogger(__name__)

ALTER_TABLE = """
ALTER TABLE event_search ADD COLUMN origin_server_ts BIGINT;
ALTER TABLE event_search ADD COLUMN stream_ordering BIGINT;
"""


def run_create(cur, database_engine, *args, **kwargs):
    if not isinstance(database_engine, PostgresEngine):
        return

    for statement in get_statements(ALTER_TABLE.splitlines()):
        cur.execute(statement)

    cur.execute("SELECT MIN(stream_ordering) FROM events")
    rows = cur.fetchall()
    min_stream_id = rows[0][0]

    cur.execute("SELECT MAX(stream_ordering) FROM events")
    rows = cur.fetchall()
    max_stream_id = rows[0][0]

    if min_stream_id is not None and max_stream_id is not None:
        progress = {
            "target_min_stream_id_inclusive": min_stream_id,
            "max_stream_id_exclusive": max_stream_id + 1,
            "rows_inserted": 0,
            "have_added_indexes": False,
        }
        progress_json = json.dumps(progress)

        sql = (
            "INSERT into background_updates (update_name, progress_json)"
            " VALUES (?, ?)"
        )
        sql = database_engine.convert_param_style(sql)
        cur.execute(sql, ("event_search_order", progress_json))


def run_upgrade(cur, database_engine, *args, **kwargs):
    pass
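
Not part of this delta: a sketch of the row the script queues, assuming (purely for illustration) that the oldest and newest events have stream orderings 1 and 1000. The actual backfill is then driven by the background update machinery.

-- Illustrative only.
INSERT INTO background_updates (update_name, progress_json) VALUES
    ('event_search_order',
     '{"target_min_stream_id_inclusive": 1, "max_stream_id_exclusive": 1001, "rows_inserted": 0, "have_added_indexes": false}');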

View file

@ -0,0 +1,16 @@
/* Copyright 2016 OpenMarket Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
ALTER TABLE events ADD COLUMN received_ts BIGINT;

View file

@ -0,0 +1,9 @@
CREATE TABLE open_id_tokens (
token TEXT NOT NULL PRIMARY KEY,
ts_valid_until_ms bigint NOT NULL,
user_id TEXT NOT NULL,
UNIQUE (token)
);
CREATE index open_id_tokens_ts_valid_until_ms ON open_id_tokens(ts_valid_until_ms);

View file

@ -0,0 +1,23 @@
/* Copyright 2016 OpenMarket Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
CREATE TABLE pusher_throttle(
pusher BIGINT NOT NULL,
room_id TEXT NOT NULL,
last_sent_ts BIGINT,
throttle_ms BIGINT,
PRIMARY KEY (pusher, room_id)
);

View file

@ -0,0 +1,33 @@
/* Copyright 2016 OpenMarket Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-- The following indices are redundant; other indices are equivalent to or
-- supersets of them.
DROP INDEX IF EXISTS events_room_id; -- Prefix of events_room_stream
DROP INDEX IF EXISTS events_order; -- Prefix of events_order_topo_stream_room
DROP INDEX IF EXISTS events_topological_ordering; -- Prefix of events_order_topo_stream_room
DROP INDEX IF EXISTS events_stream_ordering; -- Duplicate of PRIMARY KEY
DROP INDEX IF EXISTS event_to_state_groups_id; -- Duplicate of PRIMARY KEY
DROP INDEX IF EXISTS event_push_actions_room_id_event_id_user_id_profile_tag; -- Duplicate of UNIQUE CONSTRAINT
DROP INDEX IF EXISTS st_extrem_id; -- Prefix of UNIQUE CONSTRAINT
DROP INDEX IF EXISTS event_signatures_id; -- Prefix of UNIQUE CONSTRAINT
DROP INDEX IF EXISTS redactions_event_id; -- Duplicate of UNIQUE CONSTRAINT
-- The following indices were unused
DROP INDEX IF EXISTS remote_media_cache_thumbnails_media_id;
DROP INDEX IF EXISTS evauth_edges_auth_id;
DROP INDEX IF EXISTS presence_stream_state;

View file

@ -0,0 +1,25 @@
/* Copyright 2016 OpenMarket Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
CREATE TABLE event_reports(
id BIGINT NOT NULL PRIMARY KEY,
received_ts BIGINT NOT NULL,
room_id TEXT NOT NULL,
event_id TEXT NOT NULL,
user_id TEXT NOT NULL,
reason TEXT,
content TEXT
);

View file

@ -0,0 +1,17 @@
/* Copyright 2016 OpenMarket Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
INSERT INTO background_updates (update_name, progress_json) VALUES
('access_tokens_device_index', '{}');

View file

@ -0,0 +1,21 @@
/* Copyright 2016 OpenMarket Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
CREATE TABLE devices (
user_id TEXT NOT NULL,
device_id TEXT NOT NULL,
display_name TEXT,
CONSTRAINT device_uniqueness UNIQUE (user_id, device_id)
);

View file

@ -0,0 +1,19 @@
/* Copyright 2016 OpenMarket Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-- make sure that we have a device record for each set of E2E keys, so that the
-- user can delete them if they like.
INSERT INTO devices
SELECT user_id, device_id, NULL FROM e2e_device_keys_json;

View file

@ -0,0 +1,20 @@
/* Copyright 2016 OpenMarket Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-- a previous version of the "devices_for_e2e_keys" delta set all the device
-- names to "unknown device". This wasn't terribly helpful.
UPDATE devices
SET display_name = NULL
WHERE display_name = 'unknown device';

View file

@ -0,0 +1,59 @@
# Copyright 2016 OpenMarket Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import logging

from synapse.storage.prepare_database import get_statements

logger = logging.getLogger(__name__)

ALTER_TABLE = """
ALTER TABLE events ADD COLUMN sender TEXT;
ALTER TABLE events ADD COLUMN contains_url BOOLEAN;
"""


def run_create(cur, database_engine, *args, **kwargs):
    for statement in get_statements(ALTER_TABLE.splitlines()):
        cur.execute(statement)

    cur.execute("SELECT MIN(stream_ordering) FROM events")
    rows = cur.fetchall()
    min_stream_id = rows[0][0]

    cur.execute("SELECT MAX(stream_ordering) FROM events")
    rows = cur.fetchall()
    max_stream_id = rows[0][0]

    if min_stream_id is not None and max_stream_id is not None:
        progress = {
            "target_min_stream_id_inclusive": min_stream_id,
            "max_stream_id_exclusive": max_stream_id + 1,
            "rows_inserted": 0,
        }
        progress_json = json.dumps(progress)

        sql = (
            "INSERT into background_updates (update_name, progress_json)"
            " VALUES (?, ?)"
        )
        sql = database_engine.convert_param_style(sql)
        cur.execute(sql, ("event_fields_sender_url", progress_json))


def run_upgrade(cur, database_engine, *args, **kwargs):
    pass

View file

@ -0,0 +1,30 @@
# Copyright 2016 OpenMarket Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import time

ALTER_TABLE = "ALTER TABLE remote_media_cache ADD COLUMN last_access_ts BIGINT"


def run_create(cur, database_engine, *args, **kwargs):
    cur.execute(ALTER_TABLE)


def run_upgrade(cur, database_engine, *args, **kwargs):
    cur.execute(
        database_engine.convert_param_style(
            "UPDATE remote_media_cache SET last_access_ts = ?"
        ),
        (int(time.time() * 1000),),
    )

View file

@ -0,0 +1,17 @@
/* Copyright 2016 OpenMarket Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
INSERT INTO background_updates (update_name, progress_json) VALUES
('user_ips_device_index', '{}');

View file

@ -0,0 +1,23 @@
/* Copyright 2016 OpenMarket Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
CREATE TABLE IF NOT EXISTS appservice_stream_position(
Lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE, -- Makes sure this table only has one row.
stream_ordering BIGINT,
CHECK (Lock='X')
);
INSERT INTO appservice_stream_position (stream_ordering)
SELECT COALESCE(MAX(stream_ordering), 0) FROM events;
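
Because the UNIQUE and CHECK constraints on Lock pin this table to a single row, later code can simply overwrite that row. A minimal sketch with an illustrative position, not part of this delta:

-- Illustrative only.
UPDATE appservice_stream_position SET stream_ordering = 12345;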

View file

@ -0,0 +1,46 @@
# Copyright 2016 OpenMarket Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging

from synapse.storage.engines import PostgresEngine
from synapse.storage.prepare_database import get_statements

logger = logging.getLogger(__name__)

# This stream is used to notify replication slaves that some caches have
# been invalidated that they cannot infer from the other streams.
CREATE_TABLE = """
CREATE TABLE cache_invalidation_stream (
    stream_id BIGINT,
    cache_func TEXT,
    keys TEXT[],
    invalidation_ts BIGINT
);

CREATE INDEX cache_invalidation_stream_id ON cache_invalidation_stream(stream_id);
"""


def run_create(cur, database_engine, *args, **kwargs):
    if not isinstance(database_engine, PostgresEngine):
        return

    for statement in get_statements(CREATE_TABLE.splitlines()):
        cur.execute(statement)


def run_upgrade(cur, database_engine, *args, **kwargs):
    pass
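
Not part of this delta: a sketch of a row on the stream, with an illustrative cache name, key and timestamp. The TEXT[] keys column is the reason this table is Postgres-only.

-- Illustrative only (Postgres syntax).
INSERT INTO cache_invalidation_stream (stream_id, cache_func, keys, invalidation_ts)
VALUES (1, 'get_user_by_id', ARRAY['@alice:example.com'], 1609459200000);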

View file

@ -0,0 +1,24 @@
/* Copyright 2016 OpenMarket Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
CREATE TABLE device_inbox (
user_id TEXT NOT NULL,
device_id TEXT NOT NULL,
stream_id BIGINT NOT NULL,
message_json TEXT NOT NULL -- JSON-encoded message, e.g. {"type": ..., "sender": ..., "content": ...}
);
CREATE INDEX device_inbox_user_stream_id ON device_inbox(user_id, device_id, stream_id);
CREATE INDEX device_inbox_stream_id ON device_inbox(stream_id);
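
A hedged sketch, not part of this delta, with hypothetical IDs and an illustrative message body: write a to-device message, then read back anything newer than a known stream position.

-- Illustrative only.
INSERT INTO device_inbox (user_id, device_id, stream_id, message_json)
VALUES ('@alice:example.com', 'ABCDEFGH', 42,
        '{"type": "m.room_key_request", "sender": "@bob:example.com", "content": {}}');
SELECT stream_id, message_json
FROM device_inbox
WHERE user_id = '@alice:example.com' AND device_id = 'ABCDEFGH' AND stream_id > 40
ORDER BY stream_id;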

Some files were not shown because too many files have changed in this diff.