From 07d765209dea12229e70a09784e647611acabcda Mon Sep 17 00:00:00 2001 From: David Baker Date: Tue, 19 Apr 2016 14:24:36 +0100 Subject: [PATCH] First bits of emailpusher Mostly logic of when to send an email --- synapse/push/emailpusher.py | 214 ++++++++++++++++++ synapse/push/pusher.py | 4 +- synapse/storage/event_push_actions.py | 57 ++++- synapse/storage/events.py | 2 + synapse/storage/pusher.py | 27 +++ synapse/storage/schema/delta/31/events.sql | 16 ++ .../schema/delta/31/pusher_throttle.sql | 23 ++ 7 files changed, 335 insertions(+), 8 deletions(-) create mode 100644 synapse/push/emailpusher.py create mode 100644 synapse/storage/schema/delta/31/events.sql create mode 100644 synapse/storage/schema/delta/31/pusher_throttle.sql diff --git a/synapse/push/emailpusher.py b/synapse/push/emailpusher.py new file mode 100644 index 000000000..f9954df39 --- /dev/null +++ b/synapse/push/emailpusher.py @@ -0,0 +1,214 @@ +# -*- coding: utf-8 -*- +# Copyright 2016 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from twisted.internet import defer, reactor + +import logging + +from synapse.util.metrics import Measure +from synapse.util.async import run_on_reactor + +logger = logging.getLogger(__name__) + +# The amount of time we always wait before ever emailing about a notification +# (to give the user a chance to respond to other push or notice the window) +DELAY_BEFORE_MAIL_MS = 2 * 60 * 1000 + +THROTTLE_START_MS = 2 * 60 * 1000 +THROTTLE_MAX_MS = (2 * 60 * 1000) * (2**11) # ~3 days + +# If no event triggers a notification for this long after the previous, +# the throttle is released. +THROTTLE_RESET_AFTER_MS = (2 * 60 * 1000) * (2**11) # ~3 days + + +class EmailPusher(object): + """ + A pusher that sends email notifications about events (approximately) + when they happen. + This shares quite a bit of code with httpusher: it would be good to + factor out the common parts + """ + def __init__(self, hs, pusherdict): + self.hs = hs + self.store = self.hs.get_datastore() + self.clock = self.hs.get_clock() + self.pusher_id = pusherdict['id'] + self.user_id = pusherdict['user_name'] + self.app_id = pusherdict['app_id'] + self.email = pusherdict['pushkey'] + self.last_stream_ordering = pusherdict['last_stream_ordering'] + self.timed_call = None + self.throttle_params = None + + # See httppusher + self.max_stream_ordering = None + + @defer.inlineCallbacks + def on_started(self): + self.throttle_params = yield self.store.get_throttle_params_by_room( + self.pusher_id + ) + yield self._process() + + @defer.inlineCallbacks + def on_new_notifications(self, min_stream_ordering, max_stream_ordering): + with Measure(self.clock, "push.on_new_notifications"): + self.max_stream_ordering = max(max_stream_ordering, self.max_stream_ordering) + yield self._process() + + @defer.inlineCallbacks + def on_timer(self): + self.timed_call = None + with Measure(self.clock, "push.on_timer"): + yield self._process() + + @defer.inlineCallbacks + def _process(self): + last_notifs = yield self.store.get_time_of_latest_push_action_by_room_for_user( + self.user_id + ) + + unprocessed = yield self.store.get_unread_push_actions_for_user_in_range( + self.user_id, self.last_stream_ordering, self.max_stream_ordering + ) + + soonest_due_at = None + + for push_action in unprocessed: + received_at = push_action['received_ts'] + if received_at is None: + received_at = 0 + notif_ready_at = received_at + DELAY_BEFORE_MAIL_MS + + room_ready_at = self.room_ready_to_notify_at( + push_action['room_id'], self.get_room_last_notif_ts( + last_notifs, push_action['room_id'] + ) + ) + + should_notify_at = max(notif_ready_at, room_ready_at) + + if should_notify_at < self.clock.time_msec(): + # one of our notifications is ready for sending, so we send + # *one* email updating the user on their notifications, + # we then consider all previously outstanding notifications + # to be delivered. + yield self.send_notification(push_action) + + yield self.save_last_stream_ordering_and_success(max([ + ea['stream_ordering'] for ea in unprocessed + ])) + yield self.sent_notif_update_throttle( + push_action['room_id'], push_action + ) + else: + if soonest_due_at is None or should_notify_at < soonest_due_at: + soonest_due_at = should_notify_at + + if self.timed_call is not None: + self.timed_call.cancel() + self.timed_call = None + + if soonest_due_at is not None: + self.timed_call = reactor.callLater( + self.seconds_until(soonest_due_at), self.on_timer + ) + + @defer.inlineCallbacks + def save_last_stream_ordering_and_success(self, last_stream_ordering): + self.last_stream_ordering = last_stream_ordering + yield self.store.update_pusher_last_stream_ordering_and_success( + self.app_id, self.email, self.user_id, + last_stream_ordering, self.clock.time_msec() + ) + + def seconds_until(self, ts_msec): + return (ts_msec - self.clock.time_msec()) / 1000 + + def get_room_last_notif_ts(self, last_notif_by_room, room_id): + if room_id in last_notif_by_room: + return last_notif_by_room[room_id] + else: + return 0 + + def get_room_throttle_ms(self, room_id): + if room_id in self.throttle_params: + return self.throttle_params[room_id]["throttle_ms"] + else: + return 0 + + def get_room_last_sent_ts(self, room_id): + if room_id in self.throttle_params: + return self.throttle_params[room_id]["last_sent_ts"] + else: + return 0 + + def room_ready_to_notify_at(self, room_id, last_notif_time): + """ + Determines whether throttling should prevent us from sending an email + for the given room + Returns: True if we should send, False if we should not + """ + last_sent_ts = self.get_room_last_sent_ts(room_id) + throttle_ms = self.get_room_throttle_ms(room_id) + + may_send_at = last_sent_ts + throttle_ms + return may_send_at + + @defer.inlineCallbacks + def sent_notif_update_throttle(self, room_id, notified_push_action): + # We have sent a notification, so update the throttle accordingly. + # If the event that triggered the notif happened more than + # THROTTLE_RESET_AFTER_MS after the previous one that triggered a + # notif, we release the throttle. Otherwise, the throttle is increased. + time_of_previous_notifs = yield self.store.get_time_of_last_push_action_before( + notified_push_action['stream_ordering'] + ) + + time_of_this_notifs = notified_push_action['received_ts'] + + if time_of_previous_notifs is not None and time_of_this_notifs is not None: + gap = time_of_this_notifs - time_of_previous_notifs + else: + # if we don't know the arrival time of one of the notifs (it was not + # stored prior to email notification code) then assume a gap of + # zero which will just not reset the throttle + gap = 0 + + current_throttle_ms = self.get_room_throttle_ms(room_id) + + if gap > THROTTLE_RESET_AFTER_MS: + new_throttle_ms = THROTTLE_START_MS + else: + if current_throttle_ms == 0: + new_throttle_ms = THROTTLE_START_MS + else: + new_throttle_ms = min( + current_throttle_ms * 2, + THROTTLE_MAX_MS + ) + self.throttle_params[room_id] = { + "last_sent_ts": self.clock.time_msec(), + "throttle_ms": new_throttle_ms + } + yield self.store.set_throttle_params( + self.pusher_id, room_id, self.throttle_params[room_id] + ) + + @defer.inlineCallbacks + def send_notification(self, push_action): + yield run_on_reactor() + logger.error("sending notif email for user %r", self.user_id) \ No newline at end of file diff --git a/synapse/push/pusher.py b/synapse/push/pusher.py index 496083750..f7c3021fc 100644 --- a/synapse/push/pusher.py +++ b/synapse/push/pusher.py @@ -1,7 +1,9 @@ from httppusher import HttpPusher +from emailpusher import EmailPusher PUSHER_TYPES = { - 'http': HttpPusher + 'http': HttpPusher, + 'email': EmailPusher, } diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index 86a98b6f1..ad512b2f0 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -118,15 +118,17 @@ class EventPushActionsStore(SQLBaseStore): max_stream_ordering=None): def get_after_receipt(txn): sql = ( - "SELECT ep.event_id, ep.stream_ordering, ep.actions " + "SELECT ep.event_id, ep.room_id, ep.stream_ordering, ep.actions, " + "e.received_ts " "FROM event_push_actions AS ep, (" - " SELECT room_id, user_id," - " max(topological_ordering) as topological_ordering," - " max(stream_ordering) as stream_ordering" + " SELECT room_id, user_id, " + " max(topological_ordering) as topological_ordering, " + " max(stream_ordering) as stream_ordering " " FROM events" " NATURAL JOIN receipts_linearized WHERE receipt_type = 'm.read'" " GROUP BY room_id, user_id" ") AS rl " + "NATURAL JOIN events e " "WHERE" " ep.room_id = rl.room_id" " AND (" @@ -153,8 +155,10 @@ class EventPushActionsStore(SQLBaseStore): def get_no_receipt(txn): sql = ( - "SELECT ep.event_id, ep.stream_ordering, ep.actions " + "SELECT ep.event_id, ep.room_id, ep.stream_ordering, ep.actions, " + "e.received_ts " "FROM event_push_actions AS ep " + "JOIN events e ON ep.room_id = e.room_id AND ep.event_id = e.event_id " "WHERE ep.room_id not in (" " SELECT room_id FROM events NATURAL JOIN receipts_linearized" " WHERE receipt_type = 'm.read' AND user_id = ? " @@ -175,11 +179,30 @@ class EventPushActionsStore(SQLBaseStore): defer.returnValue([ { "event_id": row[0], - "stream_ordering": row[1], - "actions": json.loads(row[2]), + "room_id": row[1], + "stream_ordering": row[2], + "actions": json.loads(row[3]), + "received_ts": row[4], } for row in after_read_receipt + no_read_receipt ]) + @defer.inlineCallbacks + def get_time_of_last_push_action_before(self, stream_ordering): + def f(txn): + sql = ( + "SELECT e.received_ts " + "FROM event_push_actions AS ep " + "JOIN events e ON ep.room_id = e.room_id AND ep.event_id = e.event_id " + "WHERE ep.stream_ordering > ? " + "ORDER BY ep.stream_ordering ASC " + "LIMIT 1" + ) + txn.execute(sql, (stream_ordering,)) + return txn.fetchone() + result = yield self.runInteraction("get_time_of_last_push_action_before", f) + defer.returnValue(result[0] if result is not None else None) + + @defer.inlineCallbacks def get_latest_push_action_stream_ordering(self): def f(txn): @@ -190,6 +213,26 @@ class EventPushActionsStore(SQLBaseStore): ) defer.returnValue(result[0] or 0) + @defer.inlineCallbacks + def get_time_of_latest_push_action_by_room_for_user(self, user_id): + """ + Returns only the received_ts of the last notification in each of the + user's rooms, in a dict by room_id + """ + def f(txn): + txn.execute( + "SELECT ep.room_id, MAX(e.received_ts) " + "FROM event_push_actions AS ep " + "JOIN events e ON ep.room_id = e.room_id AND ep.event_id = e.event_id " + "GROUP BY ep.room_id" + ) + return txn.fetchall() + result = yield self.runInteraction( + "get_time_of_latest_push_action_by_room_for_user", f + ) + + defer.returnValue({row[0]: row[1] for row in result}) + def _remove_push_actions_for_event_id_txn(self, txn, room_id, event_id): # Sad that we have to blow away the cache for the whole room here txn.call_after( diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 21487724e..dd58e001d 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -55,6 +55,7 @@ class EventsStore(SQLBaseStore): def __init__(self, hs): super(EventsStore, self).__init__(hs) + self._clock = hs.get_clock() self.register_background_update_handler( self.EVENT_ORIGIN_SERVER_TS_NAME, self._background_reindex_origin_server_ts ) @@ -427,6 +428,7 @@ class EventsStore(SQLBaseStore): "outlier": event.internal_metadata.is_outlier(), "content": encode_json(event.content).decode("UTF-8"), "origin_server_ts": int(event.origin_server_ts), + "received_ts": self._clock.time_msec(), } for event, _ in events_and_contexts ], diff --git a/synapse/storage/pusher.py b/synapse/storage/pusher.py index e5755c0ae..caef9b59a 100644 --- a/synapse/storage/pusher.py +++ b/synapse/storage/pusher.py @@ -230,3 +230,30 @@ class PusherStore(SQLBaseStore): {'failing_since': failing_since}, desc="update_pusher_failing_since", ) + + @defer.inlineCallbacks + def get_throttle_params_by_room(self, pusher_id): + res = yield self._simple_select_list( + "pusher_throttle", + {"pusher": pusher_id}, + ["room_id", "last_sent_ts", "throttle_ms"], + desc="get_throttle_params_by_room" + ) + + params_by_room = {} + for row in res: + params_by_room[row["room_id"]] = { + "last_sent_ts": row["last_sent_ts"], + "throttle_ms": row["throttle_ms"] + } + + defer.returnValue(params_by_room) + + @defer.inlineCallbacks + def set_throttle_params(self, pusher_id, room_id, params): + yield self._simple_upsert( + "pusher_throttle", + {"pusher": pusher_id, "room_id": room_id}, + params, + desc="set_throttle_params" + ) \ No newline at end of file diff --git a/synapse/storage/schema/delta/31/events.sql b/synapse/storage/schema/delta/31/events.sql new file mode 100644 index 000000000..1dd0f9e17 --- /dev/null +++ b/synapse/storage/schema/delta/31/events.sql @@ -0,0 +1,16 @@ +/* Copyright 2016 OpenMarket Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +ALTER TABLE events ADD COLUMN received_ts BIGINT; diff --git a/synapse/storage/schema/delta/31/pusher_throttle.sql b/synapse/storage/schema/delta/31/pusher_throttle.sql new file mode 100644 index 000000000..d86d30c13 --- /dev/null +++ b/synapse/storage/schema/delta/31/pusher_throttle.sql @@ -0,0 +1,23 @@ +/* Copyright 2016 OpenMarket Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +CREATE TABLE pusher_throttle( + pusher BIGINT NOT NULL, + room_id TEXT NOT NULL, + last_sent_ts BIGINT, + throttle_ms BIGINT, + PRIMARY KEY (pusher, room_id) +);