First bits of emailpusher

Mostly logic of when to send an email
This commit is contained in:
David Baker 2016-04-19 14:24:36 +01:00
parent 48af68ba8e
commit 07d765209d
7 changed files with 335 additions and 8 deletions

214
synapse/push/emailpusher.py Normal file
View File

@ -0,0 +1,214 @@
# -*- coding: utf-8 -*-
# Copyright 2016 OpenMarket Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from twisted.internet import defer, reactor
import logging
from synapse.util.metrics import Measure
from synapse.util.async import run_on_reactor
logger = logging.getLogger(__name__)

# Grace period applied to every notification before it may be emailed, so
# the user has a chance to respond to other push or notice the window.
DELAY_BEFORE_MAIL_MS = 120 * 1000

# Throttle applied to a room after the first email about it. It is doubled
# after each further email, up to THROTTLE_MAX_MS.
THROTTLE_START_MS = 120 * 1000
THROTTLE_MAX_MS = THROTTLE_START_MS * (2 ** 11)  # ~3 days

# If no event triggers a notification for this long after the previous,
# the throttle is released.
THROTTLE_RESET_AFTER_MS = THROTTLE_START_MS * (2 ** 11)  # ~3 days
class EmailPusher(object):
    """
    A pusher that sends email notifications about events (approximately)
    when they happen.

    This shares quite a bit of code with httppusher: it would be good to
    factor out the common parts
    """
    def __init__(self, hs, pusherdict):
        """
        Args:
            hs: homeserver object; only get_datastore() and get_clock() are
                used by this class.
            pusherdict (dict): description of this pusher. The 'id',
                'user_name', 'app_id', 'pushkey' and 'last_stream_ordering'
                keys are read.
        """
        self.hs = hs
        self.store = self.hs.get_datastore()
        self.clock = self.hs.get_clock()
        self.pusher_id = pusherdict['id']
        self.user_id = pusherdict['user_name']
        self.app_id = pusherdict['app_id']
        # the pushkey of an email pusher is kept as its email attribute
        self.email = pusherdict['pushkey']
        self.last_stream_ordering = pusherdict['last_stream_ordering']
        # handle returned by reactor.callLater for the next scheduled
        # _process run, or None if nothing is scheduled
        self.timed_call = None
        # room_id -> {"last_sent_ts": ..., "throttle_ms": ...}; populated
        # from the store in on_started
        self.throttle_params = None

        # See httppusher
        self.max_stream_ordering = None

    @defer.inlineCallbacks
    def on_started(self):
        """Load the stored per-room throttle state, then process any backlog."""
        self.throttle_params = yield self.store.get_throttle_params_by_room(
            self.pusher_id
        )
        yield self._process()

    @defer.inlineCallbacks
    def on_new_notifications(self, min_stream_ordering, max_stream_ordering):
        """
        Record the new upper bound of the notification stream and process.

        Args:
            min_stream_ordering: unused here.
            max_stream_ordering: highest stream ordering now available.
        """
        with Measure(self.clock, "push.on_new_notifications"):
            # Don't rely on None comparing lower than every int inside max().
            if self.max_stream_ordering is None:
                self.max_stream_ordering = max_stream_ordering
            else:
                self.max_stream_ordering = max(
                    max_stream_ordering, self.max_stream_ordering
                )
            yield self._process()

    @defer.inlineCallbacks
    def on_timer(self):
        """Callback for the delayed call armed by _process."""
        # The delayed call has fired, so there is no longer anything to cancel.
        self.timed_call = None
        with Measure(self.clock, "push.on_timer"):
            yield self._process()

    @defer.inlineCallbacks
    def _process(self):
        """
        Look at the unread push actions in our stream range and either send
        one email now (if any of them is due) or arm a timer for the moment
        the earliest one becomes due.
        """
        last_notifs = yield self.store.get_time_of_latest_push_action_by_room_for_user(
            self.user_id
        )

        unprocessed = yield self.store.get_unread_push_actions_for_user_in_range(
            self.user_id, self.last_stream_ordering, self.max_stream_ordering
        )

        soonest_due_at = None

        for push_action in unprocessed:
            received_at = push_action['received_ts']
            if received_at is None:
                # no arrival time was stored for this event: treat the
                # initial delay as long since elapsed
                received_at = 0
            notif_ready_at = received_at + DELAY_BEFORE_MAIL_MS

            room_ready_at = self.room_ready_to_notify_at(
                push_action['room_id'], self.get_room_last_notif_ts(
                    last_notifs, push_action['room_id']
                )
            )

            should_notify_at = max(notif_ready_at, room_ready_at)

            if should_notify_at < self.clock.time_msec():
                # one of our notifications is ready for sending, so we send
                # *one* email updating the user on their notifications,
                # we then consider all previously outstanding notifications
                # to be delivered.
                yield self.send_notification(push_action)

                yield self.save_last_stream_ordering_and_success(max(
                    ea['stream_ordering'] for ea in unprocessed
                ))

                yield self.sent_notif_update_throttle(
                    push_action['room_id'], push_action
                )

                # Everything in `unprocessed` has just been marked as
                # delivered; carrying on would send further emails for the
                # same batch and bump the throttle again.
                break

            if soonest_due_at is None or should_notify_at < soonest_due_at:
                soonest_due_at = should_notify_at

        if soonest_due_at is not None:
            # replace any previously armed timer with one for the new
            # earliest due time
            if self.timed_call is not None:
                self.timed_call.cancel()
                self.timed_call = None

            self.timed_call = reactor.callLater(
                self.seconds_until(soonest_due_at), self.on_timer
            )

    @defer.inlineCallbacks
    def save_last_stream_ordering_and_success(self, last_stream_ordering):
        """
        Remember how far through the stream we have got, both on this
        object and in the store (together with the current time).
        """
        self.last_stream_ordering = last_stream_ordering
        yield self.store.update_pusher_last_stream_ordering_and_success(
            self.app_id, self.email, self.user_id,
            last_stream_ordering, self.clock.time_msec()
        )

    def seconds_until(self, ts_msec):
        """
        Args:
            ts_msec: a timestamp in milliseconds on self.clock's timeline.

        Returns:
            The number of seconds from now until ts_msec, never negative.
        """
        # Float division keeps sub-second precision under Python 2. The
        # clamp matters because the clock may have moved past ts_msec since
        # the caller decided it was in the future, and the result is used
        # as a reactor.callLater delay.
        secs = (ts_msec - self.clock.time_msec()) / 1000.0
        return max(secs, 0)

    def get_room_last_notif_ts(self, last_notif_by_room, room_id):
        """Look up room_id in last_notif_by_room, defaulting to 0."""
        if room_id in last_notif_by_room:
            return last_notif_by_room[room_id]
        else:
            return 0

    def get_room_throttle_ms(self, room_id):
        """Current throttle for the room in ms, or 0 if none is recorded."""
        if room_id in self.throttle_params:
            return self.throttle_params[room_id]["throttle_ms"]
        else:
            return 0

    def get_room_last_sent_ts(self, room_id):
        """Time (ms) we last emailed about the room, or 0 if none is recorded."""
        if room_id in self.throttle_params:
            return self.throttle_params[room_id]["last_sent_ts"]
        else:
            return 0

    def room_ready_to_notify_at(self, room_id, last_notif_time):
        """
        Determines the earliest time at which throttling allows us to send
        an email for the given room.

        Args:
            room_id: the room in question.
            last_notif_time: currently unused.

        Returns:
            Timestamp in ms: the time of the last email for the room plus
            the room's current throttle (0 if the room has no throttle state).
        """
        last_sent_ts = self.get_room_last_sent_ts(room_id)
        throttle_ms = self.get_room_throttle_ms(room_id)

        may_send_at = last_sent_ts + throttle_ms
        return may_send_at

    @defer.inlineCallbacks
    def sent_notif_update_throttle(self, room_id, notified_push_action):
        """
        Update and persist the throttle for room_id after an email was sent.

        Args:
            room_id: room the email was about.
            notified_push_action (dict): the push action that triggered the
                email; 'stream_ordering' and 'received_ts' are read.
        """
        # We have sent a notification, so update the throttle accordingly.
        # If the event that triggered the notif happened more than
        # THROTTLE_RESET_AFTER_MS after the previous one that triggered a
        # notif, we release the throttle. Otherwise, the throttle is increased.
        time_of_previous_notifs = yield self.store.get_time_of_last_push_action_before(
            notified_push_action['stream_ordering']
        )

        time_of_this_notifs = notified_push_action['received_ts']

        if time_of_previous_notifs is not None and time_of_this_notifs is not None:
            gap = time_of_this_notifs - time_of_previous_notifs
        else:
            # if we don't know the arrival time of one of the notifs (it was not
            # stored prior to email notification code) then assume a gap of
            # zero which will just not reset the throttle
            gap = 0

        current_throttle_ms = self.get_room_throttle_ms(room_id)

        if gap > THROTTLE_RESET_AFTER_MS or current_throttle_ms == 0:
            # either the room has been quiet long enough to release the
            # throttle, or this is the first email for the room
            new_throttle_ms = THROTTLE_START_MS
        else:
            new_throttle_ms = min(
                current_throttle_ms * 2,
                THROTTLE_MAX_MS
            )

        self.throttle_params[room_id] = {
            "last_sent_ts": self.clock.time_msec(),
            "throttle_ms": new_throttle_ms
        }
        yield self.store.set_throttle_params(
            self.pusher_id, room_id, self.throttle_params[room_id]
        )

    @defer.inlineCallbacks
    def send_notification(self, push_action):
        """
        Placeholder for sending the email: currently this only logs.

        Args:
            push_action: the push action that triggered the email (unused).
        """
        yield run_on_reactor()
        # routine operation, so not an error-level message
        logger.info("sending notif email for user %r", self.user_id)

View File

@ -1,7 +1,9 @@
from httppusher import HttpPusher from httppusher import HttpPusher
from emailpusher import EmailPusher
PUSHER_TYPES = { PUSHER_TYPES = {
'http': HttpPusher 'http': HttpPusher,
'email': EmailPusher,
} }

View File

@ -118,7 +118,8 @@ class EventPushActionsStore(SQLBaseStore):
max_stream_ordering=None): max_stream_ordering=None):
def get_after_receipt(txn): def get_after_receipt(txn):
sql = ( sql = (
"SELECT ep.event_id, ep.stream_ordering, ep.actions " "SELECT ep.event_id, ep.room_id, ep.stream_ordering, ep.actions, "
"e.received_ts "
"FROM event_push_actions AS ep, (" "FROM event_push_actions AS ep, ("
" SELECT room_id, user_id, " " SELECT room_id, user_id, "
" max(topological_ordering) as topological_ordering, " " max(topological_ordering) as topological_ordering, "
@ -127,6 +128,7 @@ class EventPushActionsStore(SQLBaseStore):
" NATURAL JOIN receipts_linearized WHERE receipt_type = 'm.read'" " NATURAL JOIN receipts_linearized WHERE receipt_type = 'm.read'"
" GROUP BY room_id, user_id" " GROUP BY room_id, user_id"
") AS rl " ") AS rl "
"NATURAL JOIN events e "
"WHERE" "WHERE"
" ep.room_id = rl.room_id" " ep.room_id = rl.room_id"
" AND (" " AND ("
@ -153,8 +155,10 @@ class EventPushActionsStore(SQLBaseStore):
def get_no_receipt(txn): def get_no_receipt(txn):
sql = ( sql = (
"SELECT ep.event_id, ep.stream_ordering, ep.actions " "SELECT ep.event_id, ep.room_id, ep.stream_ordering, ep.actions, "
"e.received_ts "
"FROM event_push_actions AS ep " "FROM event_push_actions AS ep "
"JOIN events e ON ep.room_id = e.room_id AND ep.event_id = e.event_id "
"WHERE ep.room_id not in (" "WHERE ep.room_id not in ("
" SELECT room_id FROM events NATURAL JOIN receipts_linearized" " SELECT room_id FROM events NATURAL JOIN receipts_linearized"
" WHERE receipt_type = 'm.read' AND user_id = ? " " WHERE receipt_type = 'm.read' AND user_id = ? "
@ -175,11 +179,30 @@ class EventPushActionsStore(SQLBaseStore):
defer.returnValue([ defer.returnValue([
{ {
"event_id": row[0], "event_id": row[0],
"stream_ordering": row[1], "room_id": row[1],
"actions": json.loads(row[2]), "stream_ordering": row[2],
"actions": json.loads(row[3]),
"received_ts": row[4],
} for row in after_read_receipt + no_read_receipt } for row in after_read_receipt + no_read_receipt
]) ])
@defer.inlineCallbacks
def get_time_of_last_push_action_before(self, stream_ordering):
    """
    Find the received_ts of the most recent push action that precedes the
    given stream ordering.

    NOTE(review): the query is not restricted to a particular user or
    room, so it considers every row in event_push_actions.

    Args:
        stream_ordering: only push actions with a strictly lower
            stream ordering are considered.

    Returns:
        Deferred resolving to the received_ts of that push action's event
        (which may itself be None if the column is NULL), or None if there
        is no earlier push action.
    """
    def f(txn):
        # Newest row strictly *before* the given position: compare with
        # "<" and walk backwards. (Using ">" with ASC would return the
        # first action after it instead.)
        sql = (
            "SELECT e.received_ts "
            "FROM event_push_actions AS ep "
            "JOIN events e ON ep.room_id = e.room_id AND ep.event_id = e.event_id "
            "WHERE ep.stream_ordering < ? "
            "ORDER BY ep.stream_ordering DESC "
            "LIMIT 1"
        )
        txn.execute(sql, (stream_ordering,))
        return txn.fetchone()
    result = yield self.runInteraction("get_time_of_last_push_action_before", f)
    defer.returnValue(result[0] if result is not None else None)
@defer.inlineCallbacks @defer.inlineCallbacks
def get_latest_push_action_stream_ordering(self): def get_latest_push_action_stream_ordering(self):
def f(txn): def f(txn):
@ -190,6 +213,26 @@ class EventPushActionsStore(SQLBaseStore):
) )
defer.returnValue(result[0] or 0) defer.returnValue(result[0] or 0)
@defer.inlineCallbacks
def get_time_of_latest_push_action_by_room_for_user(self, user_id):
    """
    Returns only the received_ts of the last notification in each of the
    user's rooms, in a dict by room_id

    Args:
        user_id: the user whose push actions should be considered.

    Returns:
        Deferred resolving to a dict of room_id -> MAX(received_ts) over
        that user's push actions in the room.
    """
    def f(txn):
        # Restrict to the requested user: without this filter the result
        # would aggregate every user's push actions.
        # NOTE(review): assumes event_push_actions has a user_id column -
        # confirm against the schema.
        txn.execute(
            "SELECT ep.room_id, MAX(e.received_ts) "
            "FROM event_push_actions AS ep "
            "JOIN events e ON ep.room_id = e.room_id AND ep.event_id = e.event_id "
            "WHERE ep.user_id = ? "
            "GROUP BY ep.room_id",
            (user_id,)
        )
        return txn.fetchall()
    result = yield self.runInteraction(
        "get_time_of_latest_push_action_by_room_for_user", f
    )
    defer.returnValue({row[0]: row[1] for row in result})
def _remove_push_actions_for_event_id_txn(self, txn, room_id, event_id): def _remove_push_actions_for_event_id_txn(self, txn, room_id, event_id):
# Sad that we have to blow away the cache for the whole room here # Sad that we have to blow away the cache for the whole room here
txn.call_after( txn.call_after(

View File

@ -55,6 +55,7 @@ class EventsStore(SQLBaseStore):
def __init__(self, hs): def __init__(self, hs):
super(EventsStore, self).__init__(hs) super(EventsStore, self).__init__(hs)
self._clock = hs.get_clock()
self.register_background_update_handler( self.register_background_update_handler(
self.EVENT_ORIGIN_SERVER_TS_NAME, self._background_reindex_origin_server_ts self.EVENT_ORIGIN_SERVER_TS_NAME, self._background_reindex_origin_server_ts
) )
@ -427,6 +428,7 @@ class EventsStore(SQLBaseStore):
"outlier": event.internal_metadata.is_outlier(), "outlier": event.internal_metadata.is_outlier(),
"content": encode_json(event.content).decode("UTF-8"), "content": encode_json(event.content).decode("UTF-8"),
"origin_server_ts": int(event.origin_server_ts), "origin_server_ts": int(event.origin_server_ts),
"received_ts": self._clock.time_msec(),
} }
for event, _ in events_and_contexts for event, _ in events_and_contexts
], ],

View File

@ -230,3 +230,30 @@ class PusherStore(SQLBaseStore):
{'failing_since': failing_since}, {'failing_since': failing_since},
desc="update_pusher_failing_since", desc="update_pusher_failing_since",
) )
@defer.inlineCallbacks
def get_throttle_params_by_room(self, pusher_id):
    """
    Fetch the stored throttle state of a pusher for all of its rooms.

    Args:
        pusher_id: value matched against the "pusher" column.

    Returns:
        Deferred resolving to a dict of room_id ->
        {"last_sent_ts": ..., "throttle_ms": ...}.
    """
    rows = yield self._simple_select_list(
        "pusher_throttle",
        {"pusher": pusher_id},
        ["room_id", "last_sent_ts", "throttle_ms"],
        desc="get_throttle_params_by_room"
    )

    # re-key the flat row list by room
    defer.returnValue({
        row["room_id"]: {
            "last_sent_ts": row["last_sent_ts"],
            "throttle_ms": row["throttle_ms"],
        }
        for row in rows
    })
@defer.inlineCallbacks
def set_throttle_params(self, pusher_id, room_id, params):
    """
    Insert or update the throttle state of one pusher for one room.

    Args:
        pusher_id: value for the "pusher" column.
        room_id: value for the "room_id" column.
        params (dict): column values to store for that (pusher, room) row.
    """
    row_key = {"pusher": pusher_id, "room_id": room_id}
    yield self._simple_upsert(
        "pusher_throttle", row_key, params, desc="set_throttle_params"
    )

View File

@ -0,0 +1,16 @@
/* Copyright 2016 OpenMarket Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* Time, in ms, at which this server stored the event (filled in from the
 * server's own clock when the event row is written). Nullable: rows that
 * already exist when this delta is applied keep NULL here.
 */
ALTER TABLE events ADD COLUMN received_ts BIGINT;

View File

@ -0,0 +1,23 @@
/* Copyright 2016 OpenMarket Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* Per-pusher, per-room email throttle state, one row per (pusher, room_id).
 *   pusher       - id of the pusher the state belongs to
 *   room_id      - room the state applies to
 *   last_sent_ts - time (ms) a notification was last sent for the room
 *   throttle_ms  - current throttle (ms) applied after last_sent_ts
 */
CREATE TABLE pusher_throttle(
pusher BIGINT NOT NULL,
room_id TEXT NOT NULL,
last_sent_ts BIGINT,
throttle_ms BIGINT,
PRIMARY KEY (pusher, room_id)
);