Update all the workers and master to use TCP replication

This commit is contained in:
Erik Johnston 2017-03-27 16:33:44 +01:00
parent 3a1f3f8388
commit 36c28bc467
9 changed files with 255 additions and 397 deletions

View File

@ -26,8 +26,8 @@ from synapse.replication.slave.storage.directory import DirectoryStore
from synapse.replication.slave.storage.events import SlavedEventStore from synapse.replication.slave.storage.events import SlavedEventStore
from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore
from synapse.replication.slave.storage.registration import SlavedRegistrationStore from synapse.replication.slave.storage.registration import SlavedRegistrationStore
from synapse.replication.tcp.client import ReplicationClientHandler
from synapse.storage.engines import create_engine from synapse.storage.engines import create_engine
from synapse.util.async import sleep
from synapse.util.httpresourcetree import create_resource_tree from synapse.util.httpresourcetree import create_resource_tree
from synapse.util.logcontext import LoggingContext, PreserveLoggingContext from synapse.util.logcontext import LoggingContext, PreserveLoggingContext
from synapse.util.manhole import manhole from synapse.util.manhole import manhole
@ -36,7 +36,7 @@ from synapse.util.versionstring import get_version_string
from synapse import events from synapse import events
from twisted.internet import reactor, defer from twisted.internet import reactor
from twisted.web.resource import Resource from twisted.web.resource import Resource
from daemonize import Daemonize from daemonize import Daemonize
@ -120,30 +120,23 @@ class AppserviceServer(HomeServer):
else: else:
logger.warn("Unrecognized listener type: %s", listener["type"]) logger.warn("Unrecognized listener type: %s", listener["type"])
@defer.inlineCallbacks self.get_tcp_replication().start_replication(self)
def replicate(self):
http_client = self.get_simple_http_client()
store = self.get_datastore()
replication_url = self.config.worker_replication_url
appservice_handler = self.get_application_service_handler()
@defer.inlineCallbacks def build_tcp_replication(self):
def replicate(results): return ASReplicationHandler(self)
stream = results.get("events")
if stream:
max_stream_id = stream["position"]
yield appservice_handler.notify_interested_services(max_stream_id)
while True:
try: class ASReplicationHandler(ReplicationClientHandler):
args = store.stream_positions() def __init__(self, hs):
args["timeout"] = 30000 super(ASReplicationHandler, self).__init__(hs.get_datastore())
result = yield http_client.get_json(replication_url, args=args) self.appservice_handler = hs.get_application_service_handler()
yield store.process_replication(result)
replicate(result) def on_rdata(self, stream_name, token, rows):
except: super(ASReplicationHandler, self).on_rdata(stream_name, token, rows)
logger.exception("Error replicating from %r", replication_url)
yield sleep(30) if stream_name == "events":
max_stream_id = self.store.get_room_max_stream_ordering()
self.appservice_handler.notify_interested_services(max_stream_id)
def start(config_options): def start(config_options):
@ -199,7 +192,6 @@ def start(config_options):
reactor.run() reactor.run()
def start(): def start():
ps.replicate()
ps.get_datastore().start_profiling() ps.get_datastore().start_profiling()
ps.get_state_handler().start_caching() ps.get_state_handler().start_caching()

View File

@ -30,11 +30,11 @@ from synapse.replication.slave.storage.room import RoomStore
from synapse.replication.slave.storage.directory import DirectoryStore from synapse.replication.slave.storage.directory import DirectoryStore
from synapse.replication.slave.storage.registration import SlavedRegistrationStore from synapse.replication.slave.storage.registration import SlavedRegistrationStore
from synapse.replication.slave.storage.transactions import TransactionStore from synapse.replication.slave.storage.transactions import TransactionStore
from synapse.replication.tcp.client import ReplicationClientHandler
from synapse.rest.client.v1.room import PublicRoomListRestServlet from synapse.rest.client.v1.room import PublicRoomListRestServlet
from synapse.server import HomeServer from synapse.server import HomeServer
from synapse.storage.client_ips import ClientIpStore from synapse.storage.client_ips import ClientIpStore
from synapse.storage.engines import create_engine from synapse.storage.engines import create_engine
from synapse.util.async import sleep
from synapse.util.httpresourcetree import create_resource_tree from synapse.util.httpresourcetree import create_resource_tree
from synapse.util.logcontext import LoggingContext, PreserveLoggingContext from synapse.util.logcontext import LoggingContext, PreserveLoggingContext
from synapse.util.manhole import manhole from synapse.util.manhole import manhole
@ -45,7 +45,7 @@ from synapse.crypto import context_factory
from synapse import events from synapse import events
from twisted.internet import reactor, defer from twisted.internet import reactor
from twisted.web.resource import Resource from twisted.web.resource import Resource
from daemonize import Daemonize from daemonize import Daemonize
@ -145,21 +145,10 @@ class ClientReaderServer(HomeServer):
else: else:
logger.warn("Unrecognized listener type: %s", listener["type"]) logger.warn("Unrecognized listener type: %s", listener["type"])
@defer.inlineCallbacks self.get_tcp_replication().start_replication(self)
def replicate(self):
http_client = self.get_simple_http_client()
store = self.get_datastore()
replication_url = self.config.worker_replication_url
while True: def build_tcp_replication(self):
try: return ReplicationClientHandler(self.get_datastore())
args = store.stream_positions()
args["timeout"] = 30000
result = yield http_client.get_json(replication_url, args=args)
yield store.process_replication(result)
except:
logger.exception("Error replicating from %r", replication_url)
yield sleep(5)
def start(config_options): def start(config_options):
@ -209,7 +198,6 @@ def start(config_options):
def start(): def start():
ss.get_state_handler().start_caching() ss.get_state_handler().start_caching()
ss.get_datastore().start_profiling() ss.get_datastore().start_profiling()
ss.replicate()
reactor.callWhenRunning(start) reactor.callWhenRunning(start)

View File

@ -27,9 +27,9 @@ from synapse.replication.slave.storage.keys import SlavedKeyStore
from synapse.replication.slave.storage.room import RoomStore from synapse.replication.slave.storage.room import RoomStore
from synapse.replication.slave.storage.transactions import TransactionStore from synapse.replication.slave.storage.transactions import TransactionStore
from synapse.replication.slave.storage.directory import DirectoryStore from synapse.replication.slave.storage.directory import DirectoryStore
from synapse.replication.tcp.client import ReplicationClientHandler
from synapse.server import HomeServer from synapse.server import HomeServer
from synapse.storage.engines import create_engine from synapse.storage.engines import create_engine
from synapse.util.async import sleep
from synapse.util.httpresourcetree import create_resource_tree from synapse.util.httpresourcetree import create_resource_tree
from synapse.util.logcontext import LoggingContext, PreserveLoggingContext from synapse.util.logcontext import LoggingContext, PreserveLoggingContext
from synapse.util.manhole import manhole from synapse.util.manhole import manhole
@ -42,7 +42,7 @@ from synapse.crypto import context_factory
from synapse import events from synapse import events
from twisted.internet import reactor, defer from twisted.internet import reactor
from twisted.web.resource import Resource from twisted.web.resource import Resource
from daemonize import Daemonize from daemonize import Daemonize
@ -134,21 +134,10 @@ class FederationReaderServer(HomeServer):
else: else:
logger.warn("Unrecognized listener type: %s", listener["type"]) logger.warn("Unrecognized listener type: %s", listener["type"])
@defer.inlineCallbacks self.get_tcp_replication().start_replication(self)
def replicate(self):
http_client = self.get_simple_http_client()
store = self.get_datastore()
replication_url = self.config.worker_replication_url
while True: def build_tcp_replication(self):
try: return ReplicationClientHandler(self.get_datastore())
args = store.stream_positions()
args["timeout"] = 30000
result = yield http_client.get_json(replication_url, args=args)
yield store.process_replication(result)
except:
logger.exception("Error replicating from %r", replication_url)
yield sleep(5)
def start(config_options): def start(config_options):
@ -198,7 +187,6 @@ def start(config_options):
def start(): def start():
ss.get_state_handler().start_caching() ss.get_state_handler().start_caching()
ss.get_datastore().start_profiling() ss.get_datastore().start_profiling()
ss.replicate()
reactor.callWhenRunning(start) reactor.callWhenRunning(start)

View File

@ -31,9 +31,10 @@ from synapse.replication.slave.storage.receipts import SlavedReceiptsStore
from synapse.replication.slave.storage.registration import SlavedRegistrationStore from synapse.replication.slave.storage.registration import SlavedRegistrationStore
from synapse.replication.slave.storage.transactions import TransactionStore from synapse.replication.slave.storage.transactions import TransactionStore
from synapse.replication.slave.storage.devices import SlavedDeviceStore from synapse.replication.slave.storage.devices import SlavedDeviceStore
from synapse.replication.tcp.client import ReplicationClientHandler
from synapse.storage.engines import create_engine from synapse.storage.engines import create_engine
from synapse.storage.presence import UserPresenceState from synapse.storage.presence import UserPresenceState
from synapse.util.async import sleep from synapse.util.async import Linearizer
from synapse.util.httpresourcetree import create_resource_tree from synapse.util.httpresourcetree import create_resource_tree
from synapse.util.logcontext import LoggingContext, PreserveLoggingContext from synapse.util.logcontext import LoggingContext, PreserveLoggingContext
from synapse.util.manhole import manhole from synapse.util.manhole import manhole
@ -59,7 +60,23 @@ class FederationSenderSlaveStore(
SlavedDeviceInboxStore, TransactionStore, SlavedReceiptsStore, SlavedEventStore, SlavedDeviceInboxStore, TransactionStore, SlavedReceiptsStore, SlavedEventStore,
SlavedRegistrationStore, SlavedDeviceStore, SlavedRegistrationStore, SlavedDeviceStore,
): ):
pass def __init__(self, db_conn, hs):
super(FederationSenderSlaveStore, self).__init__(db_conn, hs)
self.federation_out_pos_startup = self._get_federation_out_pos(db_conn)
def _get_federation_out_pos(self, db_conn):
sql = (
"SELECT stream_id FROM federation_stream_position"
" WHERE type = ?"
)
sql = self.database_engine.convert_param_style(sql)
txn = db_conn.cursor()
txn.execute(sql, ("federation",))
rows = txn.fetchall()
txn.close()
return rows[0][0] if rows else -1
class FederationSenderServer(HomeServer): class FederationSenderServer(HomeServer):
@ -127,26 +144,29 @@ class FederationSenderServer(HomeServer):
else: else:
logger.warn("Unrecognized listener type: %s", listener["type"]) logger.warn("Unrecognized listener type: %s", listener["type"])
@defer.inlineCallbacks self.get_tcp_replication().start_replication(self)
def replicate(self):
http_client = self.get_simple_http_client()
store = self.get_datastore()
replication_url = self.config.worker_replication_url
send_handler = FederationSenderHandler(self)
send_handler.on_start() def build_tcp_replication(self):
return FederationSenderReplicationHandler(self)
while True:
try: class FederationSenderReplicationHandler(ReplicationClientHandler):
args = store.stream_positions() def __init__(self, hs):
args.update((yield send_handler.stream_positions())) super(FederationSenderReplicationHandler, self).__init__(hs.get_datastore())
args["timeout"] = 30000 self.send_handler = FederationSenderHandler(hs)
result = yield http_client.get_json(replication_url, args=args)
yield store.process_replication(result) def on_rdata(self, stream_name, token, rows):
yield send_handler.process_replication(result) super(FederationSenderReplicationHandler, self).on_rdata(
except: stream_name, token, rows
logger.exception("Error replicating from %r", replication_url) )
yield sleep(30) self.send_handler.process_replication_rows(stream_name, token, rows)
if stream_name == "federation":
self.send_federation_ack(token)
def get_streams_to_replicate(self):
args = super(FederationSenderReplicationHandler, self).get_streams_to_replicate()
args.update(self.send_handler.stream_positions())
return args
def start(config_options): def start(config_options):
@ -205,7 +225,6 @@ def start(config_options):
reactor.run() reactor.run()
def start(): def start():
ps.replicate()
ps.get_datastore().start_profiling() ps.get_datastore().start_profiling()
ps.get_state_handler().start_caching() ps.get_state_handler().start_caching()
@ -233,6 +252,9 @@ class FederationSenderHandler(object):
self.store = hs.get_datastore() self.store = hs.get_datastore()
self.federation_sender = hs.get_federation_sender() self.federation_sender = hs.get_federation_sender()
self.federation_position = self.store.federation_out_pos_startup
self._fed_position_linearizer = Linearizer(name="_fed_position_linearizer")
self._room_serials = {} self._room_serials = {}
self._room_typing = {} self._room_typing = {}
@ -243,25 +265,13 @@ class FederationSenderHandler(object):
self.store.get_room_max_stream_ordering() self.store.get_room_max_stream_ordering()
) )
@defer.inlineCallbacks
def stream_positions(self): def stream_positions(self):
stream_id = yield self.store.get_federation_out_pos("federation") return {"federation": self.federation_position}
defer.returnValue({
"federation": stream_id,
# Ack stuff we've "processed", this should only be called from def process_replication_rows(self, stream_name, token, rows):
# one process.
"federation_ack": stream_id,
})
@defer.inlineCallbacks
def process_replication(self, result):
# The federation stream contains things that we want to send out, e.g. # The federation stream contains things that we want to send out, e.g.
# presence, typing, etc. # presence, typing, etc.
fed_stream = result.get("federation") if stream_name == "federation":
if fed_stream:
latest_id = int(fed_stream["position"])
# The federation stream containis a bunch of different types of # The federation stream containis a bunch of different types of
# rows that need to be handled differently. We parse the rows, put # rows that need to be handled differently. We parse the rows, put
# them into the appropriate collection and then send them off. # them into the appropriate collection and then send them off.
@ -272,8 +282,9 @@ class FederationSenderHandler(object):
device_destinations = set() device_destinations = set()
# Parse the rows in the stream # Parse the rows in the stream
for row in fed_stream["rows"]: for row in rows:
position, typ, content_js = row typ = row.type
content_js = row.data
content = json.loads(content_js) content = json.loads(content_js)
if typ == send_queue.PRESENCE_TYPE: if typ == send_queue.PRESENCE_TYPE:
@ -325,16 +336,19 @@ class FederationSenderHandler(object):
for destination in device_destinations: for destination in device_destinations:
self.federation_sender.send_device_messages(destination) self.federation_sender.send_device_messages(destination)
# Record where we are in the stream. self.update_token(token)
yield self.store.update_federation_out_pos(
"federation", latest_id
)
# We also need to poke the federation sender when new events happen # We also need to poke the federation sender when new events happen
event_stream = result.get("events") elif stream_name == "events":
if event_stream: self.federation_sender.notify_new_events(token)
latest_pos = event_stream["position"]
self.federation_sender.notify_new_events(latest_pos) @defer.inlineCallbacks
def update_token(self, token):
self.federation_position = token
with (yield self._fed_position_linearizer.queue(None)):
yield self.store.update_federation_out_pos(
"federation", self.federation_position
)
if __name__ == '__main__': if __name__ == '__main__':

View File

@ -25,13 +25,13 @@ from synapse.replication.slave.storage._base import BaseSlavedStore
from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore
from synapse.replication.slave.storage.registration import SlavedRegistrationStore from synapse.replication.slave.storage.registration import SlavedRegistrationStore
from synapse.replication.slave.storage.transactions import TransactionStore from synapse.replication.slave.storage.transactions import TransactionStore
from synapse.replication.tcp.client import ReplicationClientHandler
from synapse.rest.media.v0.content_repository import ContentRepoResource from synapse.rest.media.v0.content_repository import ContentRepoResource
from synapse.rest.media.v1.media_repository import MediaRepositoryResource from synapse.rest.media.v1.media_repository import MediaRepositoryResource
from synapse.server import HomeServer from synapse.server import HomeServer
from synapse.storage.client_ips import ClientIpStore from synapse.storage.client_ips import ClientIpStore
from synapse.storage.engines import create_engine from synapse.storage.engines import create_engine
from synapse.storage.media_repository import MediaRepositoryStore from synapse.storage.media_repository import MediaRepositoryStore
from synapse.util.async import sleep
from synapse.util.httpresourcetree import create_resource_tree from synapse.util.httpresourcetree import create_resource_tree
from synapse.util.logcontext import LoggingContext, PreserveLoggingContext from synapse.util.logcontext import LoggingContext, PreserveLoggingContext
from synapse.util.manhole import manhole from synapse.util.manhole import manhole
@ -45,7 +45,7 @@ from synapse.crypto import context_factory
from synapse import events from synapse import events
from twisted.internet import reactor, defer from twisted.internet import reactor
from twisted.web.resource import Resource from twisted.web.resource import Resource
from daemonize import Daemonize from daemonize import Daemonize
@ -142,21 +142,10 @@ class MediaRepositoryServer(HomeServer):
else: else:
logger.warn("Unrecognized listener type: %s", listener["type"]) logger.warn("Unrecognized listener type: %s", listener["type"])
@defer.inlineCallbacks self.get_tcp_replication().start_replication(self)
def replicate(self):
http_client = self.get_simple_http_client()
store = self.get_datastore()
replication_url = self.config.worker_replication_url
while True: def build_tcp_replication(self):
try: return ReplicationClientHandler(self.get_datastore())
args = store.stream_positions()
args["timeout"] = 30000
result = yield http_client.get_json(replication_url, args=args)
yield store.process_replication(result)
except:
logger.exception("Error replicating from %r", replication_url)
yield sleep(5)
def start(config_options): def start(config_options):
@ -206,7 +195,6 @@ def start(config_options):
def start(): def start():
ss.get_state_handler().start_caching() ss.get_state_handler().start_caching()
ss.get_datastore().start_profiling() ss.get_datastore().start_profiling()
ss.replicate()
reactor.callWhenRunning(start) reactor.callWhenRunning(start)

View File

@ -27,9 +27,9 @@ from synapse.replication.slave.storage.events import SlavedEventStore
from synapse.replication.slave.storage.pushers import SlavedPusherStore from synapse.replication.slave.storage.pushers import SlavedPusherStore
from synapse.replication.slave.storage.receipts import SlavedReceiptsStore from synapse.replication.slave.storage.receipts import SlavedReceiptsStore
from synapse.replication.slave.storage.account_data import SlavedAccountDataStore from synapse.replication.slave.storage.account_data import SlavedAccountDataStore
from synapse.replication.tcp.client import ReplicationClientHandler
from synapse.storage.engines import create_engine from synapse.storage.engines import create_engine
from synapse.storage import DataStore from synapse.storage import DataStore
from synapse.util.async import sleep
from synapse.util.httpresourcetree import create_resource_tree from synapse.util.httpresourcetree import create_resource_tree
from synapse.util.logcontext import LoggingContext, preserve_fn, \ from synapse.util.logcontext import LoggingContext, preserve_fn, \
PreserveLoggingContext PreserveLoggingContext
@ -39,7 +39,7 @@ from synapse.util.versionstring import get_version_string
from synapse import events from synapse import events
from twisted.internet import reactor, defer from twisted.internet import reactor
from twisted.web.resource import Resource from twisted.web.resource import Resource
from daemonize import Daemonize from daemonize import Daemonize
@ -89,7 +89,6 @@ class PusherSlaveStore(
class PusherServer(HomeServer): class PusherServer(HomeServer):
def get_db_conn(self, run_new_connection=True): def get_db_conn(self, run_new_connection=True):
# Any param beginning with cp_ is a parameter for adbapi, and should # Any param beginning with cp_ is a parameter for adbapi, and should
# not be passed to the database engine. # not be passed to the database engine.
@ -109,16 +108,7 @@ class PusherServer(HomeServer):
logger.info("Finished setting up.") logger.info("Finished setting up.")
def remove_pusher(self, app_id, push_key, user_id): def remove_pusher(self, app_id, push_key, user_id):
http_client = self.get_simple_http_client() self.get_tcp_replication().send_remove_pusher(app_id, push_key, user_id)
replication_url = self.config.worker_replication_url
url = replication_url + "/remove_pushers"
return http_client.post_json_get_json(url, {
"remove": [{
"app_id": app_id,
"push_key": push_key,
"user_id": user_id,
}]
})
def _listen_http(self, listener_config): def _listen_http(self, listener_config):
port = listener_config["port"] port = listener_config["port"]
@ -166,73 +156,51 @@ class PusherServer(HomeServer):
else: else:
logger.warn("Unrecognized listener type: %s", listener["type"]) logger.warn("Unrecognized listener type: %s", listener["type"])
@defer.inlineCallbacks self.get_tcp_replication().start_replication(self)
def replicate(self):
http_client = self.get_simple_http_client()
store = self.get_datastore()
replication_url = self.config.worker_replication_url
pusher_pool = self.get_pusherpool()
def stop_pusher(user_id, app_id, pushkey): def build_tcp_replication(self):
key = "%s:%s" % (app_id, pushkey) return PusherReplicationHandler(self)
pushers_for_user = pusher_pool.pushers.get(user_id, {})
pusher = pushers_for_user.pop(key, None)
if pusher is None:
return
logger.info("Stopping pusher %r / %r", user_id, key)
pusher.on_stop()
def start_pusher(user_id, app_id, pushkey):
key = "%s:%s" % (app_id, pushkey)
logger.info("Starting pusher %r / %r", user_id, key)
return pusher_pool._refresh_pusher(app_id, pushkey, user_id)
@defer.inlineCallbacks class PusherReplicationHandler(ReplicationClientHandler):
def poke_pushers(results): def __init__(self, hs):
pushers_rows = set( super(PusherReplicationHandler, self).__init__(hs.get_datastore())
map(tuple, results.get("pushers", {}).get("rows", []))
self.pusher_pool = hs.get_pusherpool()
def on_rdata(self, stream_name, token, rows):
super(PusherReplicationHandler, self).on_rdata(stream_name, token, rows)
self.poke_pushers(stream_name, token, rows)
def poke_pushers(self, stream_name, token, rows):
if stream_name == "pushers":
for row in rows:
if row.deleted:
self.stop_pusher(row.user_id, row.app_id, row.pushkey)
else:
self.start_pusher(row.user_id, row.app_id, row.pushkey)
elif stream_name == "events":
preserve_fn(self.pusher_pool.on_new_notifications)(
token, token,
) )
deleted_pushers_rows = set( elif stream_name == "receipts":
map(tuple, results.get("deleted_pushers", {}).get("rows", [])) preserve_fn(self.pusher_pool.on_new_receipts)(
token, token, set(row.room_id for row in rows)
) )
for row in sorted(pushers_rows | deleted_pushers_rows):
if row in deleted_pushers_rows:
user_id, app_id, pushkey = row[1:4]
stop_pusher(user_id, app_id, pushkey)
elif row in pushers_rows:
user_id = row[1]
app_id = row[5]
pushkey = row[8]
yield start_pusher(user_id, app_id, pushkey)
stream = results.get("events") def stop_pusher(self, user_id, app_id, pushkey):
if stream and stream["rows"]: key = "%s:%s" % (app_id, pushkey)
min_stream_id = stream["rows"][0][0] pushers_for_user = self.pusher_pool.pushers.get(user_id, {})
max_stream_id = stream["position"] pusher = pushers_for_user.pop(key, None)
preserve_fn(pusher_pool.on_new_notifications)( if pusher is None:
min_stream_id, max_stream_id return
) logger.info("Stopping pusher %r / %r", user_id, key)
pusher.on_stop()
stream = results.get("receipts") def start_pusher(self, user_id, app_id, pushkey):
if stream and stream["rows"]: key = "%s:%s" % (app_id, pushkey)
rows = stream["rows"] logger.info("Starting pusher %r / %r", user_id, key)
affected_room_ids = set(row[1] for row in rows) return self.pusher_pool._refresh_pusher(app_id, pushkey, user_id)
min_stream_id = rows[0][0]
max_stream_id = stream["position"]
preserve_fn(pusher_pool.on_new_receipts)(
min_stream_id, max_stream_id, affected_room_ids
)
while True:
try:
args = store.stream_positions()
args["timeout"] = 30000
result = yield http_client.get_json(replication_url, args=args)
yield store.process_replication(result)
poke_pushers(result)
except:
logger.exception("Error replicating from %r", replication_url)
yield sleep(30)
def start(config_options): def start(config_options):
@ -288,7 +256,6 @@ def start(config_options):
reactor.run() reactor.run()
def start(): def start():
ps.replicate()
ps.get_pusherpool().start() ps.get_pusherpool().start()
ps.get_datastore().start_profiling() ps.get_datastore().start_profiling()
ps.get_state_handler().start_caching() ps.get_state_handler().start_caching()

View File

@ -16,7 +16,7 @@
import synapse import synapse
from synapse.api.constants import EventTypes, PresenceState from synapse.api.constants import EventTypes
from synapse.config._base import ConfigError from synapse.config._base import ConfigError
from synapse.config.homeserver import HomeServerConfig from synapse.config.homeserver import HomeServerConfig
from synapse.config.logger import setup_logging from synapse.config.logger import setup_logging
@ -40,15 +40,14 @@ from synapse.replication.slave.storage.presence import SlavedPresenceStore
from synapse.replication.slave.storage.deviceinbox import SlavedDeviceInboxStore from synapse.replication.slave.storage.deviceinbox import SlavedDeviceInboxStore
from synapse.replication.slave.storage.devices import SlavedDeviceStore from synapse.replication.slave.storage.devices import SlavedDeviceStore
from synapse.replication.slave.storage.room import RoomStore from synapse.replication.slave.storage.room import RoomStore
from synapse.replication.tcp.client import ReplicationClientHandler
from synapse.server import HomeServer from synapse.server import HomeServer
from synapse.storage.client_ips import ClientIpStore from synapse.storage.client_ips import ClientIpStore
from synapse.storage.engines import create_engine from synapse.storage.engines import create_engine
from synapse.storage.presence import PresenceStore, UserPresenceState from synapse.storage.presence import PresenceStore, UserPresenceState
from synapse.storage.roommember import RoomMemberStore from synapse.storage.roommember import RoomMemberStore
from synapse.util.async import sleep
from synapse.util.httpresourcetree import create_resource_tree from synapse.util.httpresourcetree import create_resource_tree
from synapse.util.logcontext import LoggingContext, preserve_fn, \ from synapse.util.logcontext import LoggingContext, PreserveLoggingContext
PreserveLoggingContext
from synapse.util.manhole import manhole from synapse.util.manhole import manhole
from synapse.util.rlimit import change_resource_limit from synapse.util.rlimit import change_resource_limit
from synapse.util.stringutils import random_string from synapse.util.stringutils import random_string
@ -111,7 +110,6 @@ class SynchrotronPresence(object):
self.http_client = hs.get_simple_http_client() self.http_client = hs.get_simple_http_client()
self.store = hs.get_datastore() self.store = hs.get_datastore()
self.user_to_num_current_syncs = {} self.user_to_num_current_syncs = {}
self.syncing_users_url = hs.config.worker_replication_url + "/syncing_users"
self.clock = hs.get_clock() self.clock = hs.get_clock()
self.notifier = hs.get_notifier() self.notifier = hs.get_notifier()
@ -124,14 +122,7 @@ class SynchrotronPresence(object):
self.process_id = random_string(16) self.process_id = random_string(16)
logger.info("Presence process_id is %r", self.process_id) logger.info("Presence process_id is %r", self.process_id)
self._sending_sync = False self.sync_callback = None
self._need_to_send_sync = False
self.clock.looping_call(
self._send_syncing_users_regularly,
UPDATE_SYNCING_USERS_MS,
)
reactor.addSystemEventTrigger("before", "shutdown", self._on_shutdown)
def set_state(self, user, state, ignore_status_msg=False): def set_state(self, user, state, ignore_status_msg=False):
# TODO Hows this supposed to work? # TODO Hows this supposed to work?
@ -142,15 +133,15 @@ class SynchrotronPresence(object):
_get_interested_parties = PresenceHandler._get_interested_parties.__func__ _get_interested_parties = PresenceHandler._get_interested_parties.__func__
current_state_for_users = PresenceHandler.current_state_for_users.__func__ current_state_for_users = PresenceHandler.current_state_for_users.__func__
@defer.inlineCallbacks
def user_syncing(self, user_id, affect_presence): def user_syncing(self, user_id, affect_presence):
if affect_presence: if affect_presence:
curr_sync = self.user_to_num_current_syncs.get(user_id, 0) curr_sync = self.user_to_num_current_syncs.get(user_id, 0)
self.user_to_num_current_syncs[user_id] = curr_sync + 1 self.user_to_num_current_syncs[user_id] = curr_sync + 1
prev_states = yield self.current_state_for_users([user_id])
if prev_states[user_id].state == PresenceState.OFFLINE: if self.sync_callback:
# TODO: Don't block the sync request on this HTTP hit. if self.user_to_num_current_syncs[user_id] == 1:
yield self._send_syncing_users_now() now = self.clock.time_msec()
self.sync_callback(user_id, True, now)
def _end(): def _end():
# We check that the user_id is in user_to_num_current_syncs because # We check that the user_id is in user_to_num_current_syncs because
@ -159,6 +150,11 @@ class SynchrotronPresence(object):
if affect_presence and user_id in self.user_to_num_current_syncs: if affect_presence and user_id in self.user_to_num_current_syncs:
self.user_to_num_current_syncs[user_id] -= 1 self.user_to_num_current_syncs[user_id] -= 1
if self.sync_callback:
if self.user_to_num_current_syncs[user_id] == 0:
now = self.clock.time_msec()
self.sync_callback(user_id, False, now)
@contextlib.contextmanager @contextlib.contextmanager
def _user_syncing(): def _user_syncing():
try: try:
@ -166,49 +162,7 @@ class SynchrotronPresence(object):
finally: finally:
_end() _end()
defer.returnValue(_user_syncing()) return defer.succeed(_user_syncing())
@defer.inlineCallbacks
def _on_shutdown(self):
# When the synchrotron is shutdown tell the master to clear the in
# progress syncs for this process
self.user_to_num_current_syncs.clear()
yield self._send_syncing_users_now()
def _send_syncing_users_regularly(self):
# Only send an update if we aren't in the middle of sending one.
if not self._sending_sync:
preserve_fn(self._send_syncing_users_now)()
@defer.inlineCallbacks
def _send_syncing_users_now(self):
if self._sending_sync:
# We don't want to race with sending another update.
# Instead we wait for that update to finish and send another
# update afterwards.
self._need_to_send_sync = True
return
# Flag that we are sending an update.
self._sending_sync = True
yield self.http_client.post_json_get_json(self.syncing_users_url, {
"process_id": self.process_id,
"syncing_users": [
user_id for user_id, count in self.user_to_num_current_syncs.items()
if count > 0
],
})
# Unset the flag as we are no longer sending an update.
self._sending_sync = False
if self._need_to_send_sync:
# If something happened while we were sending the update then
# we might need to send another update.
# TODO: Check if the update that was sent matches the current state
# as we only need to send an update if they are different.
self._need_to_send_sync = False
yield self._send_syncing_users_now()
@defer.inlineCallbacks @defer.inlineCallbacks
def notify_from_replication(self, states, stream_id): def notify_from_replication(self, states, stream_id):
@ -223,26 +177,24 @@ class SynchrotronPresence(object):
) )
@defer.inlineCallbacks @defer.inlineCallbacks
def process_replication(self, result): def process_replication_rows(self, token, rows):
stream = result.get("presence", {"rows": []}) states = [UserPresenceState(
states = [] row.user_id, row.state, row.last_active_ts,
for row in stream["rows"]: row.last_federation_update_ts, row.last_user_sync_ts, row.status_msg,
( row.currently_active
position, user_id, state, last_active_ts, ) for row in rows]
last_federation_update_ts, last_user_sync_ts, status_msg,
currently_active
) = row
state = UserPresenceState(
user_id, state, last_active_ts,
last_federation_update_ts, last_user_sync_ts, status_msg,
currently_active
)
self.user_to_current_state[user_id] = state
states.append(state)
if states and "position" in stream: for state in states:
stream_id = int(stream["position"]) self.user_to_current_state[row.user_id] = state
yield self.notify_from_replication(states, stream_id)
stream_id = token
yield self.notify_from_replication(states, stream_id)
def get_currently_syncing_users(self):
return [
user_id for user_id, count in self.user_to_num_current_syncs.iteritems()
if count > 0
]
class SynchrotronTyping(object): class SynchrotronTyping(object):
@ -257,16 +209,13 @@ class SynchrotronTyping(object):
# value which we *must* use for the next replication request. # value which we *must* use for the next replication request.
return {"typing": self._latest_room_serial} return {"typing": self._latest_room_serial}
def process_replication(self, result): def process_replication_rows(self, token, rows):
stream = result.get("typing") self._latest_room_serial = token
if stream:
self._latest_room_serial = int(stream["position"])
for row in stream["rows"]: for row in rows:
position, room_id, typing_json = row typing = json.loads(row.user_ids)
typing = json.loads(typing_json) self._room_serials[row.room_id] = token
self._room_serials[room_id] = position self._room_typing[row.room_id] = typing
self._room_typing[room_id] = typing
class SynchrotronApplicationService(object): class SynchrotronApplicationService(object):
@ -351,118 +300,10 @@ class SynchrotronServer(HomeServer):
else: else:
logger.warn("Unrecognized listener type: %s", listener["type"]) logger.warn("Unrecognized listener type: %s", listener["type"])
@defer.inlineCallbacks self.get_tcp_replication().start_replication(self)
def replicate(self):
http_client = self.get_simple_http_client()
store = self.get_datastore()
replication_url = self.config.worker_replication_url
notifier = self.get_notifier()
presence_handler = self.get_presence_handler()
typing_handler = self.get_typing_handler()
def notify_from_stream( def build_tcp_replication(self):
result, stream_name, stream_key, room=None, user=None return SyncReplicationHandler(self)
):
stream = result.get(stream_name)
if stream:
position_index = stream["field_names"].index("position")
if room:
room_index = stream["field_names"].index(room)
if user:
user_index = stream["field_names"].index(user)
users = ()
rooms = ()
for row in stream["rows"]:
position = row[position_index]
if user:
users = (row[user_index],)
if room:
rooms = (row[room_index],)
notifier.on_new_event(
stream_key, position, users=users, rooms=rooms
)
@defer.inlineCallbacks
def notify_device_list_update(result):
stream = result.get("device_lists")
if not stream:
return
position_index = stream["field_names"].index("position")
user_index = stream["field_names"].index("user_id")
for row in stream["rows"]:
position = row[position_index]
user_id = row[user_index]
room_ids = yield store.get_rooms_for_user(user_id)
notifier.on_new_event(
"device_list_key", position, rooms=room_ids,
)
@defer.inlineCallbacks
def notify(result):
stream = result.get("events")
if stream:
max_position = stream["position"]
event_map = yield store.get_events([row[1] for row in stream["rows"]])
for row in stream["rows"]:
position = row[0]
event_id = row[1]
event = event_map.get(event_id, None)
if not event:
continue
extra_users = ()
if event.type == EventTypes.Member:
extra_users = (event.state_key,)
notifier.on_new_room_event(
event, position, max_position, extra_users
)
notify_from_stream(
result, "push_rules", "push_rules_key", user="user_id"
)
notify_from_stream(
result, "user_account_data", "account_data_key", user="user_id"
)
notify_from_stream(
result, "room_account_data", "account_data_key", user="user_id"
)
notify_from_stream(
result, "tag_account_data", "account_data_key", user="user_id"
)
notify_from_stream(
result, "receipts", "receipt_key", room="room_id"
)
notify_from_stream(
result, "typing", "typing_key", room="room_id"
)
notify_from_stream(
result, "to_device", "to_device_key", user="user_id"
)
yield notify_device_list_update(result)
while True:
try:
args = store.stream_positions()
args.update(typing_handler.stream_positions())
args["timeout"] = 30000
result = yield http_client.get_json(replication_url, args=args)
yield store.process_replication(result)
typing_handler.process_replication(result)
yield presence_handler.process_replication(result)
yield notify(result)
except:
logger.exception("Error replicating from %r", replication_url)
yield sleep(5)
def build_presence_handler(self): def build_presence_handler(self):
return SynchrotronPresence(self) return SynchrotronPresence(self)
@ -471,6 +312,80 @@ class SynchrotronServer(HomeServer):
return SynchrotronTyping(self) return SynchrotronTyping(self)
class SyncReplicationHandler(ReplicationClientHandler):
def __init__(self, hs):
super(SyncReplicationHandler, self).__init__(hs.get_datastore())
self.store = hs.get_datastore()
self.typing_handler = hs.get_typing_handler()
self.presence_handler = hs.get_presence_handler()
self.notifier = hs.get_notifier()
self.presence_handler.sync_callback = self.send_user_sync
def on_rdata(self, stream_name, token, rows):
super(SyncReplicationHandler, self).on_rdata(stream_name, token, rows)
if stream_name == "typing":
self.typing_handler.process_replication_rows(token, rows)
elif stream_name == "presence":
self.presence_handler.process_replication_rows(token, rows)
self.notify(stream_name, token, rows)
def get_streams_to_replicate(self):
args = super(SyncReplicationHandler, self).get_streams_to_replicate()
args.update(self.typing_handler.stream_positions())
return args
def get_currently_syncing_users(self):
return self.presence_handler.get_currently_syncing_users()
@defer.inlineCallbacks
def notify(self, stream_name, token, rows):
if stream_name == "events":
# We shouldn't get multiple rows per token for events stream, so
# we don't need to optimise this for multiple rows.
for row in rows:
event = yield self.store.get_event(row.event_id)
extra_users = ()
if event.type == EventTypes.Member:
extra_users = (event.state_key,)
max_token = self.store.get_room_max_stream_ordering()
self.notifier.on_new_room_event(
event, token, max_token, extra_users
)
elif stream_name == "push_rules":
self.notifier.on_new_event(
"push_rules_key", token, users=[row.user_id for row in rows],
)
elif stream_name in ("account_data", "tag_account_data",):
self.notifier.on_new_event(
"account_data_key", token, users=[row.user_id for row in rows],
)
elif stream_name == "receipts":
self.notifier.on_new_event(
"receipt_key", token, rooms=[row.room_id for row in rows],
)
elif stream_name == "typing":
self.notifier.on_new_event(
"typing_key", token, rooms=[row.room_id for row in rows],
)
elif stream_name == "to_device":
entities = [row.entity for row in rows if row.entity.startswith("@")]
if entities:
self.notifier.on_new_event(
"to_device_key", token, users=entities,
)
elif stream_name == "device_lists":
all_room_ids = set()
for row in rows:
room_ids = yield self.store.get_rooms_for_user(row.user_id)
all_room_ids.update(room_ids)
self.notifier.on_new_event(
"device_list_key", token, rooms=all_room_ids,
)
def start(config_options): def start(config_options):
try: try:
config = HomeServerConfig.load_config( config = HomeServerConfig.load_config(
@ -514,7 +429,6 @@ def start(config_options):
def start(): def start():
ss.get_datastore().start_profiling() ss.get_datastore().start_profiling()
ss.replicate()
ss.get_state_handler().start_caching() ss.get_state_handler().start_caching()
reactor.callWhenRunning(start) reactor.callWhenRunning(start)

View File

@ -29,6 +29,9 @@ class WorkerConfig(Config):
self.worker_log_file = config.get("worker_log_file") self.worker_log_file = config.get("worker_log_file")
self.worker_log_config = config.get("worker_log_config") self.worker_log_config = config.get("worker_log_config")
self.worker_replication_url = config.get("worker_replication_url") self.worker_replication_url = config.get("worker_replication_url")
self.worker_replication_host = config.get("worker_replication_host", None)
self.worker_replication_port = config.get("worker_replication_port", None)
self.worker_name = config.get("worker_name", self.worker_app)
if self.worker_listeners: if self.worker_listeners:
for listener in self.worker_listeners: for listener in self.worker_listeners:

View File

@ -132,6 +132,7 @@ class HomeServer(object):
'federation_sender', 'federation_sender',
'receipts_handler', 'receipts_handler',
'macaroon_generator', 'macaroon_generator',
'tcp_replication',
] ]
def __init__(self, hostname, **kwargs): def __init__(self, hostname, **kwargs):
@ -290,6 +291,9 @@ class HomeServer(object):
def build_receipts_handler(self): def build_receipts_handler(self):
return ReceiptsHandler(self) return ReceiptsHandler(self)
def build_tcp_replication(self):
raise NotImplementedError()
def remove_pusher(self, app_id, push_key, user_id): def remove_pusher(self, app_id, push_key, user_id):
return self.get_pusherpool().remove_pusher(app_id, push_key, user_id) return self.get_pusherpool().remove_pusher(app_id, push_key, user_id)