diff --git a/.github/ISSUE_TEMPLATE/BUG_REPORT.md b/.github/ISSUE_TEMPLATE/BUG_REPORT.md index 5cf844bfb..9dd05bcba 100644 --- a/.github/ISSUE_TEMPLATE/BUG_REPORT.md +++ b/.github/ISSUE_TEMPLATE/BUG_REPORT.md @@ -7,7 +7,7 @@ about: Create a report to help us improve -- **Homeserver**: +- **Homeserver**: If not matrix.org: -- **Version**: + What version of Synapse is running? -- **Install method**: +You can find the Synapse version with this command: + +$ curl http://localhost:8008/_synapse/admin/v1/server_version + +(You may need to replace `localhost:8008` if Synapse is not configured to +listen on that port.) +--> +- **Version**: + +- **Install method**: -- **Platform**: +- **Platform**: rest/* <=> Client + | | Rooms*Handler | | + HS <=> federation/* <==> FederationHandler | | + | | | PresenceHandler | | + | | | TypingHandler | | + | | '-------------------' | + | | | | | + | | state/* | | + | | | | | + | | v v | + | `--------------> storage/* | + | | | + '--------------------------|--------------------------' + v + .----. + | DB | + '----' + +- Handlers: business logic of synapse itself. Follows a set contract of BaseHandler: + - BaseHandler gives us onNewRoomEvent which: (TODO: flesh this out and make it less cryptic): + - handle_state(event) + - auth(event) + - persist_event(event) + - notify notifier or federation(event) + - PresenceHandler: use distributor to get EDUs out of Federation. + Very lightweight logic built on the distributor + - TypingHandler: use distributor to get EDUs out of Federation. + Very lightweight logic built on the distributor + - EventsHandler: handles the events stream... + - FederationHandler: - gets PDU from Federation Layer; turns into + an event; follows basehandler functionality. + - RoomsHandler: does all the room logic, including members - lots + of classes in RoomsHandler. + - ProfileHandler: talks to the storage to store/retrieve profile + info. +- EventFactory: generates events of particular event types. +- Notifier: Backs the events handler +- REST: Interfaces handlers and events to the outside world via + HTTP/JSON. Converts events back and forth from JSON. +- Federation: holds the HTTP client & server to talk to other servers. + Does replication to make sure there's nothing missing in the graph. + Handles reliability. Handles txns. +- Distributor: generic event bus. used for presence & typing only + currently. Notifier could be implemented using Distributor - so far + we are only using for things which actually /require/ dynamic + pluggability however as it can obfuscate the actual flow of control. +- Auth: helper singleton to say whether a given event is allowed to do + a given thing (TODO: put this on the diagram) +- State: helper singleton: does state conflict resolution. You give it + an event and it tells you if it actually updates the state or not, + and annotates the event up properly and handles merge conflict + resolution. +- Storage: abstracts the storage engine. diff --git a/docs/architecture.rst b/docs/architecture.rst deleted file mode 100644 index 98050428b..000000000 --- a/docs/architecture.rst +++ /dev/null @@ -1,68 +0,0 @@ -Synapse Architecture -==================== - -As of the end of Oct 2014, Synapse's overall architecture looks like:: - - synapse - .-----------------------------------------------------. - | Notifier | - | ^ | | - | | | | - | .------------|------. | - | | handlers/ | | | - | | v | | - | | Event*Handler <--------> rest/* <=> Client - | | Rooms*Handler | | - HSes <=> federation/* <==> FederationHandler | | - | | | PresenceHandler | | - | | | TypingHandler | | - | | '-------------------' | - | | | | | - | | state/* | | - | | | | | - | | v v | - | `--------------> storage/* | - | | | - '--------------------------|--------------------------' - v - .----. - | DB | - '----' - -* Handlers: business logic of synapse itself. Follows a set contract of BaseHandler: - - - BaseHandler gives us onNewRoomEvent which: (TODO: flesh this out and make it less cryptic): - - + handle_state(event) - + auth(event) - + persist_event(event) - + notify notifier or federation(event) - - - PresenceHandler: use distributor to get EDUs out of Federation. Very - lightweight logic built on the distributor - - TypingHandler: use distributor to get EDUs out of Federation. Very - lightweight logic built on the distributor - - EventsHandler: handles the events stream... - - FederationHandler: - gets PDU from Federation Layer; turns into an event; - follows basehandler functionality. - - RoomsHandler: does all the room logic, including members - lots of classes in - RoomsHandler. - - ProfileHandler: talks to the storage to store/retrieve profile info. - -* EventFactory: generates events of particular event types. -* Notifier: Backs the events handler -* REST: Interfaces handlers and events to the outside world via HTTP/JSON. - Converts events back and forth from JSON. -* Federation: holds the HTTP client & server to talk to other servers. Does - replication to make sure there's nothing missing in the graph. Handles - reliability. Handles txns. -* Distributor: generic event bus. used for presence & typing only currently. - Notifier could be implemented using Distributor - so far we are only using for - things which actually /require/ dynamic pluggability however as it can - obfuscate the actual flow of control. -* Auth: helper singleton to say whether a given event is allowed to do a given - thing (TODO: put this on the diagram) -* State: helper singleton: does state conflict resolution. You give it an event - and it tells you if it actually updates the state or not, and annotates the - event up properly and handles merge conflict resolution. -* Storage: abstracts the storage engine. diff --git a/docs/code_style.md b/docs/code_style.md new file mode 100644 index 000000000..f983f72d6 --- /dev/null +++ b/docs/code_style.md @@ -0,0 +1,169 @@ +# Code Style + +## Formatting tools + +The Synapse codebase uses a number of code formatting tools in order to +quickly and automatically check for formatting (and sometimes logical) +errors in code. + +The necessary tools are detailed below. + +- **black** + + The Synapse codebase uses [black](https://pypi.org/project/black/) + as an opinionated code formatter, ensuring all comitted code is + properly formatted. + + First install `black` with: + + pip install --upgrade black + + Have `black` auto-format your code (it shouldn't change any + functionality) with: + + black . --exclude="\.tox|build|env" + +- **flake8** + + `flake8` is a code checking tool. We require code to pass `flake8` + before being merged into the codebase. + + Install `flake8` with: + + pip install --upgrade flake8 + + Check all application and test code with: + + flake8 synapse tests + +- **isort** + + `isort` ensures imports are nicely formatted, and can suggest and + auto-fix issues such as double-importing. + + Install `isort` with: + + pip install --upgrade isort + + Auto-fix imports with: + + isort -rc synapse tests + + `-rc` means to recursively search the given directories. + +It's worth noting that modern IDEs and text editors can run these tools +automatically on save. It may be worth looking into whether this +functionality is supported in your editor for a more convenient +development workflow. It is not, however, recommended to run `flake8` on +save as it takes a while and is very resource intensive. + +## General rules + +- **Naming**: + - Use camel case for class and type names + - Use underscores for functions and variables. +- **Docstrings**: should follow the [google code + style](https://google.github.io/styleguide/pyguide.html#38-comments-and-docstrings). + This is so that we can generate documentation with + [sphinx](http://sphinxcontrib-napoleon.readthedocs.org/en/latest/). + See the + [examples](http://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html) + in the sphinx documentation. +- **Imports**: + - Imports should be sorted by `isort` as described above. + - Prefer to import classes and functions rather than packages or + modules. + + Example: + + from synapse.types import UserID + ... + user_id = UserID(local, server) + + is preferred over: + + from synapse import types + ... + user_id = types.UserID(local, server) + + (or any other variant). + + This goes against the advice in the Google style guide, but it + means that errors in the name are caught early (at import time). + + - Avoid wildcard imports (`from synapse.types import *`) and + relative imports (`from .types import UserID`). + +## Configuration file format + +The [sample configuration file](./sample_config.yaml) acts as a +reference to Synapse's configuration options for server administrators. +Remember that many readers will be unfamiliar with YAML and server +administration in general, so that it is important that the file be as +easy to understand as possible, which includes following a consistent +format. + +Some guidelines follow: + +- Sections should be separated with a heading consisting of a single + line prefixed and suffixed with `##`. There should be **two** blank + lines before the section header, and **one** after. +- Each option should be listed in the file with the following format: + - A comment describing the setting. Each line of this comment + should be prefixed with a hash (`#`) and a space. + + The comment should describe the default behaviour (ie, what + happens if the setting is omitted), as well as what the effect + will be if the setting is changed. + + Often, the comment end with something like "uncomment the + following to ". + + - A line consisting of only `#`. + - A commented-out example setting, prefixed with only `#`. + + For boolean (on/off) options, convention is that this example + should be the *opposite* to the default (so the comment will end + with "Uncomment the following to enable [or disable] + ." For other options, the example should give some + non-default value which is likely to be useful to the reader. + +- There should be a blank line between each option. +- Where several settings are grouped into a single dict, *avoid* the + convention where the whole block is commented out, resulting in + comment lines starting `# #`, as this is hard to read and confusing + to edit. Instead, leave the top-level config option uncommented, and + follow the conventions above for sub-options. Ensure that your code + correctly handles the top-level option being set to `None` (as it + will be if no sub-options are enabled). +- Lines should be wrapped at 80 characters. + +Example: + + ## Frobnication ## + + # The frobnicator will ensure that all requests are fully frobnicated. + # To enable it, uncomment the following. + # + #frobnicator_enabled: true + + # By default, the frobnicator will frobnicate with the default frobber. + # The following will make it use an alternative frobber. + # + #frobincator_frobber: special_frobber + + # Settings for the frobber + # + frobber: + # frobbing speed. Defaults to 1. + # + #speed: 10 + + # frobbing distance. Defaults to 1000. + # + #distance: 100 + +Note that the sample configuration is generated from the synapse code +and is maintained by a script, `scripts-dev/generate_sample_config`. +Making sure that the output from this script matches the desired format +is left as an exercise for the reader! diff --git a/docs/code_style.rst b/docs/code_style.rst deleted file mode 100644 index 39ac4ebed..000000000 --- a/docs/code_style.rst +++ /dev/null @@ -1,180 +0,0 @@ -Code Style -========== - -Formatting tools ----------------- - -The Synapse codebase uses a number of code formatting tools in order to -quickly and automatically check for formatting (and sometimes logical) errors -in code. - -The necessary tools are detailed below. - -- **black** - - The Synapse codebase uses `black `_ as an - opinionated code formatter, ensuring all comitted code is properly - formatted. - - First install ``black`` with:: - - pip install --upgrade black - - Have ``black`` auto-format your code (it shouldn't change any functionality) - with:: - - black . --exclude="\.tox|build|env" - -- **flake8** - - ``flake8`` is a code checking tool. We require code to pass ``flake8`` before being merged into the codebase. - - Install ``flake8`` with:: - - pip install --upgrade flake8 - - Check all application and test code with:: - - flake8 synapse tests - -- **isort** - - ``isort`` ensures imports are nicely formatted, and can suggest and - auto-fix issues such as double-importing. - - Install ``isort`` with:: - - pip install --upgrade isort - - Auto-fix imports with:: - - isort -rc synapse tests - - ``-rc`` means to recursively search the given directories. - -It's worth noting that modern IDEs and text editors can run these tools -automatically on save. It may be worth looking into whether this -functionality is supported in your editor for a more convenient development -workflow. It is not, however, recommended to run ``flake8`` on save as it -takes a while and is very resource intensive. - -General rules -------------- - -- **Naming**: - - - Use camel case for class and type names - - Use underscores for functions and variables. - -- **Docstrings**: should follow the `google code style - `_. - This is so that we can generate documentation with `sphinx - `_. See the - `examples - `_ - in the sphinx documentation. - -- **Imports**: - - - Imports should be sorted by ``isort`` as described above. - - - Prefer to import classes and functions rather than packages or modules. - - Example:: - - from synapse.types import UserID - ... - user_id = UserID(local, server) - - is preferred over:: - - from synapse import types - ... - user_id = types.UserID(local, server) - - (or any other variant). - - This goes against the advice in the Google style guide, but it means that - errors in the name are caught early (at import time). - - - Avoid wildcard imports (``from synapse.types import *``) and relative - imports (``from .types import UserID``). - -Configuration file format -------------------------- - -The `sample configuration file <./sample_config.yaml>`_ acts as a reference to -Synapse's configuration options for server administrators. Remember that many -readers will be unfamiliar with YAML and server administration in general, so -that it is important that the file be as easy to understand as possible, which -includes following a consistent format. - -Some guidelines follow: - -* Sections should be separated with a heading consisting of a single line - prefixed and suffixed with ``##``. There should be **two** blank lines - before the section header, and **one** after. - -* Each option should be listed in the file with the following format: - - * A comment describing the setting. Each line of this comment should be - prefixed with a hash (``#``) and a space. - - The comment should describe the default behaviour (ie, what happens if - the setting is omitted), as well as what the effect will be if the - setting is changed. - - Often, the comment end with something like "uncomment the - following to \". - - * A line consisting of only ``#``. - - * A commented-out example setting, prefixed with only ``#``. - - For boolean (on/off) options, convention is that this example should be - the *opposite* to the default (so the comment will end with "Uncomment - the following to enable [or disable] \." For other options, - the example should give some non-default value which is likely to be - useful to the reader. - -* There should be a blank line between each option. - -* Where several settings are grouped into a single dict, *avoid* the - convention where the whole block is commented out, resulting in comment - lines starting ``# #``, as this is hard to read and confusing to - edit. Instead, leave the top-level config option uncommented, and follow - the conventions above for sub-options. Ensure that your code correctly - handles the top-level option being set to ``None`` (as it will be if no - sub-options are enabled). - -* Lines should be wrapped at 80 characters. - -Example:: - - ## Frobnication ## - - # The frobnicator will ensure that all requests are fully frobnicated. - # To enable it, uncomment the following. - # - #frobnicator_enabled: true - - # By default, the frobnicator will frobnicate with the default frobber. - # The following will make it use an alternative frobber. - # - #frobincator_frobber: special_frobber - - # Settings for the frobber - # - frobber: - # frobbing speed. Defaults to 1. - # - #speed: 10 - - # frobbing distance. Defaults to 1000. - # - #distance: 100 - -Note that the sample configuration is generated from the synapse code and is -maintained by a script, ``scripts-dev/generate_sample_config``. Making sure -that the output from this script matches the desired format is left as an -exercise for the reader! diff --git a/docs/dev/saml.md b/docs/dev/saml.md new file mode 100644 index 000000000..f41aadce4 --- /dev/null +++ b/docs/dev/saml.md @@ -0,0 +1,37 @@ +# How to test SAML as a developer without a server + +https://capriza.github.io/samling/samling.html (https://github.com/capriza/samling) is a great +resource for being able to tinker with the SAML options within Synapse without needing to +deploy and configure a complicated software stack. + +To make Synapse (and therefore Riot) use it: + +1. Use the samling.html URL above or deploy your own and visit the IdP Metadata tab. +2. Copy the XML to your clipboard. +3. On your Synapse server, create a new file `samling.xml` next to your `homeserver.yaml` with + the XML from step 2 as the contents. +4. Edit your `homeserver.yaml` to include: + ```yaml + saml2_config: + sp_config: + allow_unknown_attributes: true # Works around a bug with AVA Hashes: https://github.com/IdentityPython/pysaml2/issues/388 + metadata: + local: ["samling.xml"] + ``` +5. Run `apt-get install xmlsec1` and `pip install --upgrade --force 'pysaml2>=4.5.0'` to ensure + the dependencies are installed and ready to go. +6. Restart Synapse. + +Then in Riot: + +1. Visit the login page with a Riot pointing at your homeserver. +2. Click the Single Sign-On button. +3. On the samling page, enter a Name Identifier and add a SAML Attribute for `uid=your_localpart`. + The response must also be signed. +4. Click "Next". +5. Click "Post Response" (change nothing). +6. You should be logged in. + +If you try and repeat this process, you may be automatically logged in using the information you +gave previously. To fix this, open your developer console (`F12` or `Ctrl+Shift+I`) while on the +samling page and clear the site data. In Chrome, this will be a button on the Application tab. diff --git a/docs/federate.md b/docs/federate.md index 6d6bb85e1..193e2d2df 100644 --- a/docs/federate.md +++ b/docs/federate.md @@ -148,7 +148,7 @@ We no longer actively recommend against using a reverse proxy. Many admins will find it easier to direct federation traffic to a reverse proxy and manage their own TLS certificates, and this is a supported configuration. -See [reverse_proxy.rst](reverse_proxy.rst) for information on setting up a +See [reverse_proxy.md](reverse_proxy.md) for information on setting up a reverse proxy. #### Do I still need to give my TLS certificates to Synapse if I am using a reverse proxy? @@ -184,7 +184,7 @@ a complicated dance which requires connections in both directions). Another common problem is that people on other servers can't join rooms that you invite them to. This can be caused by an incorrectly-configured reverse -proxy: see [reverse_proxy.rst]() for instructions on how to correctly +proxy: see [reverse_proxy.md]() for instructions on how to correctly configure a reverse proxy. ## Running a Demo Federation of Synapses diff --git a/docs/log_contexts.md b/docs/log_contexts.md new file mode 100644 index 000000000..5331e8c88 --- /dev/null +++ b/docs/log_contexts.md @@ -0,0 +1,494 @@ +# Log Contexts + +To help track the processing of individual requests, synapse uses a +'`log context`' to track which request it is handling at any given +moment. This is done via a thread-local variable; a `logging.Filter` is +then used to fish the information back out of the thread-local variable +and add it to each log record. + +Logcontexts are also used for CPU and database accounting, so that we +can track which requests were responsible for high CPU use or database +activity. + +The `synapse.logging.context` module provides a facilities for managing +the current log context (as well as providing the `LoggingContextFilter` +class). + +Deferreds make the whole thing complicated, so this document describes +how it all works, and how to write code which follows the rules. + +##Logcontexts without Deferreds + +In the absence of any Deferred voodoo, things are simple enough. As with +any code of this nature, the rule is that our function should leave +things as it found them: + +```python +from synapse.logging import context # omitted from future snippets + +def handle_request(request_id): + request_context = context.LoggingContext() + + calling_context = context.LoggingContext.current_context() + context.LoggingContext.set_current_context(request_context) + try: + request_context.request = request_id + do_request_handling() + logger.debug("finished") + finally: + context.LoggingContext.set_current_context(calling_context) + +def do_request_handling(): + logger.debug("phew") # this will be logged against request_id +``` + +LoggingContext implements the context management methods, so the above +can be written much more succinctly as: + +```python +def handle_request(request_id): + with context.LoggingContext() as request_context: + request_context.request = request_id + do_request_handling() + logger.debug("finished") + +def do_request_handling(): + logger.debug("phew") +``` + +## Using logcontexts with Deferreds + +Deferreds --- and in particular, `defer.inlineCallbacks` --- break the +linear flow of code so that there is no longer a single entry point +where we should set the logcontext and a single exit point where we +should remove it. + +Consider the example above, where `do_request_handling` needs to do some +blocking operation, and returns a deferred: + +```python +@defer.inlineCallbacks +def handle_request(request_id): + with context.LoggingContext() as request_context: + request_context.request = request_id + yield do_request_handling() + logger.debug("finished") +``` + +In the above flow: + +- The logcontext is set +- `do_request_handling` is called, and returns a deferred +- `handle_request` yields the deferred +- The `inlineCallbacks` wrapper of `handle_request` returns a deferred + +So we have stopped processing the request (and will probably go on to +start processing the next), without clearing the logcontext. + +To circumvent this problem, synapse code assumes that, wherever you have +a deferred, you will want to yield on it. To that end, whereever +functions return a deferred, we adopt the following conventions: + +**Rules for functions returning deferreds:** + +> - If the deferred is already complete, the function returns with the +> same logcontext it started with. +> - If the deferred is incomplete, the function clears the logcontext +> before returning; when the deferred completes, it restores the +> logcontext before running any callbacks. + +That sounds complicated, but actually it means a lot of code (including +the example above) "just works". There are two cases: + +- If `do_request_handling` returns a completed deferred, then the + logcontext will still be in place. In this case, execution will + continue immediately after the `yield`; the "finished" line will + be logged against the right context, and the `with` block restores + the original context before we return to the caller. +- If the returned deferred is incomplete, `do_request_handling` clears + the logcontext before returning. The logcontext is therefore clear + when `handle_request` yields the deferred. At that point, the + `inlineCallbacks` wrapper adds a callback to the deferred, and + returns another (incomplete) deferred to the caller, and it is safe + to begin processing the next request. + + Once `do_request_handling`'s deferred completes, it will reinstate + the logcontext, before running the callback added by the + `inlineCallbacks` wrapper. That callback runs the second half of + `handle_request`, so again the "finished" line will be logged + against the right context, and the `with` block restores the + original context. + +As an aside, it's worth noting that `handle_request` follows our rules +-though that only matters if the caller has its own logcontext which it +cares about. + +The following sections describe pitfalls and helpful patterns when +implementing these rules. + +Always yield your deferreds +--------------------------- + +Whenever you get a deferred back from a function, you should `yield` on +it as soon as possible. (Returning it directly to your caller is ok too, +if you're not doing `inlineCallbacks`.) Do not pass go; do not do any +logging; do not call any other functions. + +```python +@defer.inlineCallbacks +def fun(): + logger.debug("starting") + yield do_some_stuff() # just like this + + d = more_stuff() + result = yield d # also fine, of course + + return result + +def nonInlineCallbacksFun(): + logger.debug("just a wrapper really") + return do_some_stuff() # this is ok too - the caller will yield on + # it anyway. +``` + +Provided this pattern is followed all the way back up to the callchain +to where the logcontext was set, this will make things work out ok: +provided `do_some_stuff` and `more_stuff` follow the rules above, then +so will `fun` (as wrapped by `inlineCallbacks`) and +`nonInlineCallbacksFun`. + +It's all too easy to forget to `yield`: for instance if we forgot that +`do_some_stuff` returned a deferred, we might plough on regardless. This +leads to a mess; it will probably work itself out eventually, but not +before a load of stuff has been logged against the wrong context. +(Normally, other things will break, more obviously, if you forget to +`yield`, so this tends not to be a major problem in practice.) + +Of course sometimes you need to do something a bit fancier with your +Deferreds - not all code follows the linear A-then-B-then-C pattern. +Notes on implementing more complex patterns are in later sections. + +## Where you create a new Deferred, make it follow the rules + +Most of the time, a Deferred comes from another synapse function. +Sometimes, though, we need to make up a new Deferred, or we get a +Deferred back from external code. We need to make it follow our rules. + +The easy way to do it is with a combination of `defer.inlineCallbacks`, +and `context.PreserveLoggingContext`. Suppose we want to implement +`sleep`, which returns a deferred which will run its callbacks after a +given number of seconds. That might look like: + +```python +# not a logcontext-rules-compliant function +def get_sleep_deferred(seconds): + d = defer.Deferred() + reactor.callLater(seconds, d.callback, None) + return d +``` + +That doesn't follow the rules, but we can fix it by wrapping it with +`PreserveLoggingContext` and `yield` ing on it: + +```python +@defer.inlineCallbacks +def sleep(seconds): + with PreserveLoggingContext(): + yield get_sleep_deferred(seconds) +``` + +This technique works equally for external functions which return +deferreds, or deferreds we have made ourselves. + +You can also use `context.make_deferred_yieldable`, which just does the +boilerplate for you, so the above could be written: + +```python +def sleep(seconds): + return context.make_deferred_yieldable(get_sleep_deferred(seconds)) +``` + +## Fire-and-forget + +Sometimes you want to fire off a chain of execution, but not wait for +its result. That might look a bit like this: + +```python +@defer.inlineCallbacks +def do_request_handling(): + yield foreground_operation() + + # *don't* do this + background_operation() + + logger.debug("Request handling complete") + +@defer.inlineCallbacks +def background_operation(): + yield first_background_step() + logger.debug("Completed first step") + yield second_background_step() + logger.debug("Completed second step") +``` + +The above code does a couple of steps in the background after +`do_request_handling` has finished. The log lines are still logged +against the `request_context` logcontext, which may or may not be +desirable. There are two big problems with the above, however. The first +problem is that, if `background_operation` returns an incomplete +Deferred, it will expect its caller to `yield` immediately, so will have +cleared the logcontext. In this example, that means that 'Request +handling complete' will be logged without any context. + +The second problem, which is potentially even worse, is that when the +Deferred returned by `background_operation` completes, it will restore +the original logcontext. There is nothing waiting on that Deferred, so +the logcontext will leak into the reactor and possibly get attached to +some arbitrary future operation. + +There are two potential solutions to this. + +One option is to surround the call to `background_operation` with a +`PreserveLoggingContext` call. That will reset the logcontext before +starting `background_operation` (so the context restored when the +deferred completes will be the empty logcontext), and will restore the +current logcontext before continuing the foreground process: + +```python +@defer.inlineCallbacks +def do_request_handling(): + yield foreground_operation() + + # start background_operation off in the empty logcontext, to + # avoid leaking the current context into the reactor. + with PreserveLoggingContext(): + background_operation() + + # this will now be logged against the request context + logger.debug("Request handling complete") +``` + +Obviously that option means that the operations done in +`background_operation` would be not be logged against a logcontext +(though that might be fixed by setting a different logcontext via a +`with LoggingContext(...)` in `background_operation`). + +The second option is to use `context.run_in_background`, which wraps a +function so that it doesn't reset the logcontext even when it returns +an incomplete deferred, and adds a callback to the returned deferred to +reset the logcontext. In other words, it turns a function that follows +the Synapse rules about logcontexts and Deferreds into one which behaves +more like an external function --- the opposite operation to that +described in the previous section. It can be used like this: + +```python +@defer.inlineCallbacks +def do_request_handling(): + yield foreground_operation() + + context.run_in_background(background_operation) + + # this will now be logged against the request context + logger.debug("Request handling complete") +``` + +## Passing synapse deferreds into third-party functions + +A typical example of this is where we want to collect together two or +more deferred via `defer.gatherResults`: + +```python +d1 = operation1() +d2 = operation2() +d3 = defer.gatherResults([d1, d2]) +``` + +This is really a variation of the fire-and-forget problem above, in that +we are firing off `d1` and `d2` without yielding on them. The difference +is that we now have third-party code attached to their callbacks. Anyway +either technique given in the [Fire-and-forget](#fire-and-forget) +section will work. + +Of course, the new Deferred returned by `gatherResults` needs to be +wrapped in order to make it follow the logcontext rules before we can +yield it, as described in [Where you create a new Deferred, make it +follow the +rules](#where-you-create-a-new-deferred-make-it-follow-the-rules). + +So, option one: reset the logcontext before starting the operations to +be gathered: + +```python +@defer.inlineCallbacks +def do_request_handling(): + with PreserveLoggingContext(): + d1 = operation1() + d2 = operation2() + result = yield defer.gatherResults([d1, d2]) +``` + +In this case particularly, though, option two, of using +`context.preserve_fn` almost certainly makes more sense, so that +`operation1` and `operation2` are both logged against the original +logcontext. This looks like: + +```python +@defer.inlineCallbacks +def do_request_handling(): + d1 = context.preserve_fn(operation1)() + d2 = context.preserve_fn(operation2)() + + with PreserveLoggingContext(): + result = yield defer.gatherResults([d1, d2]) +``` + +## Was all this really necessary? + +The conventions used work fine for a linear flow where everything +happens in series via `defer.inlineCallbacks` and `yield`, but are +certainly tricky to follow for any more exotic flows. It's hard not to +wonder if we could have done something else. + +We're not going to rewrite Synapse now, so the following is entirely of +academic interest, but I'd like to record some thoughts on an +alternative approach. + +I briefly prototyped some code following an alternative set of rules. I +think it would work, but I certainly didn't get as far as thinking how +it would interact with concepts as complicated as the cache descriptors. + +My alternative rules were: + +- functions always preserve the logcontext of their caller, whether or + not they are returning a Deferred. +- Deferreds returned by synapse functions run their callbacks in the + same context as the function was orignally called in. + +The main point of this scheme is that everywhere that sets the +logcontext is responsible for clearing it before returning control to +the reactor. + +So, for example, if you were the function which started a +`with LoggingContext` block, you wouldn't `yield` within it --- instead +you'd start off the background process, and then leave the `with` block +to wait for it: + +```python +def handle_request(request_id): + with context.LoggingContext() as request_context: + request_context.request = request_id + d = do_request_handling() + + def cb(r): + logger.debug("finished") + + d.addCallback(cb) + return d +``` + +(in general, mixing `with LoggingContext` blocks and +`defer.inlineCallbacks` in the same function leads to slighly +counter-intuitive code, under this scheme). + +Because we leave the original `with` block as soon as the Deferred is +returned (as opposed to waiting for it to be resolved, as we do today), +the logcontext is cleared before control passes back to the reactor; so +if there is some code within `do_request_handling` which needs to wait +for a Deferred to complete, there is no need for it to worry about +clearing the logcontext before doing so: + +```python +def handle_request(): + r = do_some_stuff() + r.addCallback(do_some_more_stuff) + return r +``` + +--- and provided `do_some_stuff` follows the rules of returning a +Deferred which runs its callbacks in the original logcontext, all is +happy. + +The business of a Deferred which runs its callbacks in the original +logcontext isn't hard to achieve --- we have it today, in the shape of +`context._PreservingContextDeferred`: + +```python +def do_some_stuff(): + deferred = do_some_io() + pcd = _PreservingContextDeferred(LoggingContext.current_context()) + deferred.chainDeferred(pcd) + return pcd +``` + +It turns out that, thanks to the way that Deferreds chain together, we +automatically get the property of a context-preserving deferred with +`defer.inlineCallbacks`, provided the final Defered the function +`yields` on has that property. So we can just write: + +```python +@defer.inlineCallbacks +def handle_request(): + yield do_some_stuff() + yield do_some_more_stuff() +``` + +To conclude: I think this scheme would have worked equally well, with +less danger of messing it up, and probably made some more esoteric code +easier to write. But again --- changing the conventions of the entire +Synapse codebase is not a sensible option for the marginal improvement +offered. + +## A note on garbage-collection of Deferred chains + +It turns out that our logcontext rules do not play nicely with Deferred +chains which get orphaned and garbage-collected. + +Imagine we have some code that looks like this: + +```python +listener_queue = [] + +def on_something_interesting(): + for d in listener_queue: + d.callback("foo") + +@defer.inlineCallbacks +def await_something_interesting(): + new_deferred = defer.Deferred() + listener_queue.append(new_deferred) + + with PreserveLoggingContext(): + yield new_deferred +``` + +Obviously, the idea here is that we have a bunch of things which are +waiting for an event. (It's just an example of the problem here, but a +relatively common one.) + +Now let's imagine two further things happen. First of all, whatever was +waiting for the interesting thing goes away. (Perhaps the request times +out, or something *even more* interesting happens.) + +Secondly, let's suppose that we decide that the interesting thing is +never going to happen, and we reset the listener queue: + +```python +def reset_listener_queue(): + listener_queue.clear() +``` + +So, both ends of the deferred chain have now dropped their references, +and the deferred chain is now orphaned, and will be garbage-collected at +some point. Note that `await_something_interesting` is a generator +function, and when Python garbage-collects generator functions, it gives +them a chance to clean up by making the `yield` raise a `GeneratorExit` +exception. In our case, that means that the `__exit__` handler of +`PreserveLoggingContext` will carefully restore the request context, but +there is now nothing waiting for its return, so the request context is +never cleared. + +To reiterate, this problem only arises when *both* ends of a deferred +chain are dropped. Dropping the the reference to a deferred you're +supposed to be calling is probably bad practice, so this doesn't +actually happen too much. Unfortunately, when it does happen, it will +lead to leaked logcontexts which are incredibly hard to track down. diff --git a/docs/log_contexts.rst b/docs/log_contexts.rst deleted file mode 100644 index 4502cd945..000000000 --- a/docs/log_contexts.rst +++ /dev/null @@ -1,498 +0,0 @@ -Log Contexts -============ - -.. contents:: - -To help track the processing of individual requests, synapse uses a -'log context' to track which request it is handling at any given moment. This -is done via a thread-local variable; a ``logging.Filter`` is then used to fish -the information back out of the thread-local variable and add it to each log -record. - -Logcontexts are also used for CPU and database accounting, so that we can track -which requests were responsible for high CPU use or database activity. - -The ``synapse.logging.context`` module provides a facilities for managing the -current log context (as well as providing the ``LoggingContextFilter`` class). - -Deferreds make the whole thing complicated, so this document describes how it -all works, and how to write code which follows the rules. - -Logcontexts without Deferreds ------------------------------ - -In the absence of any Deferred voodoo, things are simple enough. As with any -code of this nature, the rule is that our function should leave things as it -found them: - -.. code:: python - - from synapse.logging import context # omitted from future snippets - - def handle_request(request_id): - request_context = context.LoggingContext() - - calling_context = context.LoggingContext.current_context() - context.LoggingContext.set_current_context(request_context) - try: - request_context.request = request_id - do_request_handling() - logger.debug("finished") - finally: - context.LoggingContext.set_current_context(calling_context) - - def do_request_handling(): - logger.debug("phew") # this will be logged against request_id - - -LoggingContext implements the context management methods, so the above can be -written much more succinctly as: - -.. code:: python - - def handle_request(request_id): - with context.LoggingContext() as request_context: - request_context.request = request_id - do_request_handling() - logger.debug("finished") - - def do_request_handling(): - logger.debug("phew") - - -Using logcontexts with Deferreds --------------------------------- - -Deferreds — and in particular, ``defer.inlineCallbacks`` — break -the linear flow of code so that there is no longer a single entry point where -we should set the logcontext and a single exit point where we should remove it. - -Consider the example above, where ``do_request_handling`` needs to do some -blocking operation, and returns a deferred: - -.. code:: python - - @defer.inlineCallbacks - def handle_request(request_id): - with context.LoggingContext() as request_context: - request_context.request = request_id - yield do_request_handling() - logger.debug("finished") - - -In the above flow: - -* The logcontext is set -* ``do_request_handling`` is called, and returns a deferred -* ``handle_request`` yields the deferred -* The ``inlineCallbacks`` wrapper of ``handle_request`` returns a deferred - -So we have stopped processing the request (and will probably go on to start -processing the next), without clearing the logcontext. - -To circumvent this problem, synapse code assumes that, wherever you have a -deferred, you will want to yield on it. To that end, whereever functions return -a deferred, we adopt the following conventions: - -**Rules for functions returning deferreds:** - - * If the deferred is already complete, the function returns with the same - logcontext it started with. - * If the deferred is incomplete, the function clears the logcontext before - returning; when the deferred completes, it restores the logcontext before - running any callbacks. - -That sounds complicated, but actually it means a lot of code (including the -example above) "just works". There are two cases: - -* If ``do_request_handling`` returns a completed deferred, then the logcontext - will still be in place. In this case, execution will continue immediately - after the ``yield``; the "finished" line will be logged against the right - context, and the ``with`` block restores the original context before we - return to the caller. - -* If the returned deferred is incomplete, ``do_request_handling`` clears the - logcontext before returning. The logcontext is therefore clear when - ``handle_request`` yields the deferred. At that point, the ``inlineCallbacks`` - wrapper adds a callback to the deferred, and returns another (incomplete) - deferred to the caller, and it is safe to begin processing the next request. - - Once ``do_request_handling``'s deferred completes, it will reinstate the - logcontext, before running the callback added by the ``inlineCallbacks`` - wrapper. That callback runs the second half of ``handle_request``, so again - the "finished" line will be logged against the right - context, and the ``with`` block restores the original context. - -As an aside, it's worth noting that ``handle_request`` follows our rules - -though that only matters if the caller has its own logcontext which it cares -about. - -The following sections describe pitfalls and helpful patterns when implementing -these rules. - -Always yield your deferreds ---------------------------- - -Whenever you get a deferred back from a function, you should ``yield`` on it -as soon as possible. (Returning it directly to your caller is ok too, if you're -not doing ``inlineCallbacks``.) Do not pass go; do not do any logging; do not -call any other functions. - -.. code:: python - - @defer.inlineCallbacks - def fun(): - logger.debug("starting") - yield do_some_stuff() # just like this - - d = more_stuff() - result = yield d # also fine, of course - - return result - - def nonInlineCallbacksFun(): - logger.debug("just a wrapper really") - return do_some_stuff() # this is ok too - the caller will yield on - # it anyway. - -Provided this pattern is followed all the way back up to the callchain to where -the logcontext was set, this will make things work out ok: provided -``do_some_stuff`` and ``more_stuff`` follow the rules above, then so will -``fun`` (as wrapped by ``inlineCallbacks``) and ``nonInlineCallbacksFun``. - -It's all too easy to forget to ``yield``: for instance if we forgot that -``do_some_stuff`` returned a deferred, we might plough on regardless. This -leads to a mess; it will probably work itself out eventually, but not before -a load of stuff has been logged against the wrong context. (Normally, other -things will break, more obviously, if you forget to ``yield``, so this tends -not to be a major problem in practice.) - -Of course sometimes you need to do something a bit fancier with your Deferreds -- not all code follows the linear A-then-B-then-C pattern. Notes on -implementing more complex patterns are in later sections. - -Where you create a new Deferred, make it follow the rules ---------------------------------------------------------- - -Most of the time, a Deferred comes from another synapse function. Sometimes, -though, we need to make up a new Deferred, or we get a Deferred back from -external code. We need to make it follow our rules. - -The easy way to do it is with a combination of ``defer.inlineCallbacks``, and -``context.PreserveLoggingContext``. Suppose we want to implement ``sleep``, -which returns a deferred which will run its callbacks after a given number of -seconds. That might look like: - -.. code:: python - - # not a logcontext-rules-compliant function - def get_sleep_deferred(seconds): - d = defer.Deferred() - reactor.callLater(seconds, d.callback, None) - return d - -That doesn't follow the rules, but we can fix it by wrapping it with -``PreserveLoggingContext`` and ``yield`` ing on it: - -.. code:: python - - @defer.inlineCallbacks - def sleep(seconds): - with PreserveLoggingContext(): - yield get_sleep_deferred(seconds) - -This technique works equally for external functions which return deferreds, -or deferreds we have made ourselves. - -You can also use ``context.make_deferred_yieldable``, which just does the -boilerplate for you, so the above could be written: - -.. code:: python - - def sleep(seconds): - return context.make_deferred_yieldable(get_sleep_deferred(seconds)) - - -Fire-and-forget ---------------- - -Sometimes you want to fire off a chain of execution, but not wait for its -result. That might look a bit like this: - -.. code:: python - - @defer.inlineCallbacks - def do_request_handling(): - yield foreground_operation() - - # *don't* do this - background_operation() - - logger.debug("Request handling complete") - - @defer.inlineCallbacks - def background_operation(): - yield first_background_step() - logger.debug("Completed first step") - yield second_background_step() - logger.debug("Completed second step") - -The above code does a couple of steps in the background after -``do_request_handling`` has finished. The log lines are still logged against -the ``request_context`` logcontext, which may or may not be desirable. There -are two big problems with the above, however. The first problem is that, if -``background_operation`` returns an incomplete Deferred, it will expect its -caller to ``yield`` immediately, so will have cleared the logcontext. In this -example, that means that 'Request handling complete' will be logged without any -context. - -The second problem, which is potentially even worse, is that when the Deferred -returned by ``background_operation`` completes, it will restore the original -logcontext. There is nothing waiting on that Deferred, so the logcontext will -leak into the reactor and possibly get attached to some arbitrary future -operation. - -There are two potential solutions to this. - -One option is to surround the call to ``background_operation`` with a -``PreserveLoggingContext`` call. That will reset the logcontext before -starting ``background_operation`` (so the context restored when the deferred -completes will be the empty logcontext), and will restore the current -logcontext before continuing the foreground process: - -.. code:: python - - @defer.inlineCallbacks - def do_request_handling(): - yield foreground_operation() - - # start background_operation off in the empty logcontext, to - # avoid leaking the current context into the reactor. - with PreserveLoggingContext(): - background_operation() - - # this will now be logged against the request context - logger.debug("Request handling complete") - -Obviously that option means that the operations done in -``background_operation`` would be not be logged against a logcontext (though -that might be fixed by setting a different logcontext via a ``with -LoggingContext(...)`` in ``background_operation``). - -The second option is to use ``context.run_in_background``, which wraps a -function so that it doesn't reset the logcontext even when it returns an -incomplete deferred, and adds a callback to the returned deferred to reset the -logcontext. In other words, it turns a function that follows the Synapse rules -about logcontexts and Deferreds into one which behaves more like an external -function — the opposite operation to that described in the previous section. -It can be used like this: - -.. code:: python - - @defer.inlineCallbacks - def do_request_handling(): - yield foreground_operation() - - context.run_in_background(background_operation) - - # this will now be logged against the request context - logger.debug("Request handling complete") - -Passing synapse deferreds into third-party functions ----------------------------------------------------- - -A typical example of this is where we want to collect together two or more -deferred via ``defer.gatherResults``: - -.. code:: python - - d1 = operation1() - d2 = operation2() - d3 = defer.gatherResults([d1, d2]) - -This is really a variation of the fire-and-forget problem above, in that we are -firing off ``d1`` and ``d2`` without yielding on them. The difference -is that we now have third-party code attached to their callbacks. Anyway either -technique given in the `Fire-and-forget`_ section will work. - -Of course, the new Deferred returned by ``gatherResults`` needs to be wrapped -in order to make it follow the logcontext rules before we can yield it, as -described in `Where you create a new Deferred, make it follow the rules`_. - -So, option one: reset the logcontext before starting the operations to be -gathered: - -.. code:: python - - @defer.inlineCallbacks - def do_request_handling(): - with PreserveLoggingContext(): - d1 = operation1() - d2 = operation2() - result = yield defer.gatherResults([d1, d2]) - -In this case particularly, though, option two, of using -``context.preserve_fn`` almost certainly makes more sense, so that -``operation1`` and ``operation2`` are both logged against the original -logcontext. This looks like: - -.. code:: python - - @defer.inlineCallbacks - def do_request_handling(): - d1 = context.preserve_fn(operation1)() - d2 = context.preserve_fn(operation2)() - - with PreserveLoggingContext(): - result = yield defer.gatherResults([d1, d2]) - - -Was all this really necessary? ------------------------------- - -The conventions used work fine for a linear flow where everything happens in -series via ``defer.inlineCallbacks`` and ``yield``, but are certainly tricky to -follow for any more exotic flows. It's hard not to wonder if we could have done -something else. - -We're not going to rewrite Synapse now, so the following is entirely of -academic interest, but I'd like to record some thoughts on an alternative -approach. - -I briefly prototyped some code following an alternative set of rules. I think -it would work, but I certainly didn't get as far as thinking how it would -interact with concepts as complicated as the cache descriptors. - -My alternative rules were: - -* functions always preserve the logcontext of their caller, whether or not they - are returning a Deferred. - -* Deferreds returned by synapse functions run their callbacks in the same - context as the function was orignally called in. - -The main point of this scheme is that everywhere that sets the logcontext is -responsible for clearing it before returning control to the reactor. - -So, for example, if you were the function which started a ``with -LoggingContext`` block, you wouldn't ``yield`` within it — instead you'd start -off the background process, and then leave the ``with`` block to wait for it: - -.. code:: python - - def handle_request(request_id): - with context.LoggingContext() as request_context: - request_context.request = request_id - d = do_request_handling() - - def cb(r): - logger.debug("finished") - - d.addCallback(cb) - return d - -(in general, mixing ``with LoggingContext`` blocks and -``defer.inlineCallbacks`` in the same function leads to slighly -counter-intuitive code, under this scheme). - -Because we leave the original ``with`` block as soon as the Deferred is -returned (as opposed to waiting for it to be resolved, as we do today), the -logcontext is cleared before control passes back to the reactor; so if there is -some code within ``do_request_handling`` which needs to wait for a Deferred to -complete, there is no need for it to worry about clearing the logcontext before -doing so: - -.. code:: python - - def handle_request(): - r = do_some_stuff() - r.addCallback(do_some_more_stuff) - return r - -— and provided ``do_some_stuff`` follows the rules of returning a Deferred which -runs its callbacks in the original logcontext, all is happy. - -The business of a Deferred which runs its callbacks in the original logcontext -isn't hard to achieve — we have it today, in the shape of -``context._PreservingContextDeferred``: - -.. code:: python - - def do_some_stuff(): - deferred = do_some_io() - pcd = _PreservingContextDeferred(LoggingContext.current_context()) - deferred.chainDeferred(pcd) - return pcd - -It turns out that, thanks to the way that Deferreds chain together, we -automatically get the property of a context-preserving deferred with -``defer.inlineCallbacks``, provided the final Defered the function ``yields`` -on has that property. So we can just write: - -.. code:: python - - @defer.inlineCallbacks - def handle_request(): - yield do_some_stuff() - yield do_some_more_stuff() - -To conclude: I think this scheme would have worked equally well, with less -danger of messing it up, and probably made some more esoteric code easier to -write. But again — changing the conventions of the entire Synapse codebase is -not a sensible option for the marginal improvement offered. - - -A note on garbage-collection of Deferred chains ------------------------------------------------ - -It turns out that our logcontext rules do not play nicely with Deferred -chains which get orphaned and garbage-collected. - -Imagine we have some code that looks like this: - -.. code:: python - - listener_queue = [] - - def on_something_interesting(): - for d in listener_queue: - d.callback("foo") - - @defer.inlineCallbacks - def await_something_interesting(): - new_deferred = defer.Deferred() - listener_queue.append(new_deferred) - - with PreserveLoggingContext(): - yield new_deferred - -Obviously, the idea here is that we have a bunch of things which are waiting -for an event. (It's just an example of the problem here, but a relatively -common one.) - -Now let's imagine two further things happen. First of all, whatever was -waiting for the interesting thing goes away. (Perhaps the request times out, -or something *even more* interesting happens.) - -Secondly, let's suppose that we decide that the interesting thing is never -going to happen, and we reset the listener queue: - -.. code:: python - - def reset_listener_queue(): - listener_queue.clear() - -So, both ends of the deferred chain have now dropped their references, and the -deferred chain is now orphaned, and will be garbage-collected at some point. -Note that ``await_something_interesting`` is a generator function, and when -Python garbage-collects generator functions, it gives them a chance to clean -up by making the ``yield`` raise a ``GeneratorExit`` exception. In our case, -that means that the ``__exit__`` handler of ``PreserveLoggingContext`` will -carefully restore the request context, but there is now nothing waiting for -its return, so the request context is never cleared. - -To reiterate, this problem only arises when *both* ends of a deferred chain -are dropped. Dropping the the reference to a deferred you're supposed to be -calling is probably bad practice, so this doesn't actually happen too much. -Unfortunately, when it does happen, it will lead to leaked logcontexts which -are incredibly hard to track down. diff --git a/docs/media_repository.md b/docs/media_repository.md new file mode 100644 index 000000000..1bf8f16f5 --- /dev/null +++ b/docs/media_repository.md @@ -0,0 +1,30 @@ +# Media Repository + +*Synapse implementation-specific details for the media repository* + +The media repository is where attachments and avatar photos are stored. +It stores attachment content and thumbnails for media uploaded by local users. +It caches attachment content and thumbnails for media uploaded by remote users. + +## Storage + +Each item of media is assigned a `media_id` when it is uploaded. +The `media_id` is a randomly chosen, URL safe 24 character string. + +Metadata such as the MIME type, upload time and length are stored in the +sqlite3 database indexed by `media_id`. + +Content is stored on the filesystem under a `"local_content"` directory. + +Thumbnails are stored under a `"local_thumbnails"` directory. + +The item with `media_id` `"aabbccccccccdddddddddddd"` is stored under +`"local_content/aa/bb/ccccccccdddddddddddd"`. Its thumbnail with width +`128` and height `96` and type `"image/jpeg"` is stored under +`"local_thumbnails/aa/bb/ccccccccdddddddddddd/128-96-image-jpeg"` + +Remote content is cached under `"remote_content"` directory. Each item of +remote content is assigned a local `"filesystem_id"` to ensure that the +directory structure `"remote_content/server_name/aa/bb/ccccccccdddddddddddd"` +is appropriate. Thumbnails for remote content are stored under +`"remote_thumbnails/server_name/..."` diff --git a/docs/media_repository.rst b/docs/media_repository.rst deleted file mode 100644 index 1037b5be6..000000000 --- a/docs/media_repository.rst +++ /dev/null @@ -1,27 +0,0 @@ -Media Repository -================ - -*Synapse implementation-specific details for the media repository* - -The media repository is where attachments and avatar photos are stored. -It stores attachment content and thumbnails for media uploaded by local users. -It caches attachment content and thumbnails for media uploaded by remote users. - -Storage -------- - -Each item of media is assigned a ``media_id`` when it is uploaded. -The ``media_id`` is a randomly chosen, URL safe 24 character string. -Metadata such as the MIME type, upload time and length are stored in the -sqlite3 database indexed by ``media_id``. -Content is stored on the filesystem under a ``"local_content"`` directory. -Thumbnails are stored under a ``"local_thumbnails"`` directory. -The item with ``media_id`` ``"aabbccccccccdddddddddddd"`` is stored under -``"local_content/aa/bb/ccccccccdddddddddddd"``. Its thumbnail with width -``128`` and height ``96`` and type ``"image/jpeg"`` is stored under -``"local_thumbnails/aa/bb/ccccccccdddddddddddd/128-96-image-jpeg"`` -Remote content is cached under ``"remote_content"`` directory. Each item of -remote content is assigned a local "``filesystem_id``" to ensure that the -directory structure ``"remote_content/server_name/aa/bb/ccccccccdddddddddddd"`` -is appropriate. Thumbnails for remote content are stored under -``"remote_thumbnails/server_name/..."`` diff --git a/docs/metrics-howto.md b/docs/metrics-howto.md new file mode 100644 index 000000000..32abb9f44 --- /dev/null +++ b/docs/metrics-howto.md @@ -0,0 +1,217 @@ +# How to monitor Synapse metrics using Prometheus + +1. Install Prometheus: + + Follow instructions at + + +1. Enable Synapse metrics: + + There are two methods of enabling metrics in Synapse. + + The first serves the metrics as a part of the usual web server and + can be enabled by adding the \"metrics\" resource to the existing + listener as such: + + resources: + - names: + - client + - metrics + + This provides a simple way of adding metrics to your Synapse + installation, and serves under `/_synapse/metrics`. If you do not + wish your metrics be publicly exposed, you will need to either + filter it out at your load balancer, or use the second method. + + The second method runs the metrics server on a different port, in a + different thread to Synapse. This can make it more resilient to + heavy load meaning metrics cannot be retrieved, and can be exposed + to just internal networks easier. The served metrics are available + over HTTP only, and will be available at `/`. + + Add a new listener to homeserver.yaml: + + listeners: + - type: metrics + port: 9000 + bind_addresses: + - '0.0.0.0' + + For both options, you will need to ensure that `enable_metrics` is + set to `True`. + +1. Restart Synapse. + +1. Add a Prometheus target for Synapse. + + It needs to set the `metrics_path` to a non-default value (under + `scrape_configs`): + + - job_name: "synapse" + metrics_path: "/_synapse/metrics" + static_configs: + - targets: ["my.server.here:port"] + + where `my.server.here` is the IP address of Synapse, and `port` is + the listener port configured with the `metrics` resource. + + If your prometheus is older than 1.5.2, you will need to replace + `static_configs` in the above with `target_groups`. + +1. Restart Prometheus. + +## Renaming of metrics & deprecation of old names in 1.2 + +Synapse 1.2 updates the Prometheus metrics to match the naming +convention of the upstream `prometheus_client`. The old names are +considered deprecated and will be removed in a future version of +Synapse. + +| New Name | Old Name | +| ---------------------------------------------------------------------------- | ---------------------------------------------------------------------- | +| python_gc_objects_collected_total | python_gc_objects_collected | +| python_gc_objects_uncollectable_total | python_gc_objects_uncollectable | +| python_gc_collections_total | python_gc_collections | +| process_cpu_seconds_total | process_cpu_seconds | +| synapse_federation_client_sent_transactions_total | synapse_federation_client_sent_transactions | +| synapse_federation_client_events_processed_total | synapse_federation_client_events_processed | +| synapse_event_processing_loop_count_total | synapse_event_processing_loop_count | +| synapse_event_processing_loop_room_count_total | synapse_event_processing_loop_room_count | +| synapse_util_metrics_block_count_total | synapse_util_metrics_block_count | +| synapse_util_metrics_block_time_seconds_total | synapse_util_metrics_block_time_seconds | +| synapse_util_metrics_block_ru_utime_seconds_total | synapse_util_metrics_block_ru_utime_seconds | +| synapse_util_metrics_block_ru_stime_seconds_total | synapse_util_metrics_block_ru_stime_seconds | +| synapse_util_metrics_block_db_txn_count_total | synapse_util_metrics_block_db_txn_count | +| synapse_util_metrics_block_db_txn_duration_seconds_total | synapse_util_metrics_block_db_txn_duration_seconds | +| synapse_util_metrics_block_db_sched_duration_seconds_total | synapse_util_metrics_block_db_sched_duration_seconds | +| synapse_background_process_start_count_total | synapse_background_process_start_count | +| synapse_background_process_ru_utime_seconds_total | synapse_background_process_ru_utime_seconds | +| synapse_background_process_ru_stime_seconds_total | synapse_background_process_ru_stime_seconds | +| synapse_background_process_db_txn_count_total | synapse_background_process_db_txn_count | +| synapse_background_process_db_txn_duration_seconds_total | synapse_background_process_db_txn_duration_seconds | +| synapse_background_process_db_sched_duration_seconds_total | synapse_background_process_db_sched_duration_seconds | +| synapse_storage_events_persisted_events_total | synapse_storage_events_persisted_events | +| synapse_storage_events_persisted_events_sep_total | synapse_storage_events_persisted_events_sep | +| synapse_storage_events_state_delta_total | synapse_storage_events_state_delta | +| synapse_storage_events_state_delta_single_event_total | synapse_storage_events_state_delta_single_event | +| synapse_storage_events_state_delta_reuse_delta_total | synapse_storage_events_state_delta_reuse_delta | +| synapse_federation_server_received_pdus_total | synapse_federation_server_received_pdus | +| synapse_federation_server_received_edus_total | synapse_federation_server_received_edus | +| synapse_handler_presence_notified_presence_total | synapse_handler_presence_notified_presence | +| synapse_handler_presence_federation_presence_out_total | synapse_handler_presence_federation_presence_out | +| synapse_handler_presence_presence_updates_total | synapse_handler_presence_presence_updates | +| synapse_handler_presence_timers_fired_total | synapse_handler_presence_timers_fired | +| synapse_handler_presence_federation_presence_total | synapse_handler_presence_federation_presence | +| synapse_handler_presence_bump_active_time_total | synapse_handler_presence_bump_active_time | +| synapse_federation_client_sent_edus_total | synapse_federation_client_sent_edus | +| synapse_federation_client_sent_pdu_destinations_count_total | synapse_federation_client_sent_pdu_destinations:count | +| synapse_federation_client_sent_pdu_destinations_total | synapse_federation_client_sent_pdu_destinations:total | +| synapse_handlers_appservice_events_processed_total | synapse_handlers_appservice_events_processed | +| synapse_notifier_notified_events_total | synapse_notifier_notified_events | +| synapse_push_bulk_push_rule_evaluator_push_rules_invalidation_counter_total | synapse_push_bulk_push_rule_evaluator_push_rules_invalidation_counter | +| synapse_push_bulk_push_rule_evaluator_push_rules_state_size_counter_total | synapse_push_bulk_push_rule_evaluator_push_rules_state_size_counter | +| synapse_http_httppusher_http_pushes_processed_total | synapse_http_httppusher_http_pushes_processed | +| synapse_http_httppusher_http_pushes_failed_total | synapse_http_httppusher_http_pushes_failed | +| synapse_http_httppusher_badge_updates_processed_total | synapse_http_httppusher_badge_updates_processed | +| synapse_http_httppusher_badge_updates_failed_total | synapse_http_httppusher_badge_updates_failed | + +Removal of deprecated metrics & time based counters becoming histograms in 0.31.0 +--------------------------------------------------------------------------------- + +The duplicated metrics deprecated in Synapse 0.27.0 have been removed. + +All time duration-based metrics have been changed to be seconds. This +affects: + +| msec -> sec metrics | +| -------------------------------------- | +| python_gc_time | +| python_twisted_reactor_tick_time | +| synapse_storage_query_time | +| synapse_storage_schedule_time | +| synapse_storage_transaction_time | + +Several metrics have been changed to be histograms, which sort entries +into buckets and allow better analysis. The following metrics are now +histograms: + +| Altered metrics | +| ------------------------------------------------ | +| python_gc_time | +| python_twisted_reactor_pending_calls | +| python_twisted_reactor_tick_time | +| synapse_http_server_response_time_seconds | +| synapse_storage_query_time | +| synapse_storage_schedule_time | +| synapse_storage_transaction_time | + +Block and response metrics renamed for 0.27.0 +--------------------------------------------- + +Synapse 0.27.0 begins the process of rationalising the duplicate +`*:count` metrics reported for the resource tracking for code blocks and +HTTP requests. + +At the same time, the corresponding `*:total` metrics are being renamed, +as the `:total` suffix no longer makes sense in the absence of a +corresponding `:count` metric. + +To enable a graceful migration path, this release just adds new names +for the metrics being renamed. A future release will remove the old +ones. + +The following table shows the new metrics, and the old metrics which +they are replacing. + +| New name | Old name | +| ------------------------------------------------------------- | ---------------------------------------------------------- | +| synapse_util_metrics_block_count | synapse_util_metrics_block_timer:count | +| synapse_util_metrics_block_count | synapse_util_metrics_block_ru_utime:count | +| synapse_util_metrics_block_count | synapse_util_metrics_block_ru_stime:count | +| synapse_util_metrics_block_count | synapse_util_metrics_block_db_txn_count:count | +| synapse_util_metrics_block_count | synapse_util_metrics_block_db_txn_duration:count | +| synapse_util_metrics_block_time_seconds | synapse_util_metrics_block_timer:total | +| synapse_util_metrics_block_ru_utime_seconds | synapse_util_metrics_block_ru_utime:total | +| synapse_util_metrics_block_ru_stime_seconds | synapse_util_metrics_block_ru_stime:total | +| synapse_util_metrics_block_db_txn_count | synapse_util_metrics_block_db_txn_count:total | +| synapse_util_metrics_block_db_txn_duration_seconds | synapse_util_metrics_block_db_txn_duration:total | +| synapse_http_server_response_count | synapse_http_server_requests | +| synapse_http_server_response_count | synapse_http_server_response_time:count | +| synapse_http_server_response_count | synapse_http_server_response_ru_utime:count | +| synapse_http_server_response_count | synapse_http_server_response_ru_stime:count | +| synapse_http_server_response_count | synapse_http_server_response_db_txn_count:count | +| synapse_http_server_response_count | synapse_http_server_response_db_txn_duration:count | +| synapse_http_server_response_time_seconds | synapse_http_server_response_time:total | +| synapse_http_server_response_ru_utime_seconds | synapse_http_server_response_ru_utime:total | +| synapse_http_server_response_ru_stime_seconds | synapse_http_server_response_ru_stime:total | +| synapse_http_server_response_db_txn_count | synapse_http_server_response_db_txn_count:total | +| synapse_http_server_response_db_txn_duration_seconds | synapse_http_server_response_db_txn_duration:total | + +Standard Metric Names +--------------------- + +As of synapse version 0.18.2, the format of the process-wide metrics has +been changed to fit prometheus standard naming conventions. Additionally +the units have been changed to seconds, from miliseconds. + +| New name | Old name | +| ---------------------------------------- | --------------------------------- | +| process_cpu_user_seconds_total | process_resource_utime / 1000 | +| process_cpu_system_seconds_total | process_resource_stime / 1000 | +| process_open_fds (no \'type\' label) | process_fds | + +The python-specific counts of garbage collector performance have been +renamed. + +| New name | Old name | +| -------------------------------- | -------------------------- | +| python_gc_time | reactor_gc_time | +| python_gc_unreachable_total | reactor_gc_unreachable | +| python_gc_counts | reactor_gc_counts | + +The twisted-specific reactor metrics have been renamed. + +| New name | Old name | +| -------------------------------------- | ----------------------- | +| python_twisted_reactor_pending_calls | reactor_pending_calls | +| python_twisted_reactor_tick_time | reactor_tick_time | diff --git a/docs/metrics-howto.rst b/docs/metrics-howto.rst deleted file mode 100644 index 973641f3d..000000000 --- a/docs/metrics-howto.rst +++ /dev/null @@ -1,285 +0,0 @@ -How to monitor Synapse metrics using Prometheus -=============================================== - -1. Install Prometheus: - - Follow instructions at http://prometheus.io/docs/introduction/install/ - -2. Enable Synapse metrics: - - There are two methods of enabling metrics in Synapse. - - The first serves the metrics as a part of the usual web server and can be - enabled by adding the "metrics" resource to the existing listener as such:: - - resources: - - names: - - client - - metrics - - This provides a simple way of adding metrics to your Synapse installation, - and serves under ``/_synapse/metrics``. If you do not wish your metrics be - publicly exposed, you will need to either filter it out at your load - balancer, or use the second method. - - The second method runs the metrics server on a different port, in a - different thread to Synapse. This can make it more resilient to heavy load - meaning metrics cannot be retrieved, and can be exposed to just internal - networks easier. The served metrics are available over HTTP only, and will - be available at ``/``. - - Add a new listener to homeserver.yaml:: - - listeners: - - type: metrics - port: 9000 - bind_addresses: - - '0.0.0.0' - - For both options, you will need to ensure that ``enable_metrics`` is set to - ``True``. - - Restart Synapse. - -3. Add a Prometheus target for Synapse. - - It needs to set the ``metrics_path`` to a non-default value (under ``scrape_configs``):: - - - job_name: "synapse" - metrics_path: "/_synapse/metrics" - static_configs: - - targets: ["my.server.here:port"] - - where ``my.server.here`` is the IP address of Synapse, and ``port`` is the listener port - configured with the ``metrics`` resource. - - If your prometheus is older than 1.5.2, you will need to replace - ``static_configs`` in the above with ``target_groups``. - - Restart Prometheus. - - -Renaming of metrics & deprecation of old names in 1.2 ------------------------------------------------------ - -Synapse 1.2 updates the Prometheus metrics to match the naming convention of the -upstream ``prometheus_client``. The old names are considered deprecated and will -be removed in a future version of Synapse. - -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| New Name | Old Name | -+=============================================================================+=======================================================================+ -| python_gc_objects_collected_total | python_gc_objects_collected | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| python_gc_objects_uncollectable_total | python_gc_objects_uncollectable | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| python_gc_collections_total | python_gc_collections | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| process_cpu_seconds_total | process_cpu_seconds | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_federation_client_sent_transactions_total | synapse_federation_client_sent_transactions | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_federation_client_events_processed_total | synapse_federation_client_events_processed | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_event_processing_loop_count_total | synapse_event_processing_loop_count | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_event_processing_loop_room_count_total | synapse_event_processing_loop_room_count | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_util_metrics_block_count_total | synapse_util_metrics_block_count | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_util_metrics_block_time_seconds_total | synapse_util_metrics_block_time_seconds | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_util_metrics_block_ru_utime_seconds_total | synapse_util_metrics_block_ru_utime_seconds | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_util_metrics_block_ru_stime_seconds_total | synapse_util_metrics_block_ru_stime_seconds | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_util_metrics_block_db_txn_count_total | synapse_util_metrics_block_db_txn_count | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_util_metrics_block_db_txn_duration_seconds_total | synapse_util_metrics_block_db_txn_duration_seconds | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_util_metrics_block_db_sched_duration_seconds_total | synapse_util_metrics_block_db_sched_duration_seconds | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_background_process_start_count_total | synapse_background_process_start_count | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_background_process_ru_utime_seconds_total | synapse_background_process_ru_utime_seconds | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_background_process_ru_stime_seconds_total | synapse_background_process_ru_stime_seconds | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_background_process_db_txn_count_total | synapse_background_process_db_txn_count | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_background_process_db_txn_duration_seconds_total | synapse_background_process_db_txn_duration_seconds | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_background_process_db_sched_duration_seconds_total | synapse_background_process_db_sched_duration_seconds | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_storage_events_persisted_events_total | synapse_storage_events_persisted_events | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_storage_events_persisted_events_sep_total | synapse_storage_events_persisted_events_sep | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_storage_events_state_delta_total | synapse_storage_events_state_delta | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_storage_events_state_delta_single_event_total | synapse_storage_events_state_delta_single_event | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_storage_events_state_delta_reuse_delta_total | synapse_storage_events_state_delta_reuse_delta | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_federation_server_received_pdus_total | synapse_federation_server_received_pdus | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_federation_server_received_edus_total | synapse_federation_server_received_edus | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_handler_presence_notified_presence_total | synapse_handler_presence_notified_presence | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_handler_presence_federation_presence_out_total | synapse_handler_presence_federation_presence_out | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_handler_presence_presence_updates_total | synapse_handler_presence_presence_updates | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_handler_presence_timers_fired_total | synapse_handler_presence_timers_fired | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_handler_presence_federation_presence_total | synapse_handler_presence_federation_presence | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_handler_presence_bump_active_time_total | synapse_handler_presence_bump_active_time | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_federation_client_sent_edus_total | synapse_federation_client_sent_edus | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_federation_client_sent_pdu_destinations_count_total | synapse_federation_client_sent_pdu_destinations:count | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_federation_client_sent_pdu_destinations_total | synapse_federation_client_sent_pdu_destinations:total | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_handlers_appservice_events_processed_total | synapse_handlers_appservice_events_processed | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_notifier_notified_events_total | synapse_notifier_notified_events | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_push_bulk_push_rule_evaluator_push_rules_invalidation_counter_total | synapse_push_bulk_push_rule_evaluator_push_rules_invalidation_counter | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_push_bulk_push_rule_evaluator_push_rules_state_size_counter_total | synapse_push_bulk_push_rule_evaluator_push_rules_state_size_counter | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_http_httppusher_http_pushes_processed_total | synapse_http_httppusher_http_pushes_processed | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_http_httppusher_http_pushes_failed_total | synapse_http_httppusher_http_pushes_failed | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_http_httppusher_badge_updates_processed_total | synapse_http_httppusher_badge_updates_processed | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ -| synapse_http_httppusher_badge_updates_failed_total | synapse_http_httppusher_badge_updates_failed | -+-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ - - -Removal of deprecated metrics & time based counters becoming histograms in 0.31.0 ---------------------------------------------------------------------------------- - -The duplicated metrics deprecated in Synapse 0.27.0 have been removed. - -All time duration-based metrics have been changed to be seconds. This affects: - -+----------------------------------+ -| msec -> sec metrics | -+==================================+ -| python_gc_time | -+----------------------------------+ -| python_twisted_reactor_tick_time | -+----------------------------------+ -| synapse_storage_query_time | -+----------------------------------+ -| synapse_storage_schedule_time | -+----------------------------------+ -| synapse_storage_transaction_time | -+----------------------------------+ - -Several metrics have been changed to be histograms, which sort entries into -buckets and allow better analysis. The following metrics are now histograms: - -+-------------------------------------------+ -| Altered metrics | -+===========================================+ -| python_gc_time | -+-------------------------------------------+ -| python_twisted_reactor_pending_calls | -+-------------------------------------------+ -| python_twisted_reactor_tick_time | -+-------------------------------------------+ -| synapse_http_server_response_time_seconds | -+-------------------------------------------+ -| synapse_storage_query_time | -+-------------------------------------------+ -| synapse_storage_schedule_time | -+-------------------------------------------+ -| synapse_storage_transaction_time | -+-------------------------------------------+ - - -Block and response metrics renamed for 0.27.0 ---------------------------------------------- - -Synapse 0.27.0 begins the process of rationalising the duplicate ``*:count`` -metrics reported for the resource tracking for code blocks and HTTP requests. - -At the same time, the corresponding ``*:total`` metrics are being renamed, as -the ``:total`` suffix no longer makes sense in the absence of a corresponding -``:count`` metric. - -To enable a graceful migration path, this release just adds new names for the -metrics being renamed. A future release will remove the old ones. - -The following table shows the new metrics, and the old metrics which they are -replacing. - -==================================================== =================================================== -New name Old name -==================================================== =================================================== -synapse_util_metrics_block_count synapse_util_metrics_block_timer:count -synapse_util_metrics_block_count synapse_util_metrics_block_ru_utime:count -synapse_util_metrics_block_count synapse_util_metrics_block_ru_stime:count -synapse_util_metrics_block_count synapse_util_metrics_block_db_txn_count:count -synapse_util_metrics_block_count synapse_util_metrics_block_db_txn_duration:count - -synapse_util_metrics_block_time_seconds synapse_util_metrics_block_timer:total -synapse_util_metrics_block_ru_utime_seconds synapse_util_metrics_block_ru_utime:total -synapse_util_metrics_block_ru_stime_seconds synapse_util_metrics_block_ru_stime:total -synapse_util_metrics_block_db_txn_count synapse_util_metrics_block_db_txn_count:total -synapse_util_metrics_block_db_txn_duration_seconds synapse_util_metrics_block_db_txn_duration:total - -synapse_http_server_response_count synapse_http_server_requests -synapse_http_server_response_count synapse_http_server_response_time:count -synapse_http_server_response_count synapse_http_server_response_ru_utime:count -synapse_http_server_response_count synapse_http_server_response_ru_stime:count -synapse_http_server_response_count synapse_http_server_response_db_txn_count:count -synapse_http_server_response_count synapse_http_server_response_db_txn_duration:count - -synapse_http_server_response_time_seconds synapse_http_server_response_time:total -synapse_http_server_response_ru_utime_seconds synapse_http_server_response_ru_utime:total -synapse_http_server_response_ru_stime_seconds synapse_http_server_response_ru_stime:total -synapse_http_server_response_db_txn_count synapse_http_server_response_db_txn_count:total -synapse_http_server_response_db_txn_duration_seconds synapse_http_server_response_db_txn_duration:total -==================================================== =================================================== - - -Standard Metric Names ---------------------- - -As of synapse version 0.18.2, the format of the process-wide metrics has been -changed to fit prometheus standard naming conventions. Additionally the units -have been changed to seconds, from miliseconds. - -================================== ============================= -New name Old name -================================== ============================= -process_cpu_user_seconds_total process_resource_utime / 1000 -process_cpu_system_seconds_total process_resource_stime / 1000 -process_open_fds (no 'type' label) process_fds -================================== ============================= - -The python-specific counts of garbage collector performance have been renamed. - -=========================== ====================== -New name Old name -=========================== ====================== -python_gc_time reactor_gc_time -python_gc_unreachable_total reactor_gc_unreachable -python_gc_counts reactor_gc_counts -=========================== ====================== - -The twisted-specific reactor metrics have been renamed. - -==================================== ===================== -New name Old name -==================================== ===================== -python_twisted_reactor_pending_calls reactor_pending_calls -python_twisted_reactor_tick_time reactor_tick_time -==================================== ===================== diff --git a/docs/opentracing.md b/docs/opentracing.md new file mode 100644 index 000000000..4c7a56a5d --- /dev/null +++ b/docs/opentracing.md @@ -0,0 +1,93 @@ +# OpenTracing + +## Background + +OpenTracing is a semi-standard being adopted by a number of distributed +tracing platforms. It is a common api for facilitating vendor-agnostic +tracing instrumentation. That is, we can use the OpenTracing api and +select one of a number of tracer implementations to do the heavy lifting +in the background. Our current selected implementation is Jaeger. + +OpenTracing is a tool which gives an insight into the causal +relationship of work done in and between servers. The servers each track +events and report them to a centralised server - in Synapse's case: +Jaeger. The basic unit used to represent events is the span. The span +roughly represents a single piece of work that was done and the time at +which it occurred. A span can have child spans, meaning that the work of +the child had to be completed for the parent span to complete, or it can +have follow-on spans which represent work that is undertaken as a result +of the parent but is not depended on by the parent to in order to +finish. + +Since this is undertaken in a distributed environment a request to +another server, such as an RPC or a simple GET, can be considered a span +(a unit or work) for the local server. This causal link is what +OpenTracing aims to capture and visualise. In order to do this metadata +about the local server's span, i.e the 'span context', needs to be +included with the request to the remote. + +It is up to the remote server to decide what it does with the spans it +creates. This is called the sampling policy and it can be configured +through Jaeger's settings. + +For OpenTracing concepts see +. + +For more information about Jaeger's implementation see + + +## Setting up OpenTracing + +To receive OpenTracing spans, start up a Jaeger server. This can be done +using docker like so: + +```sh +docker run -d --name jaeger + -p 6831:6831/udp \ + -p 6832:6832/udp \ + -p 5778:5778 \ + -p 16686:16686 \ + -p 14268:14268 \ + jaegertracing/all-in-one:1.13 +``` + +Latest documentation is probably at + + +## Enable OpenTracing in Synapse + +OpenTracing is not enabled by default. It must be enabled in the +homeserver config by uncommenting the config options under `opentracing` +as shown in the [sample config](./sample_config.yaml). For example: + +```yaml +opentracing: + tracer_enabled: true + homeserver_whitelist: + - "mytrustedhomeserver.org" + - "*.myotherhomeservers.com" +``` + +## Homeserver whitelisting + +The homeserver whitelist is configured using regular expressions. A list +of regular expressions can be given and their union will be compared +when propagating any spans contexts to another homeserver. + +Though it's mostly safe to send and receive span contexts to and from +untrusted users since span contexts are usually opaque ids it can lead +to two problems, namely: + +- If the span context is marked as sampled by the sending homeserver + the receiver will sample it. Therefore two homeservers with wildly + different sampling policies could incur higher sampling counts than + intended. +- Sending servers can attach arbitrary data to spans, known as + 'baggage'. For safety this has been disabled in Synapse but that + doesn't prevent another server sending you baggage which will be + logged to OpenTracing's logs. + +## Configuring Jaeger + +Sampling strategies can be set as in this document: + diff --git a/docs/opentracing.rst b/docs/opentracing.rst deleted file mode 100644 index 6e98ab56b..000000000 --- a/docs/opentracing.rst +++ /dev/null @@ -1,123 +0,0 @@ -=========== -OpenTracing -=========== - -Background ----------- - -OpenTracing is a semi-standard being adopted by a number of distributed tracing -platforms. It is a common api for facilitating vendor-agnostic tracing -instrumentation. That is, we can use the OpenTracing api and select one of a -number of tracer implementations to do the heavy lifting in the background. -Our current selected implementation is Jaeger. - -OpenTracing is a tool which gives an insight into the causal relationship of -work done in and between servers. The servers each track events and report them -to a centralised server - in Synapse's case: Jaeger. The basic unit used to -represent events is the span. The span roughly represents a single piece of work -that was done and the time at which it occurred. A span can have child spans, -meaning that the work of the child had to be completed for the parent span to -complete, or it can have follow-on spans which represent work that is undertaken -as a result of the parent but is not depended on by the parent to in order to -finish. - -Since this is undertaken in a distributed environment a request to another -server, such as an RPC or a simple GET, can be considered a span (a unit or -work) for the local server. This causal link is what OpenTracing aims to -capture and visualise. In order to do this metadata about the local server's -span, i.e the 'span context', needs to be included with the request to the -remote. - -It is up to the remote server to decide what it does with the spans -it creates. This is called the sampling policy and it can be configured -through Jaeger's settings. - -For OpenTracing concepts see -https://opentracing.io/docs/overview/what-is-tracing/. - -For more information about Jaeger's implementation see -https://www.jaegertracing.io/docs/ - -===================== -Seting up OpenTracing -===================== - -To receive OpenTracing spans, start up a Jaeger server. This can be done -using docker like so: - -.. code-block:: bash - - docker run -d --name jaeger - -p 6831:6831/udp \ - -p 6832:6832/udp \ - -p 5778:5778 \ - -p 16686:16686 \ - -p 14268:14268 \ - jaegertracing/all-in-one:1.13 - -Latest documentation is probably at -https://www.jaegertracing.io/docs/1.13/getting-started/ - - -Enable OpenTracing in Synapse ------------------------------ - -OpenTracing is not enabled by default. It must be enabled in the homeserver -config by uncommenting the config options under ``opentracing`` as shown in -the `sample config <./sample_config.yaml>`_. For example: - -.. code-block:: yaml - - opentracing: - tracer_enabled: true - homeserver_whitelist: - - "mytrustedhomeserver.org" - - "*.myotherhomeservers.com" - -Homeserver whitelisting ------------------------ - -The homeserver whitelist is configured using regular expressions. A list of regular -expressions can be given and their union will be compared when propagating any -spans contexts to another homeserver. - -Though it's mostly safe to send and receive span contexts to and from -untrusted users since span contexts are usually opaque ids it can lead to -two problems, namely: - -- If the span context is marked as sampled by the sending homeserver the receiver will - sample it. Therefore two homeservers with wildly different sampling policies - could incur higher sampling counts than intended. -- Sending servers can attach arbitrary data to spans, known as 'baggage'. For safety this has been disabled in Synapse - but that doesn't prevent another server sending you baggage which will be logged - to OpenTracing's logs. - -========== -EDU FORMAT -========== - -EDUs can contain tracing data in their content. This is not specced but -it could be of interest for other homeservers. - -EDU format (if you're using jaeger): - -.. code-block:: json - - { - "edu_type": "type", - "content": { - "org.matrix.opentracing_context": { - "uber-trace-id": "fe57cf3e65083289" - } - } - } - -Though you don't have to use jaeger you must inject the span context into -`org.matrix.opentracing_context` using the opentracing `Format.TEXT_MAP` inject method. - -================== -Configuring Jaeger -================== - -Sampling strategies can be set as in this document: -https://www.jaegertracing.io/docs/1.13/sampling/ diff --git a/docs/password_auth_providers.md b/docs/password_auth_providers.md new file mode 100644 index 000000000..0db1a3804 --- /dev/null +++ b/docs/password_auth_providers.md @@ -0,0 +1,116 @@ +# Password auth provider modules + +Password auth providers offer a way for server administrators to +integrate their Synapse installation with an existing authentication +system. + +A password auth provider is a Python class which is dynamically loaded +into Synapse, and provides a number of methods by which it can integrate +with the authentication system. + +This document serves as a reference for those looking to implement their +own password auth providers. + +## Required methods + +Password auth provider classes must provide the following methods: + +*class* `SomeProvider.parse_config`(*config*) + +> This method is passed the `config` object for this module from the +> homeserver configuration file. +> +> It should perform any appropriate sanity checks on the provided +> configuration, and return an object which is then passed into +> `__init__`. + +*class* `SomeProvider`(*config*, *account_handler*) + +> The constructor is passed the config object returned by +> `parse_config`, and a `synapse.module_api.ModuleApi` object which +> allows the password provider to check if accounts exist and/or create +> new ones. + +## Optional methods + +Password auth provider classes may optionally provide the following +methods. + +*class* `SomeProvider.get_db_schema_files`() + +> This method, if implemented, should return an Iterable of +> `(name, stream)` pairs of database schema files. Each file is applied +> in turn at initialisation, and a record is then made in the database +> so that it is not re-applied on the next start. + +`someprovider.get_supported_login_types`() + +> This method, if implemented, should return a `dict` mapping from a +> login type identifier (such as `m.login.password`) to an iterable +> giving the fields which must be provided by the user in the submission +> to the `/login` api. These fields are passed in the `login_dict` +> dictionary to `check_auth`. +> +> For example, if a password auth provider wants to implement a custom +> login type of `com.example.custom_login`, where the client is expected +> to pass the fields `secret1` and `secret2`, the provider should +> implement this method and return the following dict: +> +> {"com.example.custom_login": ("secret1", "secret2")} + +`someprovider.check_auth`(*username*, *login_type*, *login_dict*) + +> This method is the one that does the real work. If implemented, it +> will be called for each login attempt where the login type matches one +> of the keys returned by `get_supported_login_types`. +> +> It is passed the (possibly UNqualified) `user` provided by the client, +> the login type, and a dictionary of login secrets passed by the +> client. +> +> The method should return a Twisted `Deferred` object, which resolves +> to the canonical `@localpart:domain` user id if authentication is +> successful, and `None` if not. +> +> Alternatively, the `Deferred` can resolve to a `(str, func)` tuple, in +> which case the second field is a callback which will be called with +> the result from the `/login` call (including `access_token`, +> `device_id`, etc.) + +`someprovider.check_3pid_auth`(*medium*, *address*, *password*) + +> This method, if implemented, is called when a user attempts to +> register or log in with a third party identifier, such as email. It is +> passed the medium (ex. "email"), an address (ex. +> "") and the user's password. +> +> The method should return a Twisted `Deferred` object, which resolves +> to a `str` containing the user's (canonical) User ID if +> authentication was successful, and `None` if not. +> +> As with `check_auth`, the `Deferred` may alternatively resolve to a +> `(user_id, callback)` tuple. + +`someprovider.check_password`(*user_id*, *password*) + +> This method provides a simpler interface than +> `get_supported_login_types` and `check_auth` for password auth +> providers that just want to provide a mechanism for validating +> `m.login.password` logins. +> +> Iif implemented, it will be called to check logins with an +> `m.login.password` login type. It is passed a qualified +> `@localpart:domain` user id, and the password provided by the user. +> +> The method should return a Twisted `Deferred` object, which resolves +> to `True` if authentication is successful, and `False` if not. + +`someprovider.on_logged_out`(*user_id*, *device_id*, *access_token*) + +> This method, if implemented, is called when a user logs out. It is +> passed the qualified user ID, the ID of the deactivated device (if +> any: access tokens are occasionally created without an associated +> device ID), and the (now deactivated) access token. +> +> It may return a Twisted `Deferred` object; the logout request will +> wait for the deferred to complete but the result is ignored. diff --git a/docs/password_auth_providers.rst b/docs/password_auth_providers.rst deleted file mode 100644 index 6149ba745..000000000 --- a/docs/password_auth_providers.rst +++ /dev/null @@ -1,113 +0,0 @@ -Password auth provider modules -============================== - -Password auth providers offer a way for server administrators to integrate -their Synapse installation with an existing authentication system. - -A password auth provider is a Python class which is dynamically loaded into -Synapse, and provides a number of methods by which it can integrate with the -authentication system. - -This document serves as a reference for those looking to implement their own -password auth providers. - -Required methods ----------------- - -Password auth provider classes must provide the following methods: - -*class* ``SomeProvider.parse_config``\(*config*) - - This method is passed the ``config`` object for this module from the - homeserver configuration file. - - It should perform any appropriate sanity checks on the provided - configuration, and return an object which is then passed into ``__init__``. - -*class* ``SomeProvider``\(*config*, *account_handler*) - - The constructor is passed the config object returned by ``parse_config``, - and a ``synapse.module_api.ModuleApi`` object which allows the - password provider to check if accounts exist and/or create new ones. - -Optional methods ----------------- - -Password auth provider classes may optionally provide the following methods. - -*class* ``SomeProvider.get_db_schema_files``\() - - This method, if implemented, should return an Iterable of ``(name, - stream)`` pairs of database schema files. Each file is applied in turn at - initialisation, and a record is then made in the database so that it is - not re-applied on the next start. - -``someprovider.get_supported_login_types``\() - - This method, if implemented, should return a ``dict`` mapping from a login - type identifier (such as ``m.login.password``) to an iterable giving the - fields which must be provided by the user in the submission to the - ``/login`` api. These fields are passed in the ``login_dict`` dictionary - to ``check_auth``. - - For example, if a password auth provider wants to implement a custom login - type of ``com.example.custom_login``, where the client is expected to pass - the fields ``secret1`` and ``secret2``, the provider should implement this - method and return the following dict:: - - {"com.example.custom_login": ("secret1", "secret2")} - -``someprovider.check_auth``\(*username*, *login_type*, *login_dict*) - - This method is the one that does the real work. If implemented, it will be - called for each login attempt where the login type matches one of the keys - returned by ``get_supported_login_types``. - - It is passed the (possibly UNqualified) ``user`` provided by the client, - the login type, and a dictionary of login secrets passed by the client. - - The method should return a Twisted ``Deferred`` object, which resolves to - the canonical ``@localpart:domain`` user id if authentication is successful, - and ``None`` if not. - - Alternatively, the ``Deferred`` can resolve to a ``(str, func)`` tuple, in - which case the second field is a callback which will be called with the - result from the ``/login`` call (including ``access_token``, ``device_id``, - etc.) - -``someprovider.check_3pid_auth``\(*medium*, *address*, *password*) - - This method, if implemented, is called when a user attempts to register or - log in with a third party identifier, such as email. It is passed the - medium (ex. "email"), an address (ex. "jdoe@example.com") and the user's - password. - - The method should return a Twisted ``Deferred`` object, which resolves to - a ``str`` containing the user's (canonical) User ID if authentication was - successful, and ``None`` if not. - - As with ``check_auth``, the ``Deferred`` may alternatively resolve to a - ``(user_id, callback)`` tuple. - -``someprovider.check_password``\(*user_id*, *password*) - - This method provides a simpler interface than ``get_supported_login_types`` - and ``check_auth`` for password auth providers that just want to provide a - mechanism for validating ``m.login.password`` logins. - - Iif implemented, it will be called to check logins with an - ``m.login.password`` login type. It is passed a qualified - ``@localpart:domain`` user id, and the password provided by the user. - - The method should return a Twisted ``Deferred`` object, which resolves to - ``True`` if authentication is successful, and ``False`` if not. - -``someprovider.on_logged_out``\(*user_id*, *device_id*, *access_token*) - - This method, if implemented, is called when a user logs out. It is passed - the qualified user ID, the ID of the deactivated device (if any: access - tokens are occasionally created without an associated device ID), and the - (now deactivated) access token. - - It may return a Twisted ``Deferred`` object; the logout request will wait - for the deferred to complete but the result is ignored. diff --git a/docs/postgres.md b/docs/postgres.md new file mode 100644 index 000000000..29cf76285 --- /dev/null +++ b/docs/postgres.md @@ -0,0 +1,164 @@ +# Using Postgres + +Postgres version 9.5 or later is known to work. + +## Install postgres client libraries + +Synapse will require the python postgres client library in order to +connect to a postgres database. + +- If you are using the [matrix.org debian/ubuntu + packages](../INSTALL.md#matrixorg-packages), the necessary python + library will already be installed, but you will need to ensure the + low-level postgres library is installed, which you can do with + `apt install libpq5`. +- For other pre-built packages, please consult the documentation from + the relevant package. +- If you installed synapse [in a + virtualenv](../INSTALL.md#installing-from-source), you can install + the library with: + + ~/synapse/env/bin/pip install matrix-synapse[postgres] + + (substituting the path to your virtualenv for `~/synapse/env`, if + you used a different path). You will require the postgres + development files. These are in the `libpq-dev` package on + Debian-derived distributions. + +## Set up database + +Assuming your PostgreSQL database user is called `postgres`, create a +user `synapse_user` with: + + su - postgres + createuser --pwprompt synapse_user + +Before you can authenticate with the `synapse_user`, you must create a +database that it can access. To create a database, first connect to the +database with your database user: + + su - postgres + psql + +and then run: + + CREATE DATABASE synapse + ENCODING 'UTF8' + LC_COLLATE='C' + LC_CTYPE='C' + template=template0 + OWNER synapse_user; + +This would create an appropriate database named `synapse` owned by the +`synapse_user` user (which must already have been created as above). + +Note that the PostgreSQL database *must* have the correct encoding set +(as shown above), otherwise it will not be able to store UTF8 strings. + +You may need to enable password authentication so `synapse_user` can +connect to the database. See +. + +## Tuning Postgres + +The default settings should be fine for most deployments. For larger +scale deployments tuning some of the settings is recommended, details of +which can be found at +. + +In particular, we've found tuning the following values helpful for +performance: + +- `shared_buffers` +- `effective_cache_size` +- `work_mem` +- `maintenance_work_mem` +- `autovacuum_work_mem` + +Note that the appropriate values for those fields depend on the amount +of free memory the database host has available. + +## Synapse config + +When you are ready to start using PostgreSQL, edit the `database` +section in your config file to match the following lines: + + database: + name: psycopg2 + args: + user: + password: + database: + host: + cp_min: 5 + cp_max: 10 + +All key, values in `args` are passed to the `psycopg2.connect(..)` +function, except keys beginning with `cp_`, which are consumed by the +twisted adbapi connection pool. + +## Porting from SQLite + +### Overview + +The script `synapse_port_db` allows porting an existing synapse server +backed by SQLite to using PostgreSQL. This is done in as a two phase +process: + +1. Copy the existing SQLite database to a separate location (while the + server is down) and running the port script against that offline + database. +2. Shut down the server. Rerun the port script to port any data that + has come in since taking the first snapshot. Restart server against + the PostgreSQL database. + +The port script is designed to be run repeatedly against newer snapshots +of the SQLite database file. This makes it safe to repeat step 1 if +there was a delay between taking the previous snapshot and being ready +to do step 2. + +It is safe to at any time kill the port script and restart it. + +### Using the port script + +Firstly, shut down the currently running synapse server and copy its +database file (typically `homeserver.db`) to another location. Once the +copy is complete, restart synapse. For instance: + + ./synctl stop + cp homeserver.db homeserver.db.snapshot + ./synctl start + +Copy the old config file into a new config file: + + cp homeserver.yaml homeserver-postgres.yaml + +Edit the database section as described in the section *Synapse config* +above and with the SQLite snapshot located at `homeserver.db.snapshot` +simply run: + + synapse_port_db --sqlite-database homeserver.db.snapshot \ + --postgres-config homeserver-postgres.yaml + +The flag `--curses` displays a coloured curses progress UI. + +If the script took a long time to complete, or time has otherwise passed +since the original snapshot was taken, repeat the previous steps with a +newer snapshot. + +To complete the conversion shut down the synapse server and run the port +script one last time, e.g. if the SQLite database is at `homeserver.db` +run: + + synapse_port_db --sqlite-database homeserver.db \ + --postgres-config homeserver-postgres.yaml + +Once that has completed, change the synapse config to point at the +PostgreSQL database configuration file `homeserver-postgres.yaml`: + + ./synctl stop + mv homeserver.yaml homeserver-old-sqlite.yaml + mv homeserver-postgres.yaml homeserver.yaml + ./synctl start + +Synapse should now be running against PostgreSQL. diff --git a/docs/postgres.rst b/docs/postgres.rst deleted file mode 100644 index e08a5116b..000000000 --- a/docs/postgres.rst +++ /dev/null @@ -1,166 +0,0 @@ -Using Postgres --------------- - -Postgres version 9.5 or later is known to work. - -Install postgres client libraries -================================= - -Synapse will require the python postgres client library in order to connect to -a postgres database. - -* If you are using the `matrix.org debian/ubuntu - packages <../INSTALL.md#matrixorg-packages>`_, - the necessary python library will already be installed, but you will need to - ensure the low-level postgres library is installed, which you can do with - ``apt install libpq5``. - -* For other pre-built packages, please consult the documentation from the - relevant package. - -* If you installed synapse `in a virtualenv - <../INSTALL.md#installing-from-source>`_, you can install the library with:: - - ~/synapse/env/bin/pip install matrix-synapse[postgres] - - (substituting the path to your virtualenv for ``~/synapse/env``, if you used a - different path). You will require the postgres development files. These are in - the ``libpq-dev`` package on Debian-derived distributions. - -Set up database -=============== - -Assuming your PostgreSQL database user is called ``postgres``, create a user -``synapse_user`` with:: - - su - postgres - createuser --pwprompt synapse_user - -Before you can authenticate with the ``synapse_user``, you must create a -database that it can access. To create a database, first connect to the database -with your database user:: - - su - postgres - psql - -and then run:: - - CREATE DATABASE synapse - ENCODING 'UTF8' - LC_COLLATE='C' - LC_CTYPE='C' - template=template0 - OWNER synapse_user; - -This would create an appropriate database named ``synapse`` owned by the -``synapse_user`` user (which must already have been created as above). - -Note that the PostgreSQL database *must* have the correct encoding set (as -shown above), otherwise it will not be able to store UTF8 strings. - -You may need to enable password authentication so ``synapse_user`` can connect -to the database. See https://www.postgresql.org/docs/11/auth-pg-hba-conf.html. - -Tuning Postgres -=============== - -The default settings should be fine for most deployments. For larger scale -deployments tuning some of the settings is recommended, details of which can be -found at https://wiki.postgresql.org/wiki/Tuning_Your_PostgreSQL_Server. - -In particular, we've found tuning the following values helpful for performance: - -- ``shared_buffers`` -- ``effective_cache_size`` -- ``work_mem`` -- ``maintenance_work_mem`` -- ``autovacuum_work_mem`` - -Note that the appropriate values for those fields depend on the amount of free -memory the database host has available. - -Synapse config -============== - -When you are ready to start using PostgreSQL, edit the ``database`` section in -your config file to match the following lines:: - - database: - name: psycopg2 - args: - user: - password: - database: - host: - cp_min: 5 - cp_max: 10 - -All key, values in ``args`` are passed to the ``psycopg2.connect(..)`` -function, except keys beginning with ``cp_``, which are consumed by the twisted -adbapi connection pool. - - -Porting from SQLite -=================== - -Overview -~~~~~~~~ - -The script ``synapse_port_db`` allows porting an existing synapse server -backed by SQLite to using PostgreSQL. This is done in as a two phase process: - -1. Copy the existing SQLite database to a separate location (while the server - is down) and running the port script against that offline database. -2. Shut down the server. Rerun the port script to port any data that has come - in since taking the first snapshot. Restart server against the PostgreSQL - database. - -The port script is designed to be run repeatedly against newer snapshots of the -SQLite database file. This makes it safe to repeat step 1 if there was a delay -between taking the previous snapshot and being ready to do step 2. - -It is safe to at any time kill the port script and restart it. - -Using the port script -~~~~~~~~~~~~~~~~~~~~~ - -Firstly, shut down the currently running synapse server and copy its database -file (typically ``homeserver.db``) to another location. Once the copy is -complete, restart synapse. For instance:: - - ./synctl stop - cp homeserver.db homeserver.db.snapshot - ./synctl start - -Copy the old config file into a new config file:: - - cp homeserver.yaml homeserver-postgres.yaml - -Edit the database section as described in the section *Synapse config* above -and with the SQLite snapshot located at ``homeserver.db.snapshot`` simply run:: - - synapse_port_db --sqlite-database homeserver.db.snapshot \ - --postgres-config homeserver-postgres.yaml - -The flag ``--curses`` displays a coloured curses progress UI. - -If the script took a long time to complete, or time has otherwise passed since -the original snapshot was taken, repeat the previous steps with a newer -snapshot. - -To complete the conversion shut down the synapse server and run the port -script one last time, e.g. if the SQLite database is at ``homeserver.db`` -run:: - - synapse_port_db --sqlite-database homeserver.db \ - --postgres-config homeserver-postgres.yaml - -Once that has completed, change the synapse config to point at the PostgreSQL -database configuration file ``homeserver-postgres.yaml``:: - - ./synctl stop - mv homeserver.yaml homeserver-old-sqlite.yaml - mv homeserver-postgres.yaml homeserver.yaml - ./synctl start - -Synapse should now be running against PostgreSQL. diff --git a/docs/replication.md b/docs/replication.md new file mode 100644 index 000000000..ed8823315 --- /dev/null +++ b/docs/replication.md @@ -0,0 +1,37 @@ +# Replication Architecture + +## Motivation + +We'd like to be able to split some of the work that synapse does into +multiple python processes. In theory multiple synapse processes could +share a single postgresql database and we\'d scale up by running more +synapse processes. However much of synapse assumes that only one process +is interacting with the database, both for assigning unique identifiers +when inserting into tables, notifying components about new updates, and +for invalidating its caches. + +So running multiple copies of the current code isn't an option. One way +to run multiple processes would be to have a single writer process and +multiple reader processes connected to the same database. In order to do +this we'd need a way for the reader process to invalidate its in-memory +caches when an update happens on the writer. One way to do this is for +the writer to present an append-only log of updates which the readers +can consume to invalidate their caches and to push updates to listening +clients or pushers. + +Synapse already stores much of its data as an append-only log so that it +can correctly respond to `/sync` requests so the amount of code changes +needed to expose the append-only log to the readers should be fairly +minimal. + +## Architecture + +### The Replication Protocol + +See [tcp_replication.md](tcp_replication.md) + +### The Slaved DataStore + +There are read-only version of the synapse storage layer in +`synapse/replication/slave/storage` that use the response of the +replication API to invalidate their caches. diff --git a/docs/replication.rst b/docs/replication.rst deleted file mode 100644 index 310abb348..000000000 --- a/docs/replication.rst +++ /dev/null @@ -1,40 +0,0 @@ -Replication Architecture -======================== - -Motivation ----------- - -We'd like to be able to split some of the work that synapse does into multiple -python processes. In theory multiple synapse processes could share a single -postgresql database and we'd scale up by running more synapse processes. -However much of synapse assumes that only one process is interacting with the -database, both for assigning unique identifiers when inserting into tables, -notifying components about new updates, and for invalidating its caches. - -So running multiple copies of the current code isn't an option. One way to -run multiple processes would be to have a single writer process and multiple -reader processes connected to the same database. In order to do this we'd need -a way for the reader process to invalidate its in-memory caches when an update -happens on the writer. One way to do this is for the writer to present an -append-only log of updates which the readers can consume to invalidate their -caches and to push updates to listening clients or pushers. - -Synapse already stores much of its data as an append-only log so that it can -correctly respond to /sync requests so the amount of code changes needed to -expose the append-only log to the readers should be fairly minimal. - -Architecture ------------- - -The Replication Protocol -~~~~~~~~~~~~~~~~~~~~~~~~ - -See ``tcp_replication.rst`` - - -The Slaved DataStore -~~~~~~~~~~~~~~~~~~~~ - -There are read-only version of the synapse storage layer in -``synapse/replication/slave/storage`` that use the response of the replication -API to invalidate their caches. diff --git a/docs/reverse_proxy.md b/docs/reverse_proxy.md new file mode 100644 index 000000000..dcfc5c64a --- /dev/null +++ b/docs/reverse_proxy.md @@ -0,0 +1,123 @@ +# Using a reverse proxy with Synapse + +It is recommended to put a reverse proxy such as +[nginx](https://nginx.org/en/docs/http/ngx_http_proxy_module.html), +[Apache](https://httpd.apache.org/docs/current/mod/mod_proxy_http.html), +[Caddy](https://caddyserver.com/docs/proxy) or +[HAProxy](https://www.haproxy.org/) in front of Synapse. One advantage +of doing so is that it means that you can expose the default https port +(443) to Matrix clients without needing to run Synapse with root +privileges. + +> **NOTE**: Your reverse proxy must not `canonicalise` or `normalise` +the requested URI in any way (for example, by decoding `%xx` escapes). +Beware that Apache *will* canonicalise URIs unless you specifify +`nocanon`. + +When setting up a reverse proxy, remember that Matrix clients and other +Matrix servers do not necessarily need to connect to your server via the +same server name or port. Indeed, clients will use port 443 by default, +whereas servers default to port 8448. Where these are different, we +refer to the 'client port' and the \'federation port\'. See [Setting +up federation](federate.md) for more details of the algorithm used for +federation connections. + +Let's assume that we expect clients to connect to our server at +`https://matrix.example.com`, and other servers to connect at +`https://example.com:8448`. The following sections detail the configuration of +the reverse proxy and the homeserver. + +## Webserver configuration examples + +> **NOTE**: You only need one of these. + +### nginx + + server { + listen 443 ssl; + listen [::]:443 ssl; + server_name matrix.example.com; + + location /_matrix { + proxy_pass http://localhost:8008; + proxy_set_header X-Forwarded-For $remote_addr; + } + } + + server { + listen 8448 ssl default_server; + listen [::]:8448 ssl default_server; + server_name example.com; + + location / { + proxy_pass http://localhost:8008; + proxy_set_header X-Forwarded-For $remote_addr; + } + } + +> **NOTE**: Do not add a `/` after the port in `proxy_pass`, otherwise nginx will +canonicalise/normalise the URI. + +### Caddy + + matrix.example.com { + proxy /_matrix http://localhost:8008 { + transparent + } + } + + example.com:8448 { + proxy / http://localhost:8008 { + transparent + } + } + +### Apache + + + SSLEngine on + ServerName matrix.example.com; + + AllowEncodedSlashes NoDecode + ProxyPass /_matrix http://127.0.0.1:8008/_matrix nocanon + ProxyPassReverse /_matrix http://127.0.0.1:8008/_matrix + + + + SSLEngine on + ServerName example.com; + + AllowEncodedSlashes NoDecode + ProxyPass /_matrix http://127.0.0.1:8008/_matrix nocanon + ProxyPassReverse /_matrix http://127.0.0.1:8008/_matrix + + +> **NOTE**: ensure the `nocanon` options are included. + +### HAProxy + + frontend https + bind :::443 v4v6 ssl crt /etc/ssl/haproxy/ strict-sni alpn h2,http/1.1 + + # Matrix client traffic + acl matrix-host hdr(host) -i matrix.example.com + acl matrix-path path_beg /_matrix + + use_backend matrix if matrix-host matrix-path + + frontend matrix-federation + bind :::8448 v4v6 ssl crt /etc/ssl/haproxy/synapse.pem alpn h2,http/1.1 + default_backend matrix + + backend matrix + server matrix 127.0.0.1:8008 + +## Homeserver Configuration + +You will also want to set `bind_addresses: ['127.0.0.1']` and +`x_forwarded: true` for port 8008 in `homeserver.yaml` to ensure that +client IP addresses are recorded correctly. + +Having done so, you can then use `https://matrix.example.com` (instead +of `https://matrix.example.com:8448`) as the "Custom server" when +connecting to Synapse from a client. diff --git a/docs/reverse_proxy.rst b/docs/reverse_proxy.rst deleted file mode 100644 index 4b640ffc4..000000000 --- a/docs/reverse_proxy.rst +++ /dev/null @@ -1,112 +0,0 @@ -Using a reverse proxy with Synapse -================================== - -It is recommended to put a reverse proxy such as -`nginx `_, -`Apache `_, -`Caddy `_ or -`HAProxy `_ in front of Synapse. One advantage of -doing so is that it means that you can expose the default https port (443) to -Matrix clients without needing to run Synapse with root privileges. - -**NOTE**: Your reverse proxy must not 'canonicalise' or 'normalise' the -requested URI in any way (for example, by decoding ``%xx`` escapes). Beware -that Apache *will* canonicalise URIs unless you specifify ``nocanon``. - -When setting up a reverse proxy, remember that Matrix clients and other Matrix -servers do not necessarily need to connect to your server via the same server -name or port. Indeed, clients will use port 443 by default, whereas servers -default to port 8448. Where these are different, we refer to the 'client port' -and the 'federation port'. See `Setting up federation -`_ for more details of the algorithm used for -federation connections. - -Let's assume that we expect clients to connect to our server at -``https://matrix.example.com``, and other servers to connect at -``https://example.com:8448``. Here are some example configurations: - -* nginx:: - - server { - listen 443 ssl; - listen [::]:443 ssl; - server_name matrix.example.com; - - location /_matrix { - proxy_pass http://localhost:8008; - proxy_set_header X-Forwarded-For $remote_addr; - } - } - - server { - listen 8448 ssl default_server; - listen [::]:8448 ssl default_server; - server_name example.com; - - location / { - proxy_pass http://localhost:8008; - proxy_set_header X-Forwarded-For $remote_addr; - } - } - - Do not add a `/` after the port in `proxy_pass`, otherwise nginx will canonicalise/normalise the URI. - -* Caddy:: - - matrix.example.com { - proxy /_matrix http://localhost:8008 { - transparent - } - } - - example.com:8448 { - proxy / http://localhost:8008 { - transparent - } - } - -* Apache (note the ``nocanon`` options here!):: - - - SSLEngine on - ServerName matrix.example.com; - - AllowEncodedSlashes NoDecode - ProxyPass /_matrix http://127.0.0.1:8008/_matrix nocanon - ProxyPassReverse /_matrix http://127.0.0.1:8008/_matrix - - - - SSLEngine on - ServerName example.com; - - AllowEncodedSlashes NoDecode - ProxyPass /_matrix http://127.0.0.1:8008/_matrix nocanon - ProxyPassReverse /_matrix http://127.0.0.1:8008/_matrix - - -* HAProxy:: - - frontend https - bind :::443 v4v6 ssl crt /etc/ssl/haproxy/ strict-sni alpn h2,http/1.1 - - # Matrix client traffic - acl matrix-host hdr(host) -i matrix.example.com - acl matrix-path path_beg /_matrix - - use_backend matrix if matrix-host matrix-path - - frontend matrix-federation - bind :::8448 v4v6 ssl crt /etc/ssl/haproxy/synapse.pem alpn h2,http/1.1 - default_backend matrix - - backend matrix - server matrix 127.0.0.1:8008 - -You will also want to set ``bind_addresses: ['127.0.0.1']`` and ``x_forwarded: true`` -for port 8008 in ``homeserver.yaml`` to ensure that client IP addresses are -recorded correctly. - -Having done so, you can then use ``https://matrix.example.com`` (instead of -``https://matrix.example.com:8448``) as the "Custom server" when connecting to -Synapse from a client. diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 93c0edd8c..8226978ba 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -110,6 +110,9 @@ pid_file: DATADIR/homeserver.pid # blacklist IP address CIDR ranges. If this option is not specified, or # specified with an empty list, no ip range blacklist will be enforced. # +# As of Synapse v1.4.0 this option also affects any outbound requests to identity +# servers provided by user input. +# # (0.0.0.0 and :: are always blacklisted, whether or not they are explicitly # listed here, since they correspond to unroutable addresses.) # @@ -136,8 +139,8 @@ federation_ip_range_blacklist: # # type: the type of listener. Normally 'http', but other valid options are: # 'manhole' (see docs/manhole.md), -# 'metrics' (see docs/metrics-howto.rst), -# 'replication' (see docs/workers.rst). +# 'metrics' (see docs/metrics-howto.md), +# 'replication' (see docs/workers.md). # # tls: set to true to enable TLS for this listener. Will use the TLS # key/cert specified in tls_private_key_path / tls_certificate_path. @@ -172,12 +175,12 @@ federation_ip_range_blacklist: # # media: the media API (/_matrix/media). # -# metrics: the metrics interface. See docs/metrics-howto.rst. +# metrics: the metrics interface. See docs/metrics-howto.md. # # openid: OpenID authentication. # # replication: the HTTP replication API (/_synapse/replication). See -# docs/workers.rst. +# docs/workers.md. # # static: static resources under synapse/static (/_matrix/static). (Mostly # useful for 'fallback authentication'.) @@ -201,7 +204,7 @@ listeners: # that unwraps TLS. # # If you plan to use a reverse proxy, please see - # https://github.com/matrix-org/synapse/blob/master/docs/reverse_proxy.rst. + # https://github.com/matrix-org/synapse/blob/master/docs/reverse_proxy.md. # - port: 8008 tls: false @@ -306,6 +309,19 @@ listeners: # #allow_per_room_profiles: false +# How long to keep redacted events in unredacted form in the database. After +# this period redacted events get replaced with their redacted form in the DB. +# +# Defaults to `7d`. Set to `null` to disable. +# +#redaction_retention_period: 28d + +# How long to track users' last seen time and IPs in the database. +# +# Defaults to `28d`. Set to `null` to disable clearing out of old rows. +# +#user_ips_max_age: 14d + ## TLS ## @@ -511,6 +527,9 @@ log_config: "CONFDIR/SERVERNAME.log.config" # - one for login that ratelimits login requests based on the account the # client is attempting to log into, based on the amount of failed login # attempts for this account. +# - one for ratelimiting redactions by room admins. If this is not explicitly +# set then it uses the same ratelimiting as per rc_message. This is useful +# to allow room admins to deal with abuse quickly. # # The defaults are as shown below. # @@ -532,6 +551,10 @@ log_config: "CONFDIR/SERVERNAME.log.config" # failed_attempts: # per_second: 0.17 # burst_count: 3 +# +#rc_admin_redaction: +# per_second: 1 +# burst_count: 50 # Ratelimiting settings for incoming federation @@ -923,8 +946,10 @@ uploads_path: "DATADIR/uploads" # by the Matrix Identity Service API specification: # https://matrix.org/docs/spec/identity_service/latest # +# If a delegate is specified, the config option public_baseurl must also be filled out. +# account_threepid_delegates: - #email: https://example.com # Delegate email sending to matrix.org + #email: https://example.com # Delegate email sending to example.org #msisdn: http://localhost:8090 # Delegate SMS sending to this local process # Users who register on this homeserver will automatically be joined @@ -971,6 +996,11 @@ metrics_flags: # Whether or not to report anonymized homeserver usage statistics. # report_stats: true|false +# The endpoint to report the anonymized homeserver usage statistics to. +# Defaults to https://matrix.org/report-usage-stats/push +# +#report_stats_endpoint: https://example.com/report-usage-stats/push + ## API Configuration ## @@ -1042,6 +1072,10 @@ signing_key_path: "CONFDIR/SERVERNAME.signing.key" # This setting supercedes an older setting named `perspectives`. The old format # is still supported for backwards-compatibility, but it is deprecated. # +# 'trusted_key_servers' defaults to matrix.org, but using it will generate a +# warning on start-up. To suppress this warning, set +# 'suppress_key_server_warning' to true. +# # Options for each entry in the list include: # # server_name: the name of the server. required. @@ -1066,11 +1100,13 @@ signing_key_path: "CONFDIR/SERVERNAME.signing.key" # "ed25519:auto": "abcdefghijklmnopqrstuvwxyzabcdefghijklmopqr" # - server_name: "my_other_trusted_server.example.com" # -# The default configuration is: -# -#trusted_key_servers: -# - server_name: "matrix.org" +trusted_key_servers: + - server_name: "matrix.org" + +# Uncomment the following to disable the warning that is emitted when the +# trusted_key_servers include 'matrix.org'. See above. # +#suppress_key_server_warning: true # The signing keys to use when acting as a trusted key server. If not specified # defaults to the server signing key. @@ -1082,12 +1118,13 @@ signing_key_path: "CONFDIR/SERVERNAME.signing.key" # Enable SAML2 for registration and login. Uses pysaml2. # -# `sp_config` is the configuration for the pysaml2 Service Provider. -# See pysaml2 docs for format of config. +# At least one of `sp_config` or `config_path` must be set in this section to +# enable SAML login. # -# Default values will be used for the 'entityid' and 'service' settings, -# so it is not normally necessary to specify them unless you need to -# override them. +# (You will probably also want to set the following options to `false` to +# disable the regular login/registration flows: +# * enable_registration +# * password_config.enabled # # Once SAML support is enabled, a metadata file will be exposed at # https://:/_matrix/saml2/metadata.xml, which you may be able to @@ -1095,52 +1132,85 @@ signing_key_path: "CONFDIR/SERVERNAME.signing.key" # the IdP to use an ACS location of # https://:/_matrix/saml2/authn_response. # -#saml2_config: -# sp_config: -# # point this to the IdP's metadata. You can use either a local file or -# # (preferably) a URL. -# metadata: -# #local: ["saml2/idp.xml"] -# remote: -# - url: https://our_idp/metadata.xml -# -# # By default, the user has to go to our login page first. If you'd like to -# # allow IdP-initiated login, set 'allow_unsolicited: True' in a -# # 'service.sp' section: -# # -# #service: -# # sp: -# # allow_unsolicited: True -# -# # The examples below are just used to generate our metadata xml, and you -# # may well not need it, depending on your setup. Alternatively you -# # may need a whole lot more detail - see the pysaml2 docs! -# -# description: ["My awesome SP", "en"] -# name: ["Test SP", "en"] -# -# organization: -# name: Example com -# display_name: -# - ["Example co", "en"] -# url: "http://example.com" -# -# contact_person: -# - given_name: Bob -# sur_name: "the Sysadmin" -# email_address": ["admin@example.com"] -# contact_type": technical -# -# # Instead of putting the config inline as above, you can specify a -# # separate pysaml2 configuration file: -# # -# config_path: "CONFDIR/sp_conf.py" -# -# # the lifetime of a SAML session. This defines how long a user has to -# # complete the authentication process, if allow_unsolicited is unset. -# # The default is 5 minutes. -# # -# # saml_session_lifetime: 5m +saml2_config: + # `sp_config` is the configuration for the pysaml2 Service Provider. + # See pysaml2 docs for format of config. + # + # Default values will be used for the 'entityid' and 'service' settings, + # so it is not normally necessary to specify them unless you need to + # override them. + # + #sp_config: + # # point this to the IdP's metadata. You can use either a local file or + # # (preferably) a URL. + # metadata: + # #local: ["saml2/idp.xml"] + # remote: + # - url: https://our_idp/metadata.xml + # + # # By default, the user has to go to our login page first. If you'd like + # # to allow IdP-initiated login, set 'allow_unsolicited: True' in a + # # 'service.sp' section: + # # + # #service: + # # sp: + # # allow_unsolicited: true + # + # # The examples below are just used to generate our metadata xml, and you + # # may well not need them, depending on your setup. Alternatively you + # # may need a whole lot more detail - see the pysaml2 docs! + # + # description: ["My awesome SP", "en"] + # name: ["Test SP", "en"] + # + # organization: + # name: Example com + # display_name: + # - ["Example co", "en"] + # url: "http://example.com" + # + # contact_person: + # - given_name: Bob + # sur_name: "the Sysadmin" + # email_address": ["admin@example.com"] + # contact_type": technical + + # Instead of putting the config inline as above, you can specify a + # separate pysaml2 configuration file: + # + #config_path: "CONFDIR/sp_conf.py" + + # the lifetime of a SAML session. This defines how long a user has to + # complete the authentication process, if allow_unsolicited is unset. + # The default is 5 minutes. + # + #saml_session_lifetime: 5m + + # The SAML attribute (after mapping via the attribute maps) to use to derive + # the Matrix ID from. 'uid' by default. + # + #mxid_source_attribute: displayName + + # The mapping system to use for mapping the saml attribute onto a matrix ID. + # Options include: + # * 'hexencode' (which maps unpermitted characters to '=xx') + # * 'dotreplace' (which replaces unpermitted characters with '.'). + # The default is 'hexencode'. + # + #mxid_mapping: dotreplace + + # In previous versions of synapse, the mapping from SAML attribute to MXID was + # always calculated dynamically rather than stored in a table. For backwards- + # compatibility, we will look for user_ids matching such a pattern before + # creating a new account. + # + # This setting controls the SAML attribute which will be used for this + # backwards-compatibility lookup. Typically it should be 'uid', but if the + # attribute maps are changed, it may be necessary to change it. + # + # The default is 'uid'. + # + #grandfathered_mxid_source_attribute: upn @@ -1150,6 +1220,7 @@ signing_key_path: "CONFDIR/SERVERNAME.signing.key" # enabled: true # server_url: "https://cas-server.com" # service_url: "https://homeserver.domain.com:8448" +# #displayname_attribute: name # #required_attributes: # # name: value @@ -1242,6 +1313,12 @@ password_config: # #registration_template_html: registration.html # #registration_template_text: registration.txt # +# # Templates for validation emails sent by the homeserver when adding an email to +# # your user account +# # +# #add_threepid_template_html: add_threepid.html +# #add_threepid_template_text: add_threepid.txt +# # # Templates for password reset success and failure pages that a user # # will see after attempting to reset their password # # @@ -1253,6 +1330,12 @@ password_config: # # # #registration_template_success_html: registration_success.html # #registration_template_failure_html: registration_failure.html +# +# # Templates for success and failure pages that a user will see after attempting +# # to add an email or phone to their account +# # +# #add_threepid_success_html: add_threepid_success.html +# #add_threepid_failure_html: add_threepid_failure.html #password_providers: diff --git a/docs/tcp_replication.md b/docs/tcp_replication.md new file mode 100644 index 000000000..e099d8a87 --- /dev/null +++ b/docs/tcp_replication.md @@ -0,0 +1,249 @@ +# TCP Replication + +## Motivation + +Previously the workers used an HTTP long poll mechanism to get updates +from the master, which had the problem of causing a lot of duplicate +work on the server. This TCP protocol replaces those APIs with the aim +of increased efficiency. + +## Overview + +The protocol is based on fire and forget, line based commands. An +example flow would be (where '>' indicates master to worker and +'<' worker to master flows): + + > SERVER example.com + < REPLICATE events 53 + > RDATA events 54 ["$foo1:bar.com", ...] + > RDATA events 55 ["$foo4:bar.com", ...] + +The example shows the server accepting a new connection and sending its +identity with the `SERVER` command, followed by the client asking to +subscribe to the `events` stream from the token `53`. The server then +periodically sends `RDATA` commands which have the format +`RDATA `, where the format of `` is +defined by the individual streams. + +Error reporting happens by either the client or server sending an ERROR +command, and usually the connection will be closed. + +Since the protocol is a simple line based, its possible to manually +connect to the server using a tool like netcat. A few things should be +noted when manually using the protocol: + +- When subscribing to a stream using `REPLICATE`, the special token + `NOW` can be used to get all future updates. The special stream name + `ALL` can be used with `NOW` to subscribe to all available streams. +- The federation stream is only available if federation sending has + been disabled on the main process. +- The server will only time connections out that have sent a `PING` + command. If a ping is sent then the connection will be closed if no + further commands are receieved within 15s. Both the client and + server protocol implementations will send an initial PING on + connection and ensure at least one command every 5s is sent (not + necessarily `PING`). +- `RDATA` commands *usually* include a numeric token, however if the + stream has multiple rows to replicate per token the server will send + multiple `RDATA` commands, with all but the last having a token of + `batch`. See the documentation on `commands.RdataCommand` for + further details. + +## Architecture + +The basic structure of the protocol is line based, where the initial +word of each line specifies the command. The rest of the line is parsed +based on the command. For example, the RDATA command is defined as: + + RDATA + +(Note that may contains spaces, but cannot contain +newlines.) + +Blank lines are ignored. + +### Keep alives + +Both sides are expected to send at least one command every 5s or so, and +should send a `PING` command if necessary. If either side do not receive +a command within e.g. 15s then the connection should be closed. + +Because the server may be connected to manually using e.g. netcat, the +timeouts aren't enabled until an initial `PING` command is seen. Both +the client and server implementations below send a `PING` command +immediately on connection to ensure the timeouts are enabled. + +This ensures that both sides can quickly realize if the tcp connection +has gone and handle the situation appropriately. + +### Start up + +When a new connection is made, the server: + +- Sends a `SERVER` command, which includes the identity of the server, + allowing the client to detect if its connected to the expected + server +- Sends a `PING` command as above, to enable the client to time out + connections promptly. + +The client: + +- Sends a `NAME` command, allowing the server to associate a human + friendly name with the connection. This is optional. +- Sends a `PING` as above +- For each stream the client wishes to subscribe to it sends a + `REPLICATE` with the `stream_name` and token it wants to subscribe + from. +- On receipt of a `SERVER` command, checks that the server name + matches the expected server name. + +### Error handling + +If either side detects an error it can send an `ERROR` command and close +the connection. + +If the client side loses the connection to the server it should +reconnect, following the steps above. + +### Congestion + +If the server sends messages faster than the client can consume them the +server will first buffer a (fairly large) number of commands and then +disconnect the client. This ensures that we don't queue up an unbounded +number of commands in memory and gives us a potential oppurtunity to +squawk loudly. When/if the client recovers it can reconnect to the +server and ask for missed messages. + +### Reliability + +In general the replication stream should be considered an unreliable +transport since e.g. commands are not resent if the connection +disappears. + +The exception to that are the replication streams, i.e. RDATA commands, +since these include tokens which can be used to restart the stream on +connection errors. + +The client should keep track of the token in the last RDATA command +received for each stream so that on reconneciton it can start streaming +from the correct place. Note: not all RDATA have valid tokens due to +batching. See `RdataCommand` for more details. + +### Example + +An example iteraction is shown below. Each line is prefixed with '>' +or '<' to indicate which side is sending, these are *not* included on +the wire: + + * connection established * + > SERVER localhost:8823 + > PING 1490197665618 + < NAME synapse.app.appservice + < PING 1490197665618 + < REPLICATE events 1 + < REPLICATE backfill 1 + < REPLICATE caches 1 + > POSITION events 1 + > POSITION backfill 1 + > POSITION caches 1 + > RDATA caches 2 ["get_user_by_id",["@01register-user:localhost:8823"],1490197670513] + > RDATA events 14 ["$149019767112vOHxz:localhost:8823", + "!AFDCvgApUmpdfVjIXm:localhost:8823","m.room.guest_access","",null] + < PING 1490197675618 + > ERROR server stopping + * connection closed by server * + +The `POSITION` command sent by the server is used to set the clients +position without needing to send data with the `RDATA` command. + +An example of a batched set of `RDATA` is: + + > RDATA caches batch ["get_user_by_id",["@test:localhost:8823"],1490197670513] + > RDATA caches batch ["get_user_by_id",["@test2:localhost:8823"],1490197670513] + > RDATA caches batch ["get_user_by_id",["@test3:localhost:8823"],1490197670513] + > RDATA caches 54 ["get_user_by_id",["@test4:localhost:8823"],1490197670513] + +In this case the client shouldn't advance their caches token until it +sees the the last `RDATA`. + +### List of commands + +The list of valid commands, with which side can send it: server (S) or +client (C): + +#### SERVER (S) + + Sent at the start to identify which server the client is talking to + +#### RDATA (S) + + A single update in a stream + +#### POSITION (S) + + The position of the stream has been updated. Sent to the client + after all missing updates for a stream have been sent to the client + and they're now up to date. + +#### ERROR (S, C) + + There was an error + +#### PING (S, C) + + Sent periodically to ensure the connection is still alive + +#### NAME (C) + + Sent at the start by client to inform the server who they are + +#### REPLICATE (C) + + Asks the server to replicate a given stream + +#### USER_SYNC (C) + + A user has started or stopped syncing + +#### FEDERATION_ACK (C) + + Acknowledge receipt of some federation data + +#### REMOVE_PUSHER (C) + + Inform the server a pusher should be removed + +#### INVALIDATE_CACHE (C) + + Inform the server a cache should be invalidated + +#### SYNC (S, C) + + Used exclusively in tests + +See `synapse/replication/tcp/commands.py` for a detailed description and +the format of each command. + +### Cache Invalidation Stream + +The cache invalidation stream is used to inform workers when they need +to invalidate any of their caches in the data store. This is done by +streaming all cache invalidations done on master down to the workers, +assuming that any caches on the workers also exist on the master. + +Each individual cache invalidation results in a row being sent down +replication, which includes the cache name (the name of the function) +and they key to invalidate. For example: + + > RDATA caches 550953771 ["get_user_by_id", ["@bob:example.com"], 1550574873251] + +However, there are times when a number of caches need to be invalidated +at the same time with the same key. To reduce traffic we batch those +invalidations into a single poke by defining a special cache name that +workers understand to mean to expand to invalidate the correct caches. + +Currently the special cache names are declared in +`synapse/storage/_base.py` and are: + +1. `cs_cache_fake` ─ invalidates caches that depend on the current + state diff --git a/docs/tcp_replication.rst b/docs/tcp_replication.rst deleted file mode 100644 index 75e723484..000000000 --- a/docs/tcp_replication.rst +++ /dev/null @@ -1,249 +0,0 @@ -TCP Replication -=============== - -Motivation ----------- - -Previously the workers used an HTTP long poll mechanism to get updates from the -master, which had the problem of causing a lot of duplicate work on the server. -This TCP protocol replaces those APIs with the aim of increased efficiency. - - - -Overview --------- - -The protocol is based on fire and forget, line based commands. An example flow -would be (where '>' indicates master to worker and '<' worker to master flows):: - - > SERVER example.com - < REPLICATE events 53 - > RDATA events 54 ["$foo1:bar.com", ...] - > RDATA events 55 ["$foo4:bar.com", ...] - -The example shows the server accepting a new connection and sending its identity -with the ``SERVER`` command, followed by the client asking to subscribe to the -``events`` stream from the token ``53``. The server then periodically sends ``RDATA`` -commands which have the format ``RDATA ``, where the -format of ```` is defined by the individual streams. - -Error reporting happens by either the client or server sending an `ERROR` -command, and usually the connection will be closed. - - -Since the protocol is a simple line based, its possible to manually connect to -the server using a tool like netcat. A few things should be noted when manually -using the protocol: - -* When subscribing to a stream using ``REPLICATE``, the special token ``NOW`` can - be used to get all future updates. The special stream name ``ALL`` can be used - with ``NOW`` to subscribe to all available streams. -* The federation stream is only available if federation sending has been - disabled on the main process. -* The server will only time connections out that have sent a ``PING`` command. - If a ping is sent then the connection will be closed if no further commands - are receieved within 15s. Both the client and server protocol implementations - will send an initial PING on connection and ensure at least one command every - 5s is sent (not necessarily ``PING``). -* ``RDATA`` commands *usually* include a numeric token, however if the stream - has multiple rows to replicate per token the server will send multiple - ``RDATA`` commands, with all but the last having a token of ``batch``. See - the documentation on ``commands.RdataCommand`` for further details. - - -Architecture ------------- - -The basic structure of the protocol is line based, where the initial word of -each line specifies the command. The rest of the line is parsed based on the -command. For example, the `RDATA` command is defined as:: - - RDATA - -(Note that `` may contains spaces, but cannot contain newlines.) - -Blank lines are ignored. - - -Keep alives -~~~~~~~~~~~ - -Both sides are expected to send at least one command every 5s or so, and -should send a ``PING`` command if necessary. If either side do not receive a -command within e.g. 15s then the connection should be closed. - -Because the server may be connected to manually using e.g. netcat, the timeouts -aren't enabled until an initial ``PING`` command is seen. Both the client and -server implementations below send a ``PING`` command immediately on connection to -ensure the timeouts are enabled. - -This ensures that both sides can quickly realize if the tcp connection has gone -and handle the situation appropriately. - - -Start up -~~~~~~~~ - -When a new connection is made, the server: - -* Sends a ``SERVER`` command, which includes the identity of the server, allowing - the client to detect if its connected to the expected server -* Sends a ``PING`` command as above, to enable the client to time out connections - promptly. - -The client: - -* Sends a ``NAME`` command, allowing the server to associate a human friendly - name with the connection. This is optional. -* Sends a ``PING`` as above -* For each stream the client wishes to subscribe to it sends a ``REPLICATE`` - with the stream_name and token it wants to subscribe from. -* On receipt of a ``SERVER`` command, checks that the server name matches the - expected server name. - - -Error handling -~~~~~~~~~~~~~~ - -If either side detects an error it can send an ``ERROR`` command and close the -connection. - -If the client side loses the connection to the server it should reconnect, -following the steps above. - - -Congestion -~~~~~~~~~~ - -If the server sends messages faster than the client can consume them the server -will first buffer a (fairly large) number of commands and then disconnect the -client. This ensures that we don't queue up an unbounded number of commands in -memory and gives us a potential oppurtunity to squawk loudly. When/if the client -recovers it can reconnect to the server and ask for missed messages. - - -Reliability -~~~~~~~~~~~ - -In general the replication stream should be considered an unreliable transport -since e.g. commands are not resent if the connection disappears. - -The exception to that are the replication streams, i.e. RDATA commands, since -these include tokens which can be used to restart the stream on connection -errors. - -The client should keep track of the token in the last RDATA command received -for each stream so that on reconneciton it can start streaming from the correct -place. Note: not all RDATA have valid tokens due to batching. See -``RdataCommand`` for more details. - -Example -~~~~~~~ - -An example iteraction is shown below. Each line is prefixed with '>' or '<' to -indicate which side is sending, these are *not* included on the wire:: - - * connection established * - > SERVER localhost:8823 - > PING 1490197665618 - < NAME synapse.app.appservice - < PING 1490197665618 - < REPLICATE events 1 - < REPLICATE backfill 1 - < REPLICATE caches 1 - > POSITION events 1 - > POSITION backfill 1 - > POSITION caches 1 - > RDATA caches 2 ["get_user_by_id",["@01register-user:localhost:8823"],1490197670513] - > RDATA events 14 ["$149019767112vOHxz:localhost:8823", - "!AFDCvgApUmpdfVjIXm:localhost:8823","m.room.guest_access","",null] - < PING 1490197675618 - > ERROR server stopping - * connection closed by server * - -The ``POSITION`` command sent by the server is used to set the clients position -without needing to send data with the ``RDATA`` command. - - -An example of a batched set of ``RDATA`` is:: - - > RDATA caches batch ["get_user_by_id",["@test:localhost:8823"],1490197670513] - > RDATA caches batch ["get_user_by_id",["@test2:localhost:8823"],1490197670513] - > RDATA caches batch ["get_user_by_id",["@test3:localhost:8823"],1490197670513] - > RDATA caches 54 ["get_user_by_id",["@test4:localhost:8823"],1490197670513] - -In this case the client shouldn't advance their caches token until it sees the -the last ``RDATA``. - - -List of commands -~~~~~~~~~~~~~~~~ - -The list of valid commands, with which side can send it: server (S) or client (C): - -SERVER (S) - Sent at the start to identify which server the client is talking to - -RDATA (S) - A single update in a stream - -POSITION (S) - The position of the stream has been updated. Sent to the client after all - missing updates for a stream have been sent to the client and they're now - up to date. - -ERROR (S, C) - There was an error - -PING (S, C) - Sent periodically to ensure the connection is still alive - -NAME (C) - Sent at the start by client to inform the server who they are - -REPLICATE (C) - Asks the server to replicate a given stream - -USER_SYNC (C) - A user has started or stopped syncing - -FEDERATION_ACK (C) - Acknowledge receipt of some federation data - -REMOVE_PUSHER (C) - Inform the server a pusher should be removed - -INVALIDATE_CACHE (C) - Inform the server a cache should be invalidated - -SYNC (S, C) - Used exclusively in tests - - -See ``synapse/replication/tcp/commands.py`` for a detailed description and the -format of each command. - - -Cache Invalidation Stream -~~~~~~~~~~~~~~~~~~~~~~~~~ - -The cache invalidation stream is used to inform workers when they need to -invalidate any of their caches in the data store. This is done by streaming all -cache invalidations done on master down to the workers, assuming that any caches -on the workers also exist on the master. - -Each individual cache invalidation results in a row being sent down replication, -which includes the cache name (the name of the function) and they key to -invalidate. For example:: - - > RDATA caches 550953771 ["get_user_by_id", ["@bob:example.com"], 1550574873251] - -However, there are times when a number of caches need to be invalidated at the -same time with the same key. To reduce traffic we batch those invalidations into -a single poke by defining a special cache name that workers understand to mean -to expand to invalidate the correct caches. - -Currently the special cache names are declared in ``synapse/storage/_base.py`` -and are: - -1. ``cs_cache_fake`` ─ invalidates caches that depend on the current state diff --git a/docs/turn-howto.md b/docs/turn-howto.md new file mode 100644 index 000000000..4a983621e --- /dev/null +++ b/docs/turn-howto.md @@ -0,0 +1,123 @@ +# Overview + +This document explains how to enable VoIP relaying on your Home Server with +TURN. + +The synapse Matrix Home Server supports integration with TURN server via the +[TURN server REST API](). This +allows the Home Server to generate credentials that are valid for use on the +TURN server through the use of a secret shared between the Home Server and the +TURN server. + +The following sections describe how to install [coturn]() (which implements the TURN REST API) and integrate it with synapse. + +## `coturn` Setup + +### Initial installation + +The TURN daemon `coturn` is available from a variety of sources such as native package managers, or installation from source. + +#### Debian installation + + # apt install coturn + +#### Source installation + +1. Download the [latest release](https://github.com/coturn/coturn/releases/latest) from github. Unpack it and `cd` into the directory. + +1. Configure it: + + ./configure + + > You may need to install `libevent2`: if so, you should do so in + > the way recommended by your operating system. You can ignore + > warnings about lack of database support: a database is unnecessary + > for this purpose. + +1. Build and install it: + + make + make install + +1. Create or edit the config file in `/etc/turnserver.conf`. The relevant + lines, with example values, are: + + use-auth-secret + static-auth-secret=[your secret key here] + realm=turn.myserver.org + + See `turnserver.conf` for explanations of the options. One way to generate + the `static-auth-secret` is with `pwgen`: + + pwgen -s 64 1 + +1. Consider your security settings. TURN lets users request a relay which will + connect to arbitrary IP addresses and ports. The following configuration is + suggested as a minimum starting point: + + # VoIP traffic is all UDP. There is no reason to let users connect to arbitrary TCP endpoints via the relay. + no-tcp-relay + + # don't let the relay ever try to connect to private IP address ranges within your network (if any) + # given the turn server is likely behind your firewall, remember to include any privileged public IPs too. + denied-peer-ip=10.0.0.0-10.255.255.255 + denied-peer-ip=192.168.0.0-192.168.255.255 + denied-peer-ip=172.16.0.0-172.31.255.255 + + # special case the turn server itself so that client->TURN->TURN->client flows work + allowed-peer-ip=10.0.0.1 + + # consider whether you want to limit the quota of relayed streams per user (or total) to avoid risk of DoS. + user-quota=12 # 4 streams per video call, so 12 streams = 3 simultaneous relayed calls per user. + total-quota=1200 + + Ideally coturn should refuse to relay traffic which isn't SRTP; see + + +1. Ensure your firewall allows traffic into the TURN server on the ports + you've configured it to listen on (remember to allow both TCP and UDP TURN + traffic) + +1. If you've configured coturn to support TLS/DTLS, generate or import your + private key and certificate. + +1. Start the turn server: + + bin/turnserver -o + +## synapse Setup + +Your home server configuration file needs the following extra keys: + +1. "`turn_uris`": This needs to be a yaml list of public-facing URIs + for your TURN server to be given out to your clients. Add separate + entries for each transport your TURN server supports. +2. "`turn_shared_secret`": This is the secret shared between your + Home server and your TURN server, so you should set it to the same + string you used in turnserver.conf. +3. "`turn_user_lifetime`": This is the amount of time credentials + generated by your Home Server are valid for (in milliseconds). + Shorter times offer less potential for abuse at the expense of + increased traffic between web clients and your home server to + refresh credentials. The TURN REST API specification recommends + one day (86400000). +4. "`turn_allow_guests`": Whether to allow guest users to use the + TURN server. This is enabled by default, as otherwise VoIP will + not work reliably for guests. However, it does introduce a + security risk as it lets guests connect to arbitrary endpoints + without having gone through a CAPTCHA or similar to register a + real account. + +As an example, here is the relevant section of the config file for matrix.org: + + turn_uris: [ "turn:turn.matrix.org:3478?transport=udp", "turn:turn.matrix.org:3478?transport=tcp" ] + turn_shared_secret: n0t4ctuAllymatr1Xd0TorgSshar3d5ecret4obvIousreAsons + turn_user_lifetime: 86400000 + turn_allow_guests: True + +After updating the homeserver configuration, you must restart synapse: + + cd /where/you/run/synapse + ./synctl restart + +..and your Home Server now supports VoIP relaying! diff --git a/docs/turn-howto.rst b/docs/turn-howto.rst deleted file mode 100644 index a2fc5c882..000000000 --- a/docs/turn-howto.rst +++ /dev/null @@ -1,127 +0,0 @@ -How to enable VoIP relaying on your Home Server with TURN - -Overview --------- -The synapse Matrix Home Server supports integration with TURN server via the -TURN server REST API -(http://tools.ietf.org/html/draft-uberti-behave-turn-rest-00). This allows -the Home Server to generate credentials that are valid for use on the TURN -server through the use of a secret shared between the Home Server and the -TURN server. - -This document describes how to install coturn -(https://github.com/coturn/coturn) which also supports the TURN REST API, -and integrate it with synapse. - -coturn Setup -============ - -You may be able to setup coturn via your package manager, or set it up manually using the usual ``configure, make, make install`` process. - - 1. Check out coturn:: - - git clone https://github.com/coturn/coturn.git coturn - cd coturn - - 2. Configure it:: - - ./configure - - You may need to install ``libevent2``: if so, you should do so - in the way recommended by your operating system. - You can ignore warnings about lack of database support: a - database is unnecessary for this purpose. - - 3. Build and install it:: - - make - make install - - 4. Create or edit the config file in ``/etc/turnserver.conf``. The relevant - lines, with example values, are:: - - use-auth-secret - static-auth-secret=[your secret key here] - realm=turn.myserver.org - - See turnserver.conf for explanations of the options. - One way to generate the static-auth-secret is with pwgen:: - - pwgen -s 64 1 - - 5. Consider your security settings. TURN lets users request a relay - which will connect to arbitrary IP addresses and ports. At the least - we recommend:: - - # VoIP traffic is all UDP. There is no reason to let users connect to arbitrary TCP endpoints via the relay. - no-tcp-relay - - # don't let the relay ever try to connect to private IP address ranges within your network (if any) - # given the turn server is likely behind your firewall, remember to include any privileged public IPs too. - denied-peer-ip=10.0.0.0-10.255.255.255 - denied-peer-ip=192.168.0.0-192.168.255.255 - denied-peer-ip=172.16.0.0-172.31.255.255 - - # special case the turn server itself so that client->TURN->TURN->client flows work - allowed-peer-ip=10.0.0.1 - - # consider whether you want to limit the quota of relayed streams per user (or total) to avoid risk of DoS. - user-quota=12 # 4 streams per video call, so 12 streams = 3 simultaneous relayed calls per user. - total-quota=1200 - - Ideally coturn should refuse to relay traffic which isn't SRTP; - see https://github.com/matrix-org/synapse/issues/2009 - - 6. Ensure your firewall allows traffic into the TURN server on - the ports you've configured it to listen on (remember to allow - both TCP and UDP TURN traffic) - - 7. If you've configured coturn to support TLS/DTLS, generate or - import your private key and certificate. - - 8. Start the turn server:: - - bin/turnserver -o - - -synapse Setup -============= - -Your home server configuration file needs the following extra keys: - - 1. "turn_uris": This needs to be a yaml list - of public-facing URIs for your TURN server to be given out - to your clients. Add separate entries for each transport your - TURN server supports. - - 2. "turn_shared_secret": This is the secret shared between your Home - server and your TURN server, so you should set it to the same - string you used in turnserver.conf. - - 3. "turn_user_lifetime": This is the amount of time credentials - generated by your Home Server are valid for (in milliseconds). - Shorter times offer less potential for abuse at the expense - of increased traffic between web clients and your home server - to refresh credentials. The TURN REST API specification recommends - one day (86400000). - - 4. "turn_allow_guests": Whether to allow guest users to use the TURN - server. This is enabled by default, as otherwise VoIP will not - work reliably for guests. However, it does introduce a security risk - as it lets guests connect to arbitrary endpoints without having gone - through a CAPTCHA or similar to register a real account. - -As an example, here is the relevant section of the config file for -matrix.org:: - - turn_uris: [ "turn:turn.matrix.org:3478?transport=udp", "turn:turn.matrix.org:3478?transport=tcp" ] - turn_shared_secret: n0t4ctuAllymatr1Xd0TorgSshar3d5ecret4obvIousreAsons - turn_user_lifetime: 86400000 - turn_allow_guests: True - -Now, restart synapse:: - - cd /where/you/run/synapse - ./synctl restart - -...and your Home Server now supports VoIP relaying! diff --git a/docs/workers.rst b/docs/workers.md similarity index 78% rename from docs/workers.rst rename to docs/workers.md index e11e11741..4bd60ba0a 100644 --- a/docs/workers.rst +++ b/docs/workers.md @@ -1,5 +1,4 @@ -Scaling synapse via workers -=========================== +# Scaling synapse via workers Synapse has experimental support for splitting out functionality into multiple separate python processes, helping greatly with scalability. These @@ -20,17 +19,16 @@ TCP protocol called 'replication' - analogous to MySQL or Postgres style database replication; feeding a stream of relevant data to the workers so they can be kept in sync with the main synapse process and database state. -Configuration -------------- +## Configuration To make effective use of the workers, you will need to configure an HTTP reverse-proxy such as nginx or haproxy, which will direct incoming requests to the correct worker, or to the main synapse instance. Note that this includes -requests made to the federation port. See ``_ for -information on setting up a reverse proxy. +requests made to the federation port. See [reverse_proxy.md](reverse_proxy.md) +for information on setting up a reverse proxy. To enable workers, you need to add two replication listeners to the master -synapse, e.g.:: +synapse, e.g.: listeners: # The TCP replication port @@ -56,7 +54,7 @@ You then create a set of configs for the various worker processes. These should be worker configuration files, and should be stored in a dedicated subdirectory, to allow synctl to manipulate them. An additional configuration for the master synapse process will need to be created because the process will -not be started automatically. That configuration should look like this:: +not be started automatically. That configuration should look like this: worker_app: synapse.app.homeserver daemonize: true @@ -66,17 +64,17 @@ configuration file. You can then override configuration specific to that worker e.g. the HTTP listener that it provides (if any); logging configuration; etc. You should minimise the number of overrides though to maintain a usable config. -You must specify the type of worker application (``worker_app``). The currently +You must specify the type of worker application (`worker_app`). The currently available worker applications are listed below. You must also specify the replication endpoints that it's talking to on the main synapse process. -``worker_replication_host`` should specify the host of the main synapse, -``worker_replication_port`` should point to the TCP replication listener port and -``worker_replication_http_port`` should point to the HTTP replication port. +`worker_replication_host` should specify the host of the main synapse, +`worker_replication_port` should point to the TCP replication listener port and +`worker_replication_http_port` should point to the HTTP replication port. -Currently, the ``event_creator`` and ``federation_reader`` workers require specifying -``worker_replication_http_port``. +Currently, the `event_creator` and `federation_reader` workers require specifying +`worker_replication_http_port`. -For instance:: +For instance: worker_app: synapse.app.synchrotron @@ -97,15 +95,15 @@ For instance:: worker_log_config: /home/matrix/synapse/config/synchrotron_log_config.yaml ...is a full configuration for a synchrotron worker instance, which will expose a -plain HTTP ``/sync`` endpoint on port 8083 separately from the ``/sync`` endpoint provided +plain HTTP `/sync` endpoint on port 8083 separately from the `/sync` endpoint provided by the main synapse. Obviously you should configure your reverse-proxy to route the relevant -endpoints to the worker (``localhost:8083`` in the above example). +endpoints to the worker (`localhost:8083` in the above example). Finally, to actually run your worker-based synapse, you must pass synctl the -a commandline option to tell it to operate on all the worker configurations found -in the given directory, e.g.:: +in the given directory, e.g.: synctl -a $CONFIG/workers start @@ -114,28 +112,24 @@ synapse, unless you explicitly know it's safe not to. For instance, restarting synapse without restarting all the synchrotrons may result in broken typing notifications. -To manipulate a specific worker, you pass the -w option to synctl:: +To manipulate a specific worker, you pass the -w option to synctl: synctl -w $CONFIG/workers/synchrotron.yaml restart +## Available worker applications -Available worker applications ------------------------------ - -``synapse.app.pusher`` -~~~~~~~~~~~~~~~~~~~~~~ +### `synapse.app.pusher` Handles sending push notifications to sygnal and email. Doesn't handle any -REST endpoints itself, but you should set ``start_pushers: False`` in the +REST endpoints itself, but you should set `start_pushers: False` in the shared configuration file to stop the main synapse sending these notifications. Note this worker cannot be load-balanced: only one instance should be active. -``synapse.app.synchrotron`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~ +### `synapse.app.synchrotron` -The synchrotron handles ``sync`` requests from clients. In particular, it can -handle REST endpoints matching the following regular expressions:: +The synchrotron handles `sync` requests from clients. In particular, it can +handle REST endpoints matching the following regular expressions: ^/_matrix/client/(v2_alpha|r0)/sync$ ^/_matrix/client/(api/v1|v2_alpha|r0)/events$ @@ -151,20 +145,18 @@ load-balance across the instances, though it will be more efficient if all requests from a particular user are routed to a single instance. Extracting a userid from the access token is currently left as an exercise for the reader. -``synapse.app.appservice`` -~~~~~~~~~~~~~~~~~~~~~~~~~~ +### `synapse.app.appservice` Handles sending output traffic to Application Services. Doesn't handle any -REST endpoints itself, but you should set ``notify_appservices: False`` in the +REST endpoints itself, but you should set `notify_appservices: False` in the shared configuration file to stop the main synapse sending these notifications. Note this worker cannot be load-balanced: only one instance should be active. -``synapse.app.federation_reader`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +### `synapse.app.federation_reader` Handles a subset of federation endpoints. In particular, it can handle REST -endpoints matching the following regular expressions:: +endpoints matching the following regular expressions: ^/_matrix/federation/v1/event/ ^/_matrix/federation/v1/state/ @@ -190,40 +182,36 @@ reverse-proxy configuration. The `^/_matrix/federation/v1/send/` endpoint must only be handled by a single instance. -``synapse.app.federation_sender`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +### `synapse.app.federation_sender` Handles sending federation traffic to other servers. Doesn't handle any -REST endpoints itself, but you should set ``send_federation: False`` in the +REST endpoints itself, but you should set `send_federation: False` in the shared configuration file to stop the main synapse sending this traffic. Note this worker cannot be load-balanced: only one instance should be active. -``synapse.app.media_repository`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +### `synapse.app.media_repository` -Handles the media repository. It can handle all endpoints starting with:: +Handles the media repository. It can handle all endpoints starting with: /_matrix/media/ -And the following regular expressions matching media-specific administration -APIs:: +And the following regular expressions matching media-specific administration APIs: ^/_synapse/admin/v1/purge_media_cache$ ^/_synapse/admin/v1/room/.*/media$ ^/_synapse/admin/v1/quarantine_media/.*$ -You should also set ``enable_media_repo: False`` in the shared configuration +You should also set `enable_media_repo: False` in the shared configuration file to stop the main synapse running background jobs related to managing the media repository. Note this worker cannot be load-balanced: only one instance should be active. -``synapse.app.client_reader`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +### `synapse.app.client_reader` Handles client API endpoints. It can handle REST endpoints matching the -following regular expressions:: +following regular expressions: ^/_matrix/client/(api/v1|r0|unstable)/publicRooms$ ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/joined_members$ @@ -237,60 +225,55 @@ following regular expressions:: ^/_matrix/client/versions$ ^/_matrix/client/(api/v1|r0|unstable)/voip/turnServer$ -Additionally, the following REST endpoints can be handled for GET requests:: +Additionally, the following REST endpoints can be handled for GET requests: ^/_matrix/client/(api/v1|r0|unstable)/pushrules/.*$ Additionally, the following REST endpoints can be handled, but all requests must -be routed to the same instance:: +be routed to the same instance: ^/_matrix/client/(r0|unstable)/register$ Pagination requests can also be handled, but all requests with the same path room must be routed to the same instance. Additionally, care must be taken to ensure that the purge history admin API is not used while pagination requests -for the room are in flight:: +for the room are in flight: ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/messages$ - -``synapse.app.user_dir`` -~~~~~~~~~~~~~~~~~~~~~~~~ +### `synapse.app.user_dir` Handles searches in the user directory. It can handle REST endpoints matching -the following regular expressions:: +the following regular expressions: ^/_matrix/client/(api/v1|r0|unstable)/user_directory/search$ -``synapse.app.frontend_proxy`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +### `synapse.app.frontend_proxy` Proxies some frequently-requested client endpoints to add caching and remove load from the main synapse. It can handle REST endpoints matching the following -regular expressions:: +regular expressions: ^/_matrix/client/(api/v1|r0|unstable)/keys/upload -If ``use_presence`` is False in the homeserver config, it can also handle REST -endpoints matching the following regular expressions:: +If `use_presence` is False in the homeserver config, it can also handle REST +endpoints matching the following regular expressions: ^/_matrix/client/(api/v1|r0|unstable)/presence/[^/]+/status -This "stub" presence handler will pass through ``GET`` request but make the -``PUT`` effectively a no-op. +This "stub" presence handler will pass through `GET` request but make the +`PUT` effectively a no-op. It will proxy any requests it cannot handle to the main synapse instance. It must therefore be configured with the location of the main instance, via -the ``worker_main_http_uri`` setting in the frontend_proxy worker configuration -file. For example:: +the `worker_main_http_uri` setting in the `frontend_proxy` worker configuration +file. For example: worker_main_http_uri: http://127.0.0.1:8008 +### `synapse.app.event_creator` -``synapse.app.event_creator`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Handles some event creation. It can handle REST endpoints matching:: +Handles some event creation. It can handle REST endpoints matching: ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/send ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/(join|invite|leave|ban|unban|kick)$ diff --git a/mypy.ini b/mypy.ini new file mode 100644 index 000000000..ffadaddc0 --- /dev/null +++ b/mypy.ini @@ -0,0 +1,62 @@ +[mypy] +namespace_packages=True +plugins=mypy_zope:plugin +follow_imports=skip +mypy_path=stubs + +[mypy-zope] +ignore_missing_imports = True + +[mypy-constantly] +ignore_missing_imports = True + +[mypy-twisted.*] +ignore_missing_imports = True + +[mypy-treq.*] +ignore_missing_imports = True + +[mypy-hyperlink] +ignore_missing_imports = True + +[mypy-h11] +ignore_missing_imports = True + +[mypy-opentracing] +ignore_missing_imports = True + +[mypy-OpenSSL] +ignore_missing_imports = True + +[mypy-netaddr] +ignore_missing_imports = True + +[mypy-saml2.*] +ignore_missing_imports = True + +[mypy-unpaddedbase64] +ignore_missing_imports = True + +[mypy-canonicaljson] +ignore_missing_imports = True + +[mypy-jaeger_client] +ignore_missing_imports = True + +[mypy-jsonschema] +ignore_missing_imports = True + +[mypy-signedjson.*] +ignore_missing_imports = True + +[mypy-prometheus_client.*] +ignore_missing_imports = True + +[mypy-service_identity.*] +ignore_missing_imports = True + +[mypy-daemonize] +ignore_missing_imports = True + +[mypy-sentry_sdk] +ignore_missing_imports = True diff --git a/scripts/synapse_port_db b/scripts/synapse_port_db index b6ba19c77..3f942abdb 100755 --- a/scripts/synapse_port_db +++ b/scripts/synapse_port_db @@ -55,6 +55,8 @@ BOOLEAN_COLUMNS = { "local_group_membership": ["is_publicised", "is_admin"], "e2e_room_keys": ["is_verified"], "account_validity": ["email_sent"], + "redactions": ["have_censored"], + "room_stats_state": ["is_federatable"], } diff --git a/snap/snapcraft.yaml b/snap/snapcraft.yaml new file mode 100644 index 000000000..1f7df71db --- /dev/null +++ b/snap/snapcraft.yaml @@ -0,0 +1,22 @@ +name: matrix-synapse +base: core18 +version: git +summary: Reference Matrix homeserver +description: | + Synapse is the reference Matrix homeserver. + Matrix is a federated and decentralised instant messaging and VoIP system. + +grade: stable +confinement: strict + +apps: + matrix-synapse: + command: synctl --no-daemonize start $SNAP_COMMON/homeserver.yaml + stop-command: synctl -c $SNAP_COMMON stop + plugs: [network-bind, network] + daemon: simple +parts: + matrix-synapse: + source: . + plugin: python + python-version: python3 diff --git a/synapse/__init__.py b/synapse/__init__.py index 6766ef445..ee3313a41 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -17,6 +17,7 @@ """ This is a reference implementation of a Matrix home server. """ +import os import sys # Check that we're not running on an unsupported Python version. @@ -35,4 +36,11 @@ try: except ImportError: pass -__version__ = "1.3.1" +__version__ = "1.4.1" + +if bool(os.environ.get("SYNAPSE_TEST_PATCH_LOG_CONTEXTS", False)): + # We import here so that we don't have to install a bunch of deps when + # running the packaging tox test. + from synapse.util.patch_inline_callbacks import do_patch + + do_patch() diff --git a/synapse/api/auth.py b/synapse/api/auth.py index ddc195bc3..cb50579fd 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -25,7 +25,7 @@ from twisted.internet import defer import synapse.logging.opentracing as opentracing import synapse.types from synapse import event_auth -from synapse.api.constants import EventTypes, JoinRules, Membership +from synapse.api.constants import EventTypes, JoinRules, Membership, UserTypes from synapse.api.errors import ( AuthError, Codes, @@ -179,7 +179,6 @@ class Auth(object): def get_public_keys(self, invite_event): return event_auth.get_public_keys(invite_event) - @opentracing.trace @defer.inlineCallbacks def get_user_by_req( self, request, allow_guest=False, rights="access", allow_expired=False @@ -212,6 +211,7 @@ class Auth(object): if user_id: request.authenticated_entity = user_id opentracing.set_tag("authenticated_entity", user_id) + opentracing.set_tag("appservice_id", app_service.id) if ip_addr and self.hs.config.track_appservice_user_ips: yield self.store.insert_client_ip( @@ -263,6 +263,8 @@ class Auth(object): request.authenticated_entity = user.to_string() opentracing.set_tag("authenticated_entity", user.to_string()) + if device_id: + opentracing.set_tag("device_id", device_id) return synapse.types.create_requester( user, token_id, is_guest, device_id, app_service=app_service @@ -709,7 +711,7 @@ class Auth(object): ) @defer.inlineCallbacks - def check_auth_blocking(self, user_id=None, threepid=None): + def check_auth_blocking(self, user_id=None, threepid=None, user_type=None): """Checks if the user should be rejected for some external reason, such as monthly active user limiting or global disable flag @@ -722,6 +724,9 @@ class Auth(object): with a MAU blocked server, normally they would be rejected but their threepid is on the reserved list. user_id and threepid should never be set at the same time. + + user_type(str|None): If present, is used to decide whether to check against + certain blocking reasons like MAU. """ # Never fail an auth check for the server notices users or support user @@ -759,6 +764,10 @@ class Auth(object): self.hs.config.mau_limits_reserved_threepids, threepid ): return + elif user_type == UserTypes.SUPPORT: + # If the user does not exist yet and is of type "support", + # allow registration. Support users are excluded from MAU checks. + return # Else if there is no room in the MAU bucket, bail current_mau = yield self.store.get_monthly_active_count() if current_mau >= self.hs.config.max_mau_value: diff --git a/synapse/api/errors.py b/synapse/api/errors.py index a18d31db0..cca92c34b 100644 --- a/synapse/api/errors.py +++ b/synapse/api/errors.py @@ -17,6 +17,7 @@ """Contains exceptions and error codes.""" import logging +from typing import Dict from six import iteritems from six.moves import http_client @@ -112,7 +113,7 @@ class ProxiedRequestError(SynapseError): def __init__(self, code, msg, errcode=Codes.UNKNOWN, additional_fields=None): super(ProxiedRequestError, self).__init__(code, msg, errcode) if additional_fields is None: - self._additional_fields = {} + self._additional_fields = {} # type: Dict else: self._additional_fields = dict(additional_fields) diff --git a/synapse/api/room_versions.py b/synapse/api/room_versions.py index 95292b7de..c6f50fd7b 100644 --- a/synapse/api/room_versions.py +++ b/synapse/api/room_versions.py @@ -12,6 +12,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +from typing import Dict + import attr @@ -102,4 +105,4 @@ KNOWN_ROOM_VERSIONS = { RoomVersions.V4, RoomVersions.V5, ) -} # type: dict[str, RoomVersion] +} # type: Dict[str, RoomVersion] diff --git a/synapse/app/_base.py b/synapse/app/_base.py index c30fdeee9..2ac7d5c06 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -263,7 +263,9 @@ def start(hs, listeners=None): refresh_certificate(hs) # Start the tracer - synapse.logging.opentracing.init_tracer(hs.config) + synapse.logging.opentracing.init_tracer( # type: ignore[attr-defined] # noqa + hs.config + ) # It is now safe to start your Synapse. hs.start_listening(listeners) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 04f1ed14f..eb54f5685 100644 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -561,10 +561,12 @@ def run(hs): stats["database_engine"] = hs.get_datastore().database_engine_name stats["database_server_version"] = hs.get_datastore().get_server_version() - logger.info("Reporting stats to matrix.org: %s" % (stats,)) + logger.info( + "Reporting stats to %s: %s" % (hs.config.report_stats_endpoint, stats) + ) try: yield hs.get_simple_http_client().put_json( - "https://matrix.org/report-usage-stats/push", stats + hs.config.report_stats_endpoint, stats ) except Exception as e: logger.warn("Error reporting stats: %s", e) @@ -603,13 +605,13 @@ def run(hs): @defer.inlineCallbacks def generate_monthly_active_users(): current_mau_count = 0 - reserved_count = 0 + reserved_users = () store = hs.get_datastore() if hs.config.limit_usage_by_mau or hs.config.mau_stats_only: current_mau_count = yield store.get_monthly_active_count() - reserved_count = yield store.get_registered_reserved_users_count() + reserved_users = yield store.get_registered_reserved_users() current_mau_gauge.set(float(current_mau_count)) - registered_reserved_users_mau_gauge.set(float(reserved_count)) + registered_reserved_users_mau_gauge.set(float(len(reserved_users))) max_mau_gauge.set(float(hs.config.max_mau_value)) def start_generate_monthly_active_users(): diff --git a/synapse/config/_base.py b/synapse/config/_base.py index 31f653097..08619404b 100644 --- a/synapse/config/_base.py +++ b/synapse/config/_base.py @@ -18,7 +18,9 @@ import argparse import errno import os +from collections import OrderedDict from textwrap import dedent +from typing import Any, MutableMapping, Optional from six import integer_types @@ -51,7 +53,56 @@ Missing mandatory `server_name` config option. """ +def path_exists(file_path): + """Check if a file exists + + Unlike os.path.exists, this throws an exception if there is an error + checking if the file exists (for example, if there is a perms error on + the parent dir). + + Returns: + bool: True if the file exists; False if not. + """ + try: + os.stat(file_path) + return True + except OSError as e: + if e.errno != errno.ENOENT: + raise e + return False + + class Config(object): + """ + A configuration section, containing configuration keys and values. + + Attributes: + section (str): The section title of this config object, such as + "tls" or "logger". This is used to refer to it on the root + logger (for example, `config.tls.some_option`). Must be + defined in subclasses. + """ + + section = None + + def __init__(self, root_config=None): + self.root = root_config + + def __getattr__(self, item: str) -> Any: + """ + Try and fetch a configuration option that does not exist on this class. + + This is so that existing configs that rely on `self.value`, where value + is actually from a different config section, continue to work. + """ + if item in ["generate_config_section", "read_config"]: + raise AttributeError(item) + + if self.root is None: + raise AttributeError(item) + else: + return self.root._get_unclassed_config(self.section, item) + @staticmethod def parse_size(value): if isinstance(value, integer_types): @@ -88,22 +139,7 @@ class Config(object): @classmethod def path_exists(cls, file_path): - """Check if a file exists - - Unlike os.path.exists, this throws an exception if there is an error - checking if the file exists (for example, if there is a perms error on - the parent dir). - - Returns: - bool: True if the file exists; False if not. - """ - try: - os.stat(file_path) - return True - except OSError as e: - if e.errno != errno.ENOENT: - raise e - return False + return path_exists(file_path) @classmethod def check_file(cls, file_path, config_name): @@ -136,42 +172,106 @@ class Config(object): with open(file_path) as file_stream: return file_stream.read() - def invoke_all(self, name, *args, **kargs): - """Invoke all instance methods with the given name and arguments in the - class's MRO. + +class RootConfig(object): + """ + Holder of an application's configuration. + + What configuration this object holds is defined by `config_classes`, a list + of Config classes that will be instantiated and given the contents of a + configuration file to read. They can then be accessed on this class by their + section name, defined in the Config or dynamically set to be the name of the + class, lower-cased and with "Config" removed. + """ + + config_classes = [] + + def __init__(self): + self._configs = OrderedDict() + + for config_class in self.config_classes: + if config_class.section is None: + raise ValueError("%r requires a section name" % (config_class,)) + + try: + conf = config_class(self) + except Exception as e: + raise Exception("Failed making %s: %r" % (config_class.section, e)) + self._configs[config_class.section] = conf + + def __getattr__(self, item: str) -> Any: + """ + Redirect lookups on this object either to config objects, or values on + config objects, so that `config.tls.blah` works, as well as legacy uses + of things like `config.server_name`. It will first look up the config + section name, and then values on those config classes. + """ + if item in self._configs.keys(): + return self._configs[item] + + return self._get_unclassed_config(None, item) + + def _get_unclassed_config(self, asking_section: Optional[str], item: str): + """ + Fetch a config value from one of the instantiated config classes that + has not been fetched directly. Args: - name (str): Name of function to invoke + asking_section: If this check is coming from a Config child, which + one? This section will not be asked if it has the value. + item: The configuration value key. + + Raises: + AttributeError if no config classes have the config key. The body + will contain what sections were checked. + """ + for key, val in self._configs.items(): + if key == asking_section: + continue + + if item in dir(val): + return getattr(val, item) + + raise AttributeError(item, "not found in %s" % (list(self._configs.keys()),)) + + def invoke_all(self, func_name: str, *args, **kwargs) -> MutableMapping[str, Any]: + """ + Invoke a function on all instantiated config objects this RootConfig is + configured to use. + + Args: + func_name: Name of function to invoke *args **kwargs - Returns: - list: The list of the return values from each method called + ordered dictionary of config section name and the result of the + function from it. """ - results = [] - for cls in type(self).mro(): - if name in cls.__dict__: - results.append(getattr(cls, name)(self, *args, **kargs)) - return results + res = OrderedDict() + + for name, config in self._configs.items(): + if hasattr(config, func_name): + res[name] = getattr(config, func_name)(*args, **kwargs) + + return res @classmethod - def invoke_all_static(cls, name, *args, **kargs): - """Invoke all static methods with the given name and arguments in the - class's MRO. + def invoke_all_static(cls, func_name: str, *args, **kwargs): + """ + Invoke a static function on config objects this RootConfig is + configured to use. Args: - name (str): Name of function to invoke + func_name: Name of function to invoke *args **kwargs - Returns: - list: The list of the return values from each method called + ordered dictionary of config section name and the result of the + function from it. """ - results = [] - for c in cls.mro(): - if name in c.__dict__: - results.append(getattr(c, name)(*args, **kargs)) - return results + for config in cls.config_classes: + if hasattr(config, func_name): + getattr(config, func_name)(*args, **kwargs) def generate_config( self, @@ -187,7 +287,8 @@ class Config(object): tls_private_key_path=None, acme_domain=None, ): - """Build a default configuration file + """ + Build a default configuration file This is used when the user explicitly asks us to generate a config file (eg with --generate_config). @@ -242,6 +343,7 @@ class Config(object): Returns: str: the yaml config file """ + return "\n\n".join( dedent(conf) for conf in self.invoke_all( @@ -257,7 +359,7 @@ class Config(object): tls_certificate_path=tls_certificate_path, tls_private_key_path=tls_private_key_path, acme_domain=acme_domain, - ) + ).values() ) @classmethod @@ -444,7 +546,7 @@ class Config(object): ) (config_path,) = config_files - if not cls.path_exists(config_path): + if not path_exists(config_path): print("Generating config file %s" % (config_path,)) if config_args.data_directory: @@ -469,7 +571,7 @@ class Config(object): open_private_ports=config_args.open_private_ports, ) - if not cls.path_exists(config_dir_path): + if not path_exists(config_dir_path): os.makedirs(config_dir_path) with open(config_path, "w") as config_file: config_file.write("# vim:ft=yaml\n\n") @@ -518,7 +620,7 @@ class Config(object): return obj - def parse_config_dict(self, config_dict, config_dir_path, data_dir_path): + def parse_config_dict(self, config_dict, config_dir_path=None, data_dir_path=None): """Read the information from the config dict into this Config object. Args: @@ -607,3 +709,6 @@ def find_config_files(search_paths): else: config_files.append(config_path) return config_files + + +__all__ = ["Config", "RootConfig"] diff --git a/synapse/config/_base.pyi b/synapse/config/_base.pyi new file mode 100644 index 000000000..86bc965ee --- /dev/null +++ b/synapse/config/_base.pyi @@ -0,0 +1,135 @@ +from typing import Any, List, Optional + +from synapse.config import ( + api, + appservice, + captcha, + cas, + consent_config, + database, + emailconfig, + groups, + jwt_config, + key, + logger, + metrics, + password, + password_auth_providers, + push, + ratelimiting, + registration, + repository, + room_directory, + saml2_config, + server, + server_notices_config, + spam_checker, + stats, + third_party_event_rules, + tls, + tracer, + user_directory, + voip, + workers, +) + +class ConfigError(Exception): ... + +MISSING_REPORT_STATS_CONFIG_INSTRUCTIONS: str +MISSING_REPORT_STATS_SPIEL: str +MISSING_SERVER_NAME: str + +def path_exists(file_path: str): ... + +class RootConfig: + server: server.ServerConfig + tls: tls.TlsConfig + database: database.DatabaseConfig + logging: logger.LoggingConfig + ratelimit: ratelimiting.RatelimitConfig + media: repository.ContentRepositoryConfig + captcha: captcha.CaptchaConfig + voip: voip.VoipConfig + registration: registration.RegistrationConfig + metrics: metrics.MetricsConfig + api: api.ApiConfig + appservice: appservice.AppServiceConfig + key: key.KeyConfig + saml2: saml2_config.SAML2Config + cas: cas.CasConfig + jwt: jwt_config.JWTConfig + password: password.PasswordConfig + email: emailconfig.EmailConfig + worker: workers.WorkerConfig + authproviders: password_auth_providers.PasswordAuthProviderConfig + push: push.PushConfig + spamchecker: spam_checker.SpamCheckerConfig + groups: groups.GroupsConfig + userdirectory: user_directory.UserDirectoryConfig + consent: consent_config.ConsentConfig + stats: stats.StatsConfig + servernotices: server_notices_config.ServerNoticesConfig + roomdirectory: room_directory.RoomDirectoryConfig + thirdpartyrules: third_party_event_rules.ThirdPartyRulesConfig + tracer: tracer.TracerConfig + + config_classes: List = ... + def __init__(self) -> None: ... + def invoke_all(self, func_name: str, *args: Any, **kwargs: Any): ... + @classmethod + def invoke_all_static(cls, func_name: str, *args: Any, **kwargs: Any) -> None: ... + def __getattr__(self, item: str): ... + def parse_config_dict( + self, + config_dict: Any, + config_dir_path: Optional[Any] = ..., + data_dir_path: Optional[Any] = ..., + ) -> None: ... + read_config: Any = ... + def generate_config( + self, + config_dir_path: str, + data_dir_path: str, + server_name: str, + generate_secrets: bool = ..., + report_stats: Optional[str] = ..., + open_private_ports: bool = ..., + listeners: Optional[Any] = ..., + database_conf: Optional[Any] = ..., + tls_certificate_path: Optional[str] = ..., + tls_private_key_path: Optional[str] = ..., + acme_domain: Optional[str] = ..., + ): ... + @classmethod + def load_or_generate_config(cls, description: Any, argv: Any): ... + @classmethod + def load_config(cls, description: Any, argv: Any): ... + @classmethod + def add_arguments_to_parser(cls, config_parser: Any) -> None: ... + @classmethod + def load_config_with_parser(cls, parser: Any, argv: Any): ... + def generate_missing_files( + self, config_dict: dict, config_dir_path: str + ) -> None: ... + +class Config: + root: RootConfig + def __init__(self, root_config: Optional[RootConfig] = ...) -> None: ... + def __getattr__(self, item: str, from_root: bool = ...): ... + @staticmethod + def parse_size(value: Any): ... + @staticmethod + def parse_duration(value: Any): ... + @staticmethod + def abspath(file_path: Optional[str]): ... + @classmethod + def path_exists(cls, file_path: str): ... + @classmethod + def check_file(cls, file_path: str, config_name: str): ... + @classmethod + def ensure_directory(cls, dir_path: str): ... + @classmethod + def read_file(cls, file_path: str, config_name: str): ... + +def read_config_files(config_files: List[str]): ... +def find_config_files(search_paths: List[str]): ... diff --git a/synapse/config/api.py b/synapse/config/api.py index dddea79a8..74cd53a8e 100644 --- a/synapse/config/api.py +++ b/synapse/config/api.py @@ -18,6 +18,8 @@ from ._base import Config class ApiConfig(Config): + section = "api" + def read_config(self, config, **kwargs): self.room_invite_state_types = config.get( "room_invite_state_types", diff --git a/synapse/config/appservice.py b/synapse/config/appservice.py index 8387ff680..9b4682222 100644 --- a/synapse/config/appservice.py +++ b/synapse/config/appservice.py @@ -13,6 +13,7 @@ # limitations under the License. import logging +from typing import Dict from six import string_types from six.moves.urllib import parse as urlparse @@ -29,6 +30,8 @@ logger = logging.getLogger(__name__) class AppServiceConfig(Config): + section = "appservice" + def read_config(self, config, **kwargs): self.app_service_config_files = config.get("app_service_config_files", []) self.notify_appservices = config.get("notify_appservices", True) @@ -56,8 +59,8 @@ def load_appservices(hostname, config_files): return [] # Dicts of value -> filename - seen_as_tokens = {} - seen_ids = {} + seen_as_tokens = {} # type: Dict[str, str] + seen_ids = {} # type: Dict[str, str] appservices = [] diff --git a/synapse/config/captcha.py b/synapse/config/captcha.py index 8dac8152c..44bd5c679 100644 --- a/synapse/config/captcha.py +++ b/synapse/config/captcha.py @@ -16,6 +16,8 @@ from ._base import Config class CaptchaConfig(Config): + section = "captcha" + def read_config(self, config, **kwargs): self.recaptcha_private_key = config.get("recaptcha_private_key") self.recaptcha_public_key = config.get("recaptcha_public_key") diff --git a/synapse/config/cas.py b/synapse/config/cas.py index ebe34d933..4526c1a67 100644 --- a/synapse/config/cas.py +++ b/synapse/config/cas.py @@ -22,17 +22,21 @@ class CasConfig(Config): cas_server_url: URL of CAS server """ + section = "cas" + def read_config(self, config, **kwargs): cas_config = config.get("cas_config", None) if cas_config: self.cas_enabled = cas_config.get("enabled", True) self.cas_server_url = cas_config["server_url"] self.cas_service_url = cas_config["service_url"] + self.cas_displayname_attribute = cas_config.get("displayname_attribute") self.cas_required_attributes = cas_config.get("required_attributes", {}) else: self.cas_enabled = False self.cas_server_url = None self.cas_service_url = None + self.cas_displayname_attribute = None self.cas_required_attributes = {} def generate_config_section(self, config_dir_path, server_name, **kwargs): @@ -43,6 +47,7 @@ class CasConfig(Config): # enabled: true # server_url: "https://cas-server.com" # service_url: "https://homeserver.domain.com:8448" + # #displayname_attribute: name # #required_attributes: # # name: value """ diff --git a/synapse/config/consent_config.py b/synapse/config/consent_config.py index 94916f3a4..62c4c44d6 100644 --- a/synapse/config/consent_config.py +++ b/synapse/config/consent_config.py @@ -73,8 +73,11 @@ DEFAULT_CONFIG = """\ class ConsentConfig(Config): - def __init__(self): - super(ConsentConfig, self).__init__() + + section = "consent" + + def __init__(self, *args): + super(ConsentConfig, self).__init__(*args) self.user_consent_version = None self.user_consent_template_dir = None diff --git a/synapse/config/database.py b/synapse/config/database.py index 118aafbd4..0e2509f0b 100644 --- a/synapse/config/database.py +++ b/synapse/config/database.py @@ -21,6 +21,8 @@ from ._base import Config class DatabaseConfig(Config): + section = "database" + def read_config(self, config, **kwargs): self.event_cache_size = self.parse_size(config.get("event_cache_size", "10K")) diff --git a/synapse/config/emailconfig.py b/synapse/config/emailconfig.py index e5de768b0..658897a77 100644 --- a/synapse/config/emailconfig.py +++ b/synapse/config/emailconfig.py @@ -28,6 +28,8 @@ from ._base import Config, ConfigError class EmailConfig(Config): + section = "email" + def read_config(self, config, **kwargs): # TODO: We should separate better the email configuration from the notification # and account validity config. @@ -169,12 +171,22 @@ class EmailConfig(Config): self.email_registration_template_text = email_config.get( "registration_template_text", "registration.txt" ) + self.email_add_threepid_template_html = email_config.get( + "add_threepid_template_html", "add_threepid.html" + ) + self.email_add_threepid_template_text = email_config.get( + "add_threepid_template_text", "add_threepid.txt" + ) + self.email_password_reset_template_failure_html = email_config.get( "password_reset_template_failure_html", "password_reset_failure.html" ) self.email_registration_template_failure_html = email_config.get( "registration_template_failure_html", "registration_failure.html" ) + self.email_add_threepid_template_failure_html = email_config.get( + "add_threepid_template_failure_html", "add_threepid_failure.html" + ) # These templates do not support any placeholder variables, so we # will read them from disk once during setup @@ -184,6 +196,9 @@ class EmailConfig(Config): email_registration_template_success_html = email_config.get( "registration_template_success_html", "registration_success.html" ) + email_add_threepid_template_success_html = email_config.get( + "add_threepid_template_success_html", "add_threepid_success.html" + ) # Check templates exist for f in [ @@ -191,9 +206,14 @@ class EmailConfig(Config): self.email_password_reset_template_text, self.email_registration_template_html, self.email_registration_template_text, + self.email_add_threepid_template_html, + self.email_add_threepid_template_text, self.email_password_reset_template_failure_html, + self.email_registration_template_failure_html, + self.email_add_threepid_template_failure_html, email_password_reset_template_success_html, email_registration_template_success_html, + email_add_threepid_template_success_html, ]: p = os.path.join(self.email_template_dir, f) if not os.path.isfile(p): @@ -212,6 +232,12 @@ class EmailConfig(Config): self.email_registration_template_success_html_content = self.read_file( filepath, "email.registration_template_success_html" ) + filepath = os.path.join( + self.email_template_dir, email_add_threepid_template_success_html + ) + self.email_add_threepid_template_success_html_content = self.read_file( + filepath, "email.add_threepid_template_success_html" + ) if self.email_enable_notifs: required = [ @@ -328,6 +354,12 @@ class EmailConfig(Config): # #registration_template_html: registration.html # #registration_template_text: registration.txt # + # # Templates for validation emails sent by the homeserver when adding an email to + # # your user account + # # + # #add_threepid_template_html: add_threepid.html + # #add_threepid_template_text: add_threepid.txt + # # # Templates for password reset success and failure pages that a user # # will see after attempting to reset their password # # @@ -339,6 +371,12 @@ class EmailConfig(Config): # # # #registration_template_success_html: registration_success.html # #registration_template_failure_html: registration_failure.html + # + # # Templates for success and failure pages that a user will see after attempting + # # to add an email or phone to their account + # # + # #add_threepid_success_html: add_threepid_success.html + # #add_threepid_failure_html: add_threepid_failure.html """ diff --git a/synapse/config/groups.py b/synapse/config/groups.py index 2a522b5f4..d6862d9a6 100644 --- a/synapse/config/groups.py +++ b/synapse/config/groups.py @@ -17,6 +17,8 @@ from ._base import Config class GroupsConfig(Config): + section = "groups" + def read_config(self, config, **kwargs): self.enable_group_creation = config.get("enable_group_creation", False) self.group_creation_prefix = config.get("group_creation_prefix", "") diff --git a/synapse/config/homeserver.py b/synapse/config/homeserver.py index 72acad4f1..6e348671c 100644 --- a/synapse/config/homeserver.py +++ b/synapse/config/homeserver.py @@ -14,6 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +from ._base import RootConfig from .api import ApiConfig from .appservice import AppServiceConfig from .captcha import CaptchaConfig @@ -46,36 +47,37 @@ from .voip import VoipConfig from .workers import WorkerConfig -class HomeServerConfig( - ServerConfig, - TlsConfig, - DatabaseConfig, - LoggingConfig, - RatelimitConfig, - ContentRepositoryConfig, - CaptchaConfig, - VoipConfig, - RegistrationConfig, - MetricsConfig, - ApiConfig, - AppServiceConfig, - KeyConfig, - SAML2Config, - CasConfig, - JWTConfig, - PasswordConfig, - EmailConfig, - WorkerConfig, - PasswordAuthProviderConfig, - PushConfig, - SpamCheckerConfig, - GroupsConfig, - UserDirectoryConfig, - ConsentConfig, - StatsConfig, - ServerNoticesConfig, - RoomDirectoryConfig, - ThirdPartyRulesConfig, - TracerConfig, -): - pass +class HomeServerConfig(RootConfig): + + config_classes = [ + ServerConfig, + TlsConfig, + DatabaseConfig, + LoggingConfig, + RatelimitConfig, + ContentRepositoryConfig, + CaptchaConfig, + VoipConfig, + RegistrationConfig, + MetricsConfig, + ApiConfig, + AppServiceConfig, + KeyConfig, + SAML2Config, + CasConfig, + JWTConfig, + PasswordConfig, + EmailConfig, + WorkerConfig, + PasswordAuthProviderConfig, + PushConfig, + SpamCheckerConfig, + GroupsConfig, + UserDirectoryConfig, + ConsentConfig, + StatsConfig, + ServerNoticesConfig, + RoomDirectoryConfig, + ThirdPartyRulesConfig, + TracerConfig, + ] diff --git a/synapse/config/jwt_config.py b/synapse/config/jwt_config.py index 36d87cef0..a56872698 100644 --- a/synapse/config/jwt_config.py +++ b/synapse/config/jwt_config.py @@ -23,6 +23,8 @@ MISSING_JWT = """Missing jwt library. This is required for jwt login. class JWTConfig(Config): + section = "jwt" + def read_config(self, config, **kwargs): jwt_config = config.get("jwt_config", None) if jwt_config: diff --git a/synapse/config/key.py b/synapse/config/key.py index ba2199bce..ec5d430af 100644 --- a/synapse/config/key.py +++ b/synapse/config/key.py @@ -50,6 +50,33 @@ and you should enable 'federation_verify_certificates' in your configuration. If you are *sure* you want to do this, set 'accept_keys_insecurely' on the trusted_key_server configuration.""" +TRUSTED_KEY_SERVER_NOT_CONFIGURED_WARN = """\ +Synapse requires that a list of trusted key servers are specified in order to +provide signing keys for other servers in the federation. + +This homeserver does not have a trusted key server configured in +homeserver.yaml and will fall back to the default of 'matrix.org'. + +Trusted key servers should be long-lived and stable which makes matrix.org a +good choice for many admins, but some admins may wish to choose another. To +suppress this warning, the admin should set 'trusted_key_servers' in +homeserver.yaml to their desired key server and 'suppress_key_server_warning' +to 'true'. + +In a future release the software-defined default will be removed entirely and +the trusted key server will be defined exclusively by the value of +'trusted_key_servers'. +--------------------------------------------------------------------------------""" + +TRUSTED_KEY_SERVER_CONFIGURED_AS_M_ORG_WARN = """\ +This server is configured to use 'matrix.org' as its trusted key server via the +'trusted_key_servers' config option. 'matrix.org' is a good choice for a key +server since it is long-lived, stable and trusted. However, some admins may +wish to use another server for this purpose. + +To suppress this warning and continue using 'matrix.org', admins should set +'suppress_key_server_warning' to 'true' in homeserver.yaml. +--------------------------------------------------------------------------------""" logger = logging.getLogger(__name__) @@ -65,6 +92,8 @@ class TrustedKeyServer(object): class KeyConfig(Config): + section = "key" + def read_config(self, config, config_dir_path, **kwargs): # the signing key can be specified inline or in a separate file if "signing_key" in config: @@ -85,6 +114,7 @@ class KeyConfig(Config): config.get("key_refresh_interval", "1d") ) + suppress_key_server_warning = config.get("suppress_key_server_warning", False) key_server_signing_keys_path = config.get("key_server_signing_keys_path") if key_server_signing_keys_path: self.key_server_signing_keys = self.read_signing_keys( @@ -95,6 +125,7 @@ class KeyConfig(Config): # if neither trusted_key_servers nor perspectives are given, use the default. if "perspectives" not in config and "trusted_key_servers" not in config: + logger.warn(TRUSTED_KEY_SERVER_NOT_CONFIGURED_WARN) key_servers = [{"server_name": "matrix.org"}] else: key_servers = config.get("trusted_key_servers", []) @@ -108,6 +139,11 @@ class KeyConfig(Config): # merge the 'perspectives' config into the 'trusted_key_servers' config. key_servers.extend(_perspectives_to_key_servers(config)) + if not suppress_key_server_warning and "matrix.org" in ( + s["server_name"] for s in key_servers + ): + logger.warning(TRUSTED_KEY_SERVER_CONFIGURED_AS_M_ORG_WARN) + # list of TrustedKeyServer objects self.key_servers = list( _parse_key_servers(key_servers, self.federation_verify_certificates) @@ -190,6 +226,10 @@ class KeyConfig(Config): # This setting supercedes an older setting named `perspectives`. The old format # is still supported for backwards-compatibility, but it is deprecated. # + # 'trusted_key_servers' defaults to matrix.org, but using it will generate a + # warning on start-up. To suppress this warning, set + # 'suppress_key_server_warning' to true. + # # Options for each entry in the list include: # # server_name: the name of the server. required. @@ -214,11 +254,13 @@ class KeyConfig(Config): # "ed25519:auto": "abcdefghijklmnopqrstuvwxyzabcdefghijklmopqr" # - server_name: "my_other_trusted_server.example.com" # - # The default configuration is: - # - #trusted_key_servers: - # - server_name: "matrix.org" + trusted_key_servers: + - server_name: "matrix.org" + + # Uncomment the following to disable the warning that is emitted when the + # trusted_key_servers include 'matrix.org'. See above. # + #suppress_key_server_warning: true # The signing keys to use when acting as a trusted key server. If not specified # defaults to the server signing key. diff --git a/synapse/config/logger.py b/synapse/config/logger.py index 2704c1872..d609ec111 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -21,7 +21,12 @@ from string import Template import yaml -from twisted.logger import STDLibLogObserver, globalLogBeginner +from twisted.logger import ( + ILogObserver, + LogBeginner, + STDLibLogObserver, + globalLogBeginner, +) import synapse from synapse.app import _base as appbase @@ -79,6 +84,8 @@ root: class LoggingConfig(Config): + section = "logging" + def read_config(self, config, **kwargs): self.log_config = self.abspath(config.get("log_config")) self.no_redirect_stdio = config.get("no_redirect_stdio", False) @@ -124,7 +131,7 @@ class LoggingConfig(Config): log_config_file.write(DEFAULT_LOG_CONFIG.substitute(log_file=log_file)) -def _setup_stdlib_logging(config, log_config): +def _setup_stdlib_logging(config, log_config, logBeginner: LogBeginner): """ Set up Python stdlib logging. """ @@ -165,12 +172,12 @@ def _setup_stdlib_logging(config, log_config): return observer(event) - globalLogBeginner.beginLoggingTo( - [_log], redirectStandardIO=not config.no_redirect_stdio - ) + logBeginner.beginLoggingTo([_log], redirectStandardIO=not config.no_redirect_stdio) if not config.no_redirect_stdio: print("Redirected stdout/stderr to logs") + return observer + def _reload_stdlib_logging(*args, log_config=None): logger = logging.getLogger("") @@ -181,7 +188,9 @@ def _reload_stdlib_logging(*args, log_config=None): logging.config.dictConfig(log_config) -def setup_logging(hs, config, use_worker_options=False): +def setup_logging( + hs, config, use_worker_options=False, logBeginner: LogBeginner = globalLogBeginner +) -> ILogObserver: """ Set up the logging subsystem. @@ -191,6 +200,12 @@ def setup_logging(hs, config, use_worker_options=False): use_worker_options (bool): True to use the 'worker_log_config' option instead of 'log_config'. + + logBeginner: The Twisted logBeginner to use. + + Returns: + The "root" Twisted Logger observer, suitable for sending logs to from a + Logger instance. """ log_config = config.worker_log_config if use_worker_options else config.log_config @@ -210,10 +225,12 @@ def setup_logging(hs, config, use_worker_options=False): log_config_body = read_config() if log_config_body and log_config_body.get("structured") is True: - setup_structured_logging(hs, config, log_config_body) + logger = setup_structured_logging( + hs, config, log_config_body, logBeginner=logBeginner + ) appbase.register_sighup(read_config, callback=reload_structured_logging) else: - _setup_stdlib_logging(config, log_config_body) + logger = _setup_stdlib_logging(config, log_config_body, logBeginner=logBeginner) appbase.register_sighup(read_config, callback=_reload_stdlib_logging) # make sure that the first thing we log is a thing we can grep backwards @@ -221,3 +238,5 @@ def setup_logging(hs, config, use_worker_options=False): logging.warn("***** STARTING SERVER *****") logging.warn("Server %s version %s", sys.argv[0], get_version_string(synapse)) logging.info("Server hostname: %s", config.server_name) + + return logger diff --git a/synapse/config/metrics.py b/synapse/config/metrics.py index 653b990e6..282a43bdd 100644 --- a/synapse/config/metrics.py +++ b/synapse/config/metrics.py @@ -16,11 +16,9 @@ import attr -from ._base import Config, ConfigError +from synapse.python_dependencies import DependencyException, check_requirements -MISSING_SENTRY = """Missing sentry-sdk library. This is required to enable sentry - integration. - """ +from ._base import Config, ConfigError @attr.s @@ -36,9 +34,14 @@ class MetricsFlags(object): class MetricsConfig(Config): + section = "metrics" + def read_config(self, config, **kwargs): self.enable_metrics = config.get("enable_metrics", False) self.report_stats = config.get("report_stats", None) + self.report_stats_endpoint = config.get( + "report_stats_endpoint", "https://matrix.org/report-usage-stats/push" + ) self.metrics_port = config.get("metrics_port") self.metrics_bind_host = config.get("metrics_bind_host", "127.0.0.1") @@ -51,9 +54,9 @@ class MetricsConfig(Config): self.sentry_enabled = "sentry" in config if self.sentry_enabled: try: - import sentry_sdk # noqa F401 - except ImportError: - raise ConfigError(MISSING_SENTRY) + check_requirements("sentry") + except DependencyException as e: + raise ConfigError(e.message) self.sentry_dsn = config["sentry"].get("dsn") if not self.sentry_dsn: @@ -97,4 +100,10 @@ class MetricsConfig(Config): else: res += "report_stats: %s\n" % ("true" if report_stats else "false") + res += """ + # The endpoint to report the anonymized homeserver usage statistics to. + # Defaults to https://matrix.org/report-usage-stats/push + # + #report_stats_endpoint: https://example.com/report-usage-stats/push + """ return res diff --git a/synapse/config/password.py b/synapse/config/password.py index d5b5953f2..2a634ac75 100644 --- a/synapse/config/password.py +++ b/synapse/config/password.py @@ -20,6 +20,8 @@ class PasswordConfig(Config): """Password login configuration """ + section = "password" + def read_config(self, config, **kwargs): password_config = config.get("password_config", {}) if password_config is None: diff --git a/synapse/config/password_auth_providers.py b/synapse/config/password_auth_providers.py index 788c39c9f..9746bbc68 100644 --- a/synapse/config/password_auth_providers.py +++ b/synapse/config/password_auth_providers.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Any, List + from synapse.util.module_loader import load_module from ._base import Config @@ -21,8 +23,10 @@ LDAP_PROVIDER = "ldap_auth_provider.LdapAuthProvider" class PasswordAuthProviderConfig(Config): + section = "authproviders" + def read_config(self, config, **kwargs): - self.password_providers = [] + self.password_providers = [] # type: List[Any] providers = [] # We want to be backwards compatible with the old `ldap_config` diff --git a/synapse/config/push.py b/synapse/config/push.py index 1b932722a..091095864 100644 --- a/synapse/config/push.py +++ b/synapse/config/push.py @@ -18,6 +18,8 @@ from ._base import Config class PushConfig(Config): + section = "push" + def read_config(self, config, **kwargs): push_config = config.get("push", {}) self.push_include_content = push_config.get("include_content", True) diff --git a/synapse/config/ratelimiting.py b/synapse/config/ratelimiting.py index 33f31cf21..947f653e0 100644 --- a/synapse/config/ratelimiting.py +++ b/synapse/config/ratelimiting.py @@ -36,6 +36,8 @@ class FederationRateLimitConfig(object): class RatelimitConfig(Config): + section = "ratelimiting" + def read_config(self, config, **kwargs): # Load the new-style messages config if it exists. Otherwise fall back @@ -80,6 +82,12 @@ class RatelimitConfig(Config): "federation_rr_transactions_per_room_per_second", 50 ) + rc_admin_redaction = config.get("rc_admin_redaction") + if rc_admin_redaction: + self.rc_admin_redaction = RateLimitConfig(rc_admin_redaction) + else: + self.rc_admin_redaction = None + def generate_config_section(self, **kwargs): return """\ ## Ratelimiting ## @@ -102,6 +110,9 @@ class RatelimitConfig(Config): # - one for login that ratelimits login requests based on the account the # client is attempting to log into, based on the amount of failed login # attempts for this account. + # - one for ratelimiting redactions by room admins. If this is not explicitly + # set then it uses the same ratelimiting as per rc_message. This is useful + # to allow room admins to deal with abuse quickly. # # The defaults are as shown below. # @@ -123,6 +134,10 @@ class RatelimitConfig(Config): # failed_attempts: # per_second: 0.17 # burst_count: 3 + # + #rc_admin_redaction: + # per_second: 1 + # burst_count: 50 # Ratelimiting settings for incoming federation diff --git a/synapse/config/registration.py b/synapse/config/registration.py index 9548560ed..b3e3e6dda 100644 --- a/synapse/config/registration.py +++ b/synapse/config/registration.py @@ -24,6 +24,8 @@ from synapse.util.stringutils import random_string_with_symbols class AccountValidityConfig(Config): + section = "accountvalidity" + def __init__(self, config, synapse_config): self.enabled = config.get("enabled", False) self.renew_by_email_enabled = "renew_at" in config @@ -77,6 +79,8 @@ class AccountValidityConfig(Config): class RegistrationConfig(Config): + section = "registration" + def read_config(self, config, **kwargs): self.enable_registration = bool( strtobool(str(config.get("enable_registration", False))) @@ -293,8 +297,10 @@ class RegistrationConfig(Config): # by the Matrix Identity Service API specification: # https://matrix.org/docs/spec/identity_service/latest # + # If a delegate is specified, the config option public_baseurl must also be filled out. + # account_threepid_delegates: - #email: https://example.com # Delegate email sending to matrix.org + #email: https://example.com # Delegate email sending to example.org #msisdn: http://localhost:8090 # Delegate SMS sending to this local process # Users who register on this homeserver will automatically be joined diff --git a/synapse/config/repository.py b/synapse/config/repository.py index fdb1f246d..d0205e14b 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright 2014, 2015 matrix.org +# Copyright 2014, 2015 OpenMarket Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -15,7 +15,9 @@ import os from collections import namedtuple +from typing import Dict, List +from synapse.python_dependencies import DependencyException, check_requirements from synapse.util.module_loader import load_module from ._base import Config, ConfigError @@ -34,17 +36,6 @@ THUMBNAIL_SIZE_YAML = """\ # method: %(method)s """ -MISSING_NETADDR = "Missing netaddr library. This is required for URL preview API." - -MISSING_LXML = """Missing lxml library. This is required for URL preview API. - - Install by running: - pip install lxml - - Requires libxslt1-dev system package. - """ - - ThumbnailRequirement = namedtuple( "ThumbnailRequirement", ["width", "height", "method", "media_type"] ) @@ -71,7 +62,7 @@ def parse_thumbnail_requirements(thumbnail_sizes): Dictionary mapping from media type string to list of ThumbnailRequirement tuples. """ - requirements = {} + requirements = {} # type: Dict[str, List] for size in thumbnail_sizes: width = size["width"] height = size["height"] @@ -87,6 +78,8 @@ def parse_thumbnail_requirements(thumbnail_sizes): class ContentRepositoryConfig(Config): + section = "media" + def read_config(self, config, **kwargs): # Only enable the media repo if either the media repo is enabled or the @@ -140,7 +133,7 @@ class ContentRepositoryConfig(Config): # # We don't create the storage providers here as not all workers need # them to be started. - self.media_storage_providers = [] + self.media_storage_providers = [] # type: List[tuple] for provider_config in storage_providers: # We special case the module "file_system" so as not to need to @@ -171,16 +164,10 @@ class ContentRepositoryConfig(Config): self.url_preview_enabled = config.get("url_preview_enabled", False) if self.url_preview_enabled: try: - import lxml + check_requirements("url_preview") - lxml # To stop unused lint. - except ImportError: - raise ConfigError(MISSING_LXML) - - try: - from netaddr import IPSet - except ImportError: - raise ConfigError(MISSING_NETADDR) + except DependencyException as e: + raise ConfigError(e.message) if "url_preview_ip_range_blacklist" not in config: raise ConfigError( @@ -189,6 +176,9 @@ class ContentRepositoryConfig(Config): "to work" ) + # netaddr is a dependency for url_preview + from netaddr import IPSet + self.url_preview_ip_range_blacklist = IPSet( config["url_preview_ip_range_blacklist"] ) diff --git a/synapse/config/room_directory.py b/synapse/config/room_directory.py index a92693017..7c9f05bde 100644 --- a/synapse/config/room_directory.py +++ b/synapse/config/room_directory.py @@ -19,6 +19,8 @@ from ._base import Config, ConfigError class RoomDirectoryConfig(Config): + section = "roomdirectory" + def read_config(self, config, **kwargs): self.enable_room_list_search = config.get("enable_room_list_search", True) diff --git a/synapse/config/saml2_config.py b/synapse/config/saml2_config.py index 6a8161547..c407e1368 100644 --- a/synapse/config/saml2_config.py +++ b/synapse/config/saml2_config.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2018 New Vector Ltd +# Copyright 2019 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,12 +13,50 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +import re + from synapse.python_dependencies import DependencyException, check_requirements +from synapse.types import ( + map_username_to_mxid_localpart, + mxid_localpart_allowed_characters, +) +from synapse.util.module_loader import load_python_module from ._base import Config, ConfigError +def _dict_merge(merge_dict, into_dict): + """Do a deep merge of two dicts + + Recursively merges `merge_dict` into `into_dict`: + * For keys where both `merge_dict` and `into_dict` have a dict value, the values + are recursively merged + * For all other keys, the values in `into_dict` (if any) are overwritten with + the value from `merge_dict`. + + Args: + merge_dict (dict): dict to merge + into_dict (dict): target dict + """ + for k, v in merge_dict.items(): + if k not in into_dict: + into_dict[k] = v + continue + + current_val = into_dict[k] + + if isinstance(v, dict) and isinstance(current_val, dict): + _dict_merge(v, current_val) + continue + + # otherwise we just overwrite + into_dict[k] = v + + class SAML2Config(Config): + section = "saml2" + def read_config(self, config, **kwargs): self.saml2_enabled = False @@ -26,6 +65,9 @@ class SAML2Config(Config): if not saml2_config or not saml2_config.get("enabled", True): return + if not saml2_config.get("sp_config") and not saml2_config.get("config_path"): + return + try: check_requirements("saml2") except DependencyException as e: @@ -33,21 +75,40 @@ class SAML2Config(Config): self.saml2_enabled = True - import saml2.config + self.saml2_mxid_source_attribute = saml2_config.get( + "mxid_source_attribute", "uid" + ) - self.saml2_sp_config = saml2.config.SPConfig() - self.saml2_sp_config.load(self._default_saml_config_dict()) - self.saml2_sp_config.load(saml2_config.get("sp_config", {})) + self.saml2_grandfathered_mxid_source_attribute = saml2_config.get( + "grandfathered_mxid_source_attribute", "uid" + ) + + saml2_config_dict = self._default_saml_config_dict() + _dict_merge( + merge_dict=saml2_config.get("sp_config", {}), into_dict=saml2_config_dict + ) config_path = saml2_config.get("config_path", None) if config_path is not None: - self.saml2_sp_config.load_file(config_path) + mod = load_python_module(config_path) + _dict_merge(merge_dict=mod.CONFIG, into_dict=saml2_config_dict) + + import saml2.config + + self.saml2_sp_config = saml2.config.SPConfig() + self.saml2_sp_config.load(saml2_config_dict) # session lifetime: in milliseconds self.saml2_session_lifetime = self.parse_duration( saml2_config.get("saml_session_lifetime", "5m") ) + mapping = saml2_config.get("mxid_mapping", "hexencode") + try: + self.saml2_mxid_mapper = MXID_MAPPER_MAP[mapping] + except KeyError: + raise ConfigError("%s is not a known mxid_mapping" % (mapping,)) + def _default_saml_config_dict(self): import saml2 @@ -55,6 +116,13 @@ class SAML2Config(Config): if public_baseurl is None: raise ConfigError("saml2_config requires a public_baseurl to be set") + required_attributes = {"uid", self.saml2_mxid_source_attribute} + + optional_attributes = {"displayName"} + if self.saml2_grandfathered_mxid_source_attribute: + optional_attributes.add(self.saml2_grandfathered_mxid_source_attribute) + optional_attributes -= required_attributes + metadata_url = public_baseurl + "_matrix/saml2/metadata.xml" response_url = public_baseurl + "_matrix/saml2/authn_response" return { @@ -66,8 +134,9 @@ class SAML2Config(Config): (response_url, saml2.BINDING_HTTP_POST) ] }, - "required_attributes": ["uid"], - "optional_attributes": ["mail", "surname", "givenname"], + "required_attributes": list(required_attributes), + "optional_attributes": list(optional_attributes), + # "name_id_format": saml2.saml.NAMEID_FORMAT_PERSISTENT, } }, } @@ -76,12 +145,13 @@ class SAML2Config(Config): return """\ # Enable SAML2 for registration and login. Uses pysaml2. # - # `sp_config` is the configuration for the pysaml2 Service Provider. - # See pysaml2 docs for format of config. + # At least one of `sp_config` or `config_path` must be set in this section to + # enable SAML login. # - # Default values will be used for the 'entityid' and 'service' settings, - # so it is not normally necessary to specify them unless you need to - # override them. + # (You will probably also want to set the following options to `false` to + # disable the regular login/registration flows: + # * enable_registration + # * password_config.enabled # # Once SAML support is enabled, a metadata file will be exposed at # https://:/_matrix/saml2/metadata.xml, which you may be able to @@ -89,52 +159,105 @@ class SAML2Config(Config): # the IdP to use an ACS location of # https://:/_matrix/saml2/authn_response. # - #saml2_config: - # sp_config: - # # point this to the IdP's metadata. You can use either a local file or - # # (preferably) a URL. - # metadata: - # #local: ["saml2/idp.xml"] - # remote: - # - url: https://our_idp/metadata.xml - # - # # By default, the user has to go to our login page first. If you'd like to - # # allow IdP-initiated login, set 'allow_unsolicited: True' in a - # # 'service.sp' section: - # # - # #service: - # # sp: - # # allow_unsolicited: True - # - # # The examples below are just used to generate our metadata xml, and you - # # may well not need it, depending on your setup. Alternatively you - # # may need a whole lot more detail - see the pysaml2 docs! - # - # description: ["My awesome SP", "en"] - # name: ["Test SP", "en"] - # - # organization: - # name: Example com - # display_name: - # - ["Example co", "en"] - # url: "http://example.com" - # - # contact_person: - # - given_name: Bob - # sur_name: "the Sysadmin" - # email_address": ["admin@example.com"] - # contact_type": technical - # - # # Instead of putting the config inline as above, you can specify a - # # separate pysaml2 configuration file: - # # - # config_path: "%(config_dir_path)s/sp_conf.py" - # - # # the lifetime of a SAML session. This defines how long a user has to - # # complete the authentication process, if allow_unsolicited is unset. - # # The default is 5 minutes. - # # - # # saml_session_lifetime: 5m + saml2_config: + # `sp_config` is the configuration for the pysaml2 Service Provider. + # See pysaml2 docs for format of config. + # + # Default values will be used for the 'entityid' and 'service' settings, + # so it is not normally necessary to specify them unless you need to + # override them. + # + #sp_config: + # # point this to the IdP's metadata. You can use either a local file or + # # (preferably) a URL. + # metadata: + # #local: ["saml2/idp.xml"] + # remote: + # - url: https://our_idp/metadata.xml + # + # # By default, the user has to go to our login page first. If you'd like + # # to allow IdP-initiated login, set 'allow_unsolicited: True' in a + # # 'service.sp' section: + # # + # #service: + # # sp: + # # allow_unsolicited: true + # + # # The examples below are just used to generate our metadata xml, and you + # # may well not need them, depending on your setup. Alternatively you + # # may need a whole lot more detail - see the pysaml2 docs! + # + # description: ["My awesome SP", "en"] + # name: ["Test SP", "en"] + # + # organization: + # name: Example com + # display_name: + # - ["Example co", "en"] + # url: "http://example.com" + # + # contact_person: + # - given_name: Bob + # sur_name: "the Sysadmin" + # email_address": ["admin@example.com"] + # contact_type": technical + + # Instead of putting the config inline as above, you can specify a + # separate pysaml2 configuration file: + # + #config_path: "%(config_dir_path)s/sp_conf.py" + + # the lifetime of a SAML session. This defines how long a user has to + # complete the authentication process, if allow_unsolicited is unset. + # The default is 5 minutes. + # + #saml_session_lifetime: 5m + + # The SAML attribute (after mapping via the attribute maps) to use to derive + # the Matrix ID from. 'uid' by default. + # + #mxid_source_attribute: displayName + + # The mapping system to use for mapping the saml attribute onto a matrix ID. + # Options include: + # * 'hexencode' (which maps unpermitted characters to '=xx') + # * 'dotreplace' (which replaces unpermitted characters with '.'). + # The default is 'hexencode'. + # + #mxid_mapping: dotreplace + + # In previous versions of synapse, the mapping from SAML attribute to MXID was + # always calculated dynamically rather than stored in a table. For backwards- + # compatibility, we will look for user_ids matching such a pattern before + # creating a new account. + # + # This setting controls the SAML attribute which will be used for this + # backwards-compatibility lookup. Typically it should be 'uid', but if the + # attribute maps are changed, it may be necessary to change it. + # + # The default is 'uid'. + # + #grandfathered_mxid_source_attribute: upn """ % { "config_dir_path": config_dir_path } + + +DOT_REPLACE_PATTERN = re.compile( + ("[^%s]" % (re.escape("".join(mxid_localpart_allowed_characters)),)) +) + + +def dot_replace_for_mxid(username: str) -> str: + username = username.lower() + username = DOT_REPLACE_PATTERN.sub(".", username) + + # regular mxids aren't allowed to start with an underscore either + username = re.sub("^_", "", username) + return username + + +MXID_MAPPER_MAP = { + "hexencode": map_username_to_mxid_localpart, + "dotreplace": dot_replace_for_mxid, +} diff --git a/synapse/config/server.py b/synapse/config/server.py index 2abdef097..afc4d6a4a 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -19,6 +19,7 @@ import logging import os.path import re from textwrap import indent +from typing import List import attr import yaml @@ -48,8 +49,17 @@ ROOM_COMPLEXITY_TOO_GREAT = ( "to join this room." ) +METRICS_PORT_WARNING = """\ +The metrics_port configuration option is deprecated in Synapse 0.31 in favour of +a listener. Please see +https://github.com/matrix-org/synapse/blob/master/docs/metrics-howto.md +on how to configure the new listener. +--------------------------------------------------------------------------------""" + class ServerConfig(Config): + section = "server" + def read_config(self, config, **kwargs): self.server_name = config["server_name"] self.server_context = config.get("server_context", None) @@ -162,6 +172,23 @@ class ServerConfig(Config): self.mau_trial_days = config.get("mau_trial_days", 0) + # How long to keep redacted events in the database in unredacted form + # before redacting them. + redaction_retention_period = config.get("redaction_retention_period", "7d") + if redaction_retention_period is not None: + self.redaction_retention_period = self.parse_duration( + redaction_retention_period + ) + else: + self.redaction_retention_period = None + + # How long to keep entries in the `users_ips` table. + user_ips_max_age = config.get("user_ips_max_age", "28d") + if user_ips_max_age is not None: + self.user_ips_max_age = self.parse_duration(user_ips_max_age) + else: + self.user_ips_max_age = None + # Options to disable HS self.hs_disabled = config.get("hs_disabled", False) self.hs_disabled_message = config.get("hs_disabled_message", "") @@ -219,7 +246,7 @@ class ServerConfig(Config): # events with profile information that differ from the target's global profile. self.allow_per_room_profiles = config.get("allow_per_room_profiles", True) - self.listeners = [] + self.listeners = [] # type: List[dict] for listener in config.get("listeners", []): if not isinstance(listener.get("port", None), int): raise ConfigError( @@ -263,7 +290,10 @@ class ServerConfig(Config): validator=attr.validators.instance_of(bool), default=False ) complexity = attr.ib( - validator=attr.validators.instance_of((int, float)), default=1.0 + validator=attr.validators.instance_of( + (float, int) # type: ignore[arg-type] # noqa + ), + default=1.0, ) complexity_error = attr.ib( validator=attr.validators.instance_of(str), @@ -324,14 +354,7 @@ class ServerConfig(Config): metrics_port = config.get("metrics_port") if metrics_port: - logger.warn( - ( - "The metrics_port configuration option is deprecated in Synapse 0.31 " - "in favour of a listener. Please see " - "http://github.com/matrix-org/synapse/blob/master/docs/metrics-howto.rst" - " on how to configure the new listener." - ) - ) + logger.warning(METRICS_PORT_WARNING) self.listeners.append( { @@ -345,13 +368,11 @@ class ServerConfig(Config): _check_resource_config(self.listeners) - # An experimental option to try and periodically clean up extremities - # by sending dummy events. self.cleanup_extremities_with_dummy_events = config.get( - "cleanup_extremities_with_dummy_events", False + "cleanup_extremities_with_dummy_events", True ) - def has_tls_listener(self): + def has_tls_listener(self) -> bool: return any(l["tls"] for l in self.listeners) def generate_config_section( @@ -535,6 +556,9 @@ class ServerConfig(Config): # blacklist IP address CIDR ranges. If this option is not specified, or # specified with an empty list, no ip range blacklist will be enforced. # + # As of Synapse v1.4.0 this option also affects any outbound requests to identity + # servers provided by user input. + # # (0.0.0.0 and :: are always blacklisted, whether or not they are explicitly # listed here, since they correspond to unroutable addresses.) # @@ -561,8 +585,8 @@ class ServerConfig(Config): # # type: the type of listener. Normally 'http', but other valid options are: # 'manhole' (see docs/manhole.md), - # 'metrics' (see docs/metrics-howto.rst), - # 'replication' (see docs/workers.rst). + # 'metrics' (see docs/metrics-howto.md), + # 'replication' (see docs/workers.md). # # tls: set to true to enable TLS for this listener. Will use the TLS # key/cert specified in tls_private_key_path / tls_certificate_path. @@ -597,12 +621,12 @@ class ServerConfig(Config): # # media: the media API (/_matrix/media). # - # metrics: the metrics interface. See docs/metrics-howto.rst. + # metrics: the metrics interface. See docs/metrics-howto.md. # # openid: OpenID authentication. # # replication: the HTTP replication API (/_synapse/replication). See - # docs/workers.rst. + # docs/workers.md. # # static: static resources under synapse/static (/_matrix/static). (Mostly # useful for 'fallback authentication'.) @@ -622,7 +646,7 @@ class ServerConfig(Config): # that unwraps TLS. # # If you plan to use a reverse proxy, please see - # https://github.com/matrix-org/synapse/blob/master/docs/reverse_proxy.rst. + # https://github.com/matrix-org/synapse/blob/master/docs/reverse_proxy.md. # %(unsecure_http_bindings)s @@ -718,6 +742,19 @@ class ServerConfig(Config): # Defaults to 'true'. # #allow_per_room_profiles: false + + # How long to keep redacted events in unredacted form in the database. After + # this period redacted events get replaced with their redacted form in the DB. + # + # Defaults to `7d`. Set to `null` to disable. + # + #redaction_retention_period: 28d + + # How long to track users' last seen time and IPs in the database. + # + # Defaults to `28d`. Set to `null` to disable clearing out of old rows. + # + #user_ips_max_age: 14d """ % locals() ) diff --git a/synapse/config/server_notices_config.py b/synapse/config/server_notices_config.py index eaac3d73b..6ea2ea886 100644 --- a/synapse/config/server_notices_config.py +++ b/synapse/config/server_notices_config.py @@ -59,8 +59,10 @@ class ServerNoticesConfig(Config): None if server notices are not enabled. """ - def __init__(self): - super(ServerNoticesConfig, self).__init__() + section = "servernotices" + + def __init__(self, *args): + super(ServerNoticesConfig, self).__init__(*args) self.server_notices_mxid = None self.server_notices_mxid_display_name = None self.server_notices_mxid_avatar_url = None diff --git a/synapse/config/spam_checker.py b/synapse/config/spam_checker.py index e40797ab5..36e0ddab5 100644 --- a/synapse/config/spam_checker.py +++ b/synapse/config/spam_checker.py @@ -19,6 +19,8 @@ from ._base import Config class SpamCheckerConfig(Config): + section = "spamchecker" + def read_config(self, config, **kwargs): self.spam_checker = None diff --git a/synapse/config/stats.py b/synapse/config/stats.py index b18ddbd1f..62485189e 100644 --- a/synapse/config/stats.py +++ b/synapse/config/stats.py @@ -25,6 +25,8 @@ class StatsConfig(Config): Configuration for the behaviour of synapse's stats engine """ + section = "stats" + def read_config(self, config, **kwargs): self.stats_enabled = True self.stats_bucket_size = 86400 * 1000 diff --git a/synapse/config/third_party_event_rules.py b/synapse/config/third_party_event_rules.py index b3431441b..10a99c792 100644 --- a/synapse/config/third_party_event_rules.py +++ b/synapse/config/third_party_event_rules.py @@ -19,6 +19,8 @@ from ._base import Config class ThirdPartyRulesConfig(Config): + section = "thirdpartyrules" + def read_config(self, config, **kwargs): self.third_party_event_rules = None diff --git a/synapse/config/tls.py b/synapse/config/tls.py index c0148aa95..f06341eb6 100644 --- a/synapse/config/tls.py +++ b/synapse/config/tls.py @@ -18,6 +18,7 @@ import os import warnings from datetime import datetime from hashlib import sha256 +from typing import List import six @@ -33,7 +34,9 @@ logger = logging.getLogger(__name__) class TlsConfig(Config): - def read_config(self, config, config_dir_path, **kwargs): + section = "tls" + + def read_config(self, config: dict, config_dir_path: str, **kwargs): acme_config = config.get("acme", None) if acme_config is None: @@ -57,7 +60,7 @@ class TlsConfig(Config): self.tls_certificate_file = self.abspath(config.get("tls_certificate_path")) self.tls_private_key_file = self.abspath(config.get("tls_private_key_path")) - if self.has_tls_listener(): + if self.root.server.has_tls_listener(): if not self.tls_certificate_file: raise ConfigError( "tls_certificate_path must be specified if TLS-enabled listeners are " @@ -108,10 +111,17 @@ class TlsConfig(Config): ) # Support globs (*) in whitelist values - self.federation_certificate_verification_whitelist = [] + self.federation_certificate_verification_whitelist = [] # type: List[str] for entry in fed_whitelist_entries: + try: + entry_regex = glob_to_regex(entry.encode("ascii").decode("ascii")) + except UnicodeEncodeError: + raise ConfigError( + "IDNA domain names are not allowed in the " + "federation_certificate_verification_whitelist: %s" % (entry,) + ) + # Convert globs to regex - entry_regex = glob_to_regex(entry) self.federation_certificate_verification_whitelist.append(entry_regex) # List of custom certificate authorities for federation traffic validation diff --git a/synapse/config/tracer.py b/synapse/config/tracer.py index 95e7ccb3a..8be134611 100644 --- a/synapse/config/tracer.py +++ b/synapse/config/tracer.py @@ -13,10 +13,14 @@ # See the License for the specific language governing permissions and # limitations under the License. +from synapse.python_dependencies import DependencyException, check_requirements + from ._base import Config, ConfigError class TracerConfig(Config): + section = "tracing" + def read_config(self, config, **kwargs): opentracing_config = config.get("opentracing") if opentracing_config is None: @@ -32,6 +36,11 @@ class TracerConfig(Config): if not self.opentracer_enabled: return + try: + check_requirements("opentracing") + except DependencyException as e: + raise ConfigError(e.message) + # The tracer is enabled so sanitize the config self.opentracer_whitelist = opentracing_config.get("homeserver_whitelist", []) diff --git a/synapse/config/user_directory.py b/synapse/config/user_directory.py index f6313e17d..c8d19c5d6 100644 --- a/synapse/config/user_directory.py +++ b/synapse/config/user_directory.py @@ -21,6 +21,8 @@ class UserDirectoryConfig(Config): Configuration for the behaviour of the /user_directory API """ + section = "userdirectory" + def read_config(self, config, **kwargs): self.user_directory_search_enabled = True self.user_directory_search_all_users = False diff --git a/synapse/config/voip.py b/synapse/config/voip.py index 2ca0e1cf7..a68a3068a 100644 --- a/synapse/config/voip.py +++ b/synapse/config/voip.py @@ -16,6 +16,8 @@ from ._base import Config class VoipConfig(Config): + section = "voip" + def read_config(self, config, **kwargs): self.turn_uris = config.get("turn_uris", []) self.turn_shared_secret = config.get("turn_shared_secret") diff --git a/synapse/config/workers.py b/synapse/config/workers.py index bc0fc165e..fef72ed97 100644 --- a/synapse/config/workers.py +++ b/synapse/config/workers.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright 2016 matrix.org +# Copyright 2016 OpenMarket Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -21,6 +21,8 @@ class WorkerConfig(Config): They have their own pid_file and listener configuration. They use the replication_url to talk to the main synapse process.""" + section = "worker" + def read_config(self, config, **kwargs): self.worker_app = config.get("worker_app") diff --git a/synapse/crypto/context_factory.py b/synapse/crypto/context_factory.py index 06e63a96b..e93f0b370 100644 --- a/synapse/crypto/context_factory.py +++ b/synapse/crypto/context_factory.py @@ -15,7 +15,6 @@ import logging -import idna from service_identity import VerificationError from service_identity.pyopenssl import verify_hostname, verify_ip_address from zope.interface import implementer @@ -114,14 +113,20 @@ class ClientTLSOptionsFactory(object): self._no_verify_ssl_context = self._no_verify_ssl.getContext() self._no_verify_ssl_context.set_info_callback(self._context_info_cb) - def get_options(self, host): + def get_options(self, host: bytes): + + # IPolicyForHTTPS.get_options takes bytes, but we want to compare + # against the str whitelist. The hostnames in the whitelist are already + # IDNA-encoded like the hosts will be here. + ascii_host = host.decode("ascii") + # Check if certificate verification has been enabled should_verify = self._config.federation_verify_certificates # Check if we've disabled certificate verification for this host if should_verify: for regex in self._config.federation_certificate_verification_whitelist: - if regex.match(host): + if regex.match(ascii_host): should_verify = False break @@ -162,7 +167,7 @@ class SSLClientConnectionCreator(object): Replaces twisted.internet.ssl.ClientTLSOptions """ - def __init__(self, hostname, ctx, verify_certs): + def __init__(self, hostname: bytes, ctx, verify_certs: bool): self._ctx = ctx self._verifier = ConnectionVerifier(hostname, verify_certs) @@ -190,21 +195,16 @@ class ConnectionVerifier(object): # This code is based on twisted.internet.ssl.ClientTLSOptions. - def __init__(self, hostname, verify_certs): + def __init__(self, hostname: bytes, verify_certs): self._verify_certs = verify_certs - if isIPAddress(hostname) or isIPv6Address(hostname): - self._hostnameBytes = hostname.encode("ascii") + _decoded = hostname.decode("ascii") + if isIPAddress(_decoded) or isIPv6Address(_decoded): self._is_ip_address = True else: - # twisted's ClientTLSOptions falls back to the stdlib impl here if - # idna is not installed, but points out that lacks support for - # IDNA2008 (http://bugs.python.org/issue17305). - # - # We can rely on having idna. - self._hostnameBytes = idna.encode(hostname) self._is_ip_address = False + self._hostnameBytes = hostname self._hostnameASCII = self._hostnameBytes.decode("ascii") def verify_context_info_cb(self, ssl_connection, where): diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index e5f0b90ae..21e52c969 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -36,7 +36,6 @@ from synapse.api.errors import ( UnsupportedRoomVersionError, ) from synapse.api.room_versions import KNOWN_ROOM_VERSIONS -from synapse.crypto.event_signing import compute_event_signature from synapse.events import room_version_to_event_format from synapse.federation.federation_base import FederationBase, event_from_pdu_json from synapse.federation.persistence import TransactionActions @@ -322,18 +321,6 @@ class FederationServer(FederationBase): pdus = yield self.handler.get_state_for_pdu(room_id, event_id) auth_chain = yield self.store.get_auth_chain([pdu.event_id for pdu in pdus]) - for event in auth_chain: - # We sign these again because there was a bug where we - # incorrectly signed things the first time round - if self.hs.is_mine_id(event.event_id): - event.signatures.update( - compute_event_signature( - event.get_pdu_json(), - self.hs.hostname, - self.hs.config.signing_key[0], - ) - ) - return { "pdus": [pdu.get_pdu_json() for pdu in pdus], "auth_chain": [pdu.get_pdu_json() for pdu in auth_chain], @@ -669,9 +656,9 @@ class FederationServer(FederationBase): return ret @defer.inlineCallbacks - def on_exchange_third_party_invite_request(self, origin, room_id, event_dict): + def on_exchange_third_party_invite_request(self, room_id, event_dict): ret = yield self.handler.on_exchange_third_party_invite_request( - origin, room_id, event_dict + room_id, event_dict ) return ret diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py index d46f4aaeb..2b2ee8612 100644 --- a/synapse/federation/sender/__init__.py +++ b/synapse/federation/sender/__init__.py @@ -38,7 +38,7 @@ from synapse.metrics import ( events_processed_counter, ) from synapse.metrics.background_process_metrics import run_as_background_process -from synapse.util.metrics import measure_func +from synapse.util.metrics import Measure, measure_func logger = logging.getLogger(__name__) @@ -183,8 +183,8 @@ class FederationSender(object): # Otherwise if the last member on a server in a room is # banned then it won't receive the event because it won't # be in the room after the ban. - destinations = yield self.state.get_current_hosts_in_room( - event.room_id, latest_event_ids=event.prev_event_ids() + destinations = yield self.state.get_hosts_in_room_at_events( + event.room_id, event_ids=event.prev_event_ids() ) except Exception: logger.exception( @@ -207,8 +207,9 @@ class FederationSender(object): @defer.inlineCallbacks def handle_room_events(events): - for event in events: - yield handle_event(event) + with Measure(self.clock, "handle_room_events"): + for event in events: + yield handle_event(event) events_by_room = {} for event in events: diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index 132a8fb5e..0f16f21c2 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -165,7 +165,7 @@ class Authenticator(object): async def _reset_retry_timings(self, origin): try: logger.info("Marking origin %r as up", origin) - await self.store.set_destination_retry_timings(origin, 0, 0) + await self.store.set_destination_retry_timings(origin, None, 0, 0) except Exception: logger.exception("Error resetting retry timings on %s", origin) @@ -575,7 +575,7 @@ class FederationThirdPartyInviteExchangeServlet(BaseFederationServlet): async def on_PUT(self, origin, content, query, room_id): content = await self.handler.on_exchange_third_party_invite_request( - origin, room_id, content + room_id, content ) return 200, content @@ -765,6 +765,10 @@ class PublicRoomList(BaseFederationServlet): else: network_tuple = ThirdPartyInstanceID(None, None) + if limit == 0: + # zero is a special value which corresponds to no limit. + limit = None + data = await maybeDeferred( self.handler.get_local_public_room_list, limit, @@ -800,6 +804,10 @@ class PublicRoomList(BaseFederationServlet): if search_filter is None: logger.warning("Nonefilter") + if limit == 0: + # zero is a special value which corresponds to no limit. + limit = None + data = await self.handler.get_local_public_room_list( limit=limit, since_token=since_token, diff --git a/synapse/groups/groups_server.py b/synapse/groups/groups_server.py index d50e69143..8f10b6adb 100644 --- a/synapse/groups/groups_server.py +++ b/synapse/groups/groups_server.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Copyright 2017 Vector Creations Ltd # Copyright 2018 New Vector Ltd +# Copyright 2019 Michael Telatynski <7t3chguy@gmail.com> # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -20,16 +21,16 @@ from six import string_types from twisted.internet import defer -from synapse.api.errors import SynapseError +from synapse.api.errors import Codes, SynapseError from synapse.types import GroupID, RoomID, UserID, get_domain_from_id from synapse.util.async_helpers import concurrently_execute logger = logging.getLogger(__name__) -# TODO: Allow users to "knock" or simpkly join depending on rules +# TODO: Allow users to "knock" or simply join depending on rules # TODO: Federation admin APIs -# TODO: is_priveged flag to users and is_public to users and rooms +# TODO: is_privileged flag to users and is_public to users and rooms # TODO: Audit log for admins (profile updates, membership changes, users who tried # to join but were rejected, etc) # TODO: Flairs @@ -590,7 +591,18 @@ class GroupsServerHandler(object): ) # TODO: Check if user knocked - # TODO: Check if user is already invited + + invited_users = yield self.store.get_invited_users_in_group(group_id) + if user_id in invited_users: + raise SynapseError( + 400, "User already invited to group", errcode=Codes.BAD_STATE + ) + + user_results = yield self.store.get_users_in_group( + group_id, include_private=True + ) + if user_id in [user_result["user_id"] for user_result in user_results]: + raise SynapseError(400, "User already in group") content = { "profile": {"name": group["name"], "avatar_url": group["avatar_url"]}, diff --git a/synapse/handlers/_base.py b/synapse/handlers/_base.py index c29c78bd6..d15c6282f 100644 --- a/synapse/handlers/_base.py +++ b/synapse/handlers/_base.py @@ -45,6 +45,7 @@ class BaseHandler(object): self.state_handler = hs.get_state_handler() self.distributor = hs.get_distributor() self.ratelimiter = hs.get_ratelimiter() + self.admin_redaction_ratelimiter = hs.get_admin_redaction_ratelimiter() self.clock = hs.get_clock() self.hs = hs @@ -53,7 +54,7 @@ class BaseHandler(object): self.event_builder_factory = hs.get_event_builder_factory() @defer.inlineCallbacks - def ratelimit(self, requester, update=True): + def ratelimit(self, requester, update=True, is_admin_redaction=False): """Ratelimits requests. Args: @@ -62,6 +63,9 @@ class BaseHandler(object): Set to False when doing multiple checks for one request (e.g. to check up front if we would reject the request), and set to True for the last call for a given request. + is_admin_redaction (bool): Whether this is a room admin/moderator + redacting an event. If so then we may apply different + ratelimits depending on config. Raises: LimitExceededError if the request should be ratelimited @@ -90,16 +94,33 @@ class BaseHandler(object): messages_per_second = override.messages_per_second burst_count = override.burst_count else: - messages_per_second = self.hs.config.rc_message.per_second - burst_count = self.hs.config.rc_message.burst_count + # We default to different values if this is an admin redaction and + # the config is set + if is_admin_redaction and self.hs.config.rc_admin_redaction: + messages_per_second = self.hs.config.rc_admin_redaction.per_second + burst_count = self.hs.config.rc_admin_redaction.burst_count + else: + messages_per_second = self.hs.config.rc_message.per_second + burst_count = self.hs.config.rc_message.burst_count - allowed, time_allowed = self.ratelimiter.can_do_action( - user_id, - time_now, - rate_hz=messages_per_second, - burst_count=burst_count, - update=update, - ) + if is_admin_redaction and self.hs.config.rc_admin_redaction: + # If we have separate config for admin redactions we use a separate + # ratelimiter + allowed, time_allowed = self.admin_redaction_ratelimiter.can_do_action( + user_id, + time_now, + rate_hz=messages_per_second, + burst_count=burst_count, + update=update, + ) + else: + allowed, time_allowed = self.ratelimiter.can_do_action( + user_id, + time_now, + rate_hz=messages_per_second, + burst_count=burst_count, + update=update, + ) if not allowed: raise LimitExceededError( retry_after_ms=int(1000 * (time_allowed - time_now)) diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index d0c014274..333eb3062 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -21,10 +21,8 @@ import unicodedata import attr import bcrypt import pymacaroons -from canonicaljson import json from twisted.internet import defer -from twisted.web.client import PartialDownloadError import synapse.util.stringutils as stringutils from synapse.api.constants import LoginType @@ -38,7 +36,8 @@ from synapse.api.errors import ( UserDeactivatedError, ) from synapse.api.ratelimiting import Ratelimiter -from synapse.config.emailconfig import ThreepidBehaviour +from synapse.handlers.ui_auth import INTERACTIVE_AUTH_CHECKERS +from synapse.handlers.ui_auth.checkers import UserInteractiveAuthChecker from synapse.logging.context import defer_to_thread from synapse.module_api import ModuleApi from synapse.types import UserID @@ -58,13 +57,13 @@ class AuthHandler(BaseHandler): hs (synapse.server.HomeServer): """ super(AuthHandler, self).__init__(hs) - self.checkers = { - LoginType.RECAPTCHA: self._check_recaptcha, - LoginType.EMAIL_IDENTITY: self._check_email_identity, - LoginType.MSISDN: self._check_msisdn, - LoginType.DUMMY: self._check_dummy_auth, - LoginType.TERMS: self._check_terms_auth, - } + + self.checkers = {} # type: dict[str, UserInteractiveAuthChecker] + for auth_checker_class in INTERACTIVE_AUTH_CHECKERS: + inst = auth_checker_class(hs) + if inst.is_enabled(): + self.checkers[inst.AUTH_TYPE] = inst + self.bcrypt_rounds = hs.config.bcrypt_rounds # This is not a cache per se, but a store of all current sessions that @@ -158,6 +157,14 @@ class AuthHandler(BaseHandler): return params + def get_enabled_auth_types(self): + """Return the enabled user-interactive authentication types + + Returns the UI-Auth types which are supported by the homeserver's current + config. + """ + return self.checkers.keys() + @defer.inlineCallbacks def check_auth(self, flows, clientdict, clientip): """ @@ -292,7 +299,7 @@ class AuthHandler(BaseHandler): sess["creds"] = {} creds = sess["creds"] - result = yield self.checkers[stagetype](authdict, clientip) + result = yield self.checkers[stagetype].check_auth(authdict, clientip) if result: creds[stagetype] = result self._save_session(sess) @@ -363,7 +370,7 @@ class AuthHandler(BaseHandler): login_type = authdict["type"] checker = self.checkers.get(login_type) if checker is not None: - res = yield checker(authdict, clientip=clientip) + res = yield checker.check_auth(authdict, clientip=clientip) return res # build a v1-login-style dict out of the authdict and fall back to the @@ -376,116 +383,6 @@ class AuthHandler(BaseHandler): (canonical_id, callback) = yield self.validate_login(user_id, authdict) return canonical_id - @defer.inlineCallbacks - def _check_recaptcha(self, authdict, clientip, **kwargs): - try: - user_response = authdict["response"] - except KeyError: - # Client tried to provide captcha but didn't give the parameter: - # bad request. - raise LoginError( - 400, "Captcha response is required", errcode=Codes.CAPTCHA_NEEDED - ) - - logger.info( - "Submitting recaptcha response %s with remoteip %s", user_response, clientip - ) - - # TODO: get this from the homeserver rather than creating a new one for - # each request - try: - client = self.hs.get_simple_http_client() - resp_body = yield client.post_urlencoded_get_json( - self.hs.config.recaptcha_siteverify_api, - args={ - "secret": self.hs.config.recaptcha_private_key, - "response": user_response, - "remoteip": clientip, - }, - ) - except PartialDownloadError as pde: - # Twisted is silly - data = pde.response - resp_body = json.loads(data) - - if "success" in resp_body: - # Note that we do NOT check the hostname here: we explicitly - # intend the CAPTCHA to be presented by whatever client the - # user is using, we just care that they have completed a CAPTCHA. - logger.info( - "%s reCAPTCHA from hostname %s", - "Successful" if resp_body["success"] else "Failed", - resp_body.get("hostname"), - ) - if resp_body["success"]: - return True - raise LoginError(401, "", errcode=Codes.UNAUTHORIZED) - - def _check_email_identity(self, authdict, **kwargs): - return self._check_threepid("email", authdict, **kwargs) - - def _check_msisdn(self, authdict, **kwargs): - return self._check_threepid("msisdn", authdict) - - def _check_dummy_auth(self, authdict, **kwargs): - return defer.succeed(True) - - def _check_terms_auth(self, authdict, **kwargs): - return defer.succeed(True) - - @defer.inlineCallbacks - def _check_threepid(self, medium, authdict, **kwargs): - if "threepid_creds" not in authdict: - raise LoginError(400, "Missing threepid_creds", Codes.MISSING_PARAM) - - threepid_creds = authdict["threepid_creds"] - - identity_handler = self.hs.get_handlers().identity_handler - - logger.info("Getting validated threepid. threepidcreds: %r", (threepid_creds,)) - if self.hs.config.threepid_behaviour_email == ThreepidBehaviour.REMOTE: - threepid = yield identity_handler.threepid_from_creds(threepid_creds) - elif self.hs.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL: - row = yield self.store.get_threepid_validation_session( - medium, - threepid_creds["client_secret"], - sid=threepid_creds["sid"], - validated=True, - ) - - threepid = ( - { - "medium": row["medium"], - "address": row["address"], - "validated_at": row["validated_at"], - } - if row - else None - ) - - if row: - # Valid threepid returned, delete from the db - yield self.store.delete_threepid_session(threepid_creds["sid"]) - else: - raise SynapseError( - 400, "Password resets are not enabled on this homeserver" - ) - - if not threepid: - raise LoginError(401, "", errcode=Codes.UNAUTHORIZED) - - if threepid["medium"] != medium: - raise LoginError( - 401, - "Expecting threepid of type '%s', got '%s'" - % (medium, threepid["medium"]), - errcode=Codes.UNAUTHORIZED, - ) - - threepid["threepid_creds"] = authdict["threepid_creds"] - - return threepid - def _get_params_recaptcha(self): return {"public_key": self.hs.config.recaptcha_public_key} diff --git a/synapse/handlers/deactivate_account.py b/synapse/handlers/deactivate_account.py index 5f804d1f1..63267a0a4 100644 --- a/synapse/handlers/deactivate_account.py +++ b/synapse/handlers/deactivate_account.py @@ -73,7 +73,9 @@ class DeactivateAccountHandler(BaseHandler): # unbinding identity_server_supports_unbinding = True - threepids = yield self.store.user_get_threepids(user_id) + # Retrieve the 3PIDs this user has bound to an identity server + threepids = yield self.store.user_get_bound_threepids(user_id) + for threepid in threepids: try: result = yield self._identity_handler.try_unbind_threepid( @@ -118,6 +120,10 @@ class DeactivateAccountHandler(BaseHandler): # parts users from rooms (if it isn't already running) self._start_user_parting() + # Reject all pending invites for the user, so that the user doesn't show up in the + # "invited" section of rooms' members list. + yield self._reject_pending_invites_for_user(user_id) + # Remove all information on the user from the account_validity table. if self._account_validity_enabled: yield self.store.delete_account_validity_for_user(user_id) @@ -127,6 +133,39 @@ class DeactivateAccountHandler(BaseHandler): return identity_server_supports_unbinding + @defer.inlineCallbacks + def _reject_pending_invites_for_user(self, user_id): + """Reject pending invites addressed to a given user ID. + + Args: + user_id (str): The user ID to reject pending invites for. + """ + user = UserID.from_string(user_id) + pending_invites = yield self.store.get_invited_rooms_for_user(user_id) + + for room in pending_invites: + try: + yield self._room_member_handler.update_membership( + create_requester(user), + user, + room.room_id, + "leave", + ratelimit=False, + require_consent=False, + ) + logger.info( + "Rejected invite for deactivated user %r in room %r", + user_id, + room.room_id, + ) + except Exception: + logger.exception( + "Failed to reject invite for user %r in room %r:" + " ignoring and continuing", + user_id, + room.room_id, + ) + def _start_user_parting(self): """ Start the process that goes through the table of users diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index 997ad66f8..be675197f 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -326,16 +326,10 @@ class E2eKeysHandler(object): results = yield self.store.get_e2e_device_keys(local_query) - # Build the result structure, un-jsonify the results, and add the - # "unsigned" section + # Build the result structure for user_id, device_keys in results.items(): for device_id, device_info in device_keys.items(): - r = dict(device_info["keys"]) - r["unsigned"] = {} - display_name = device_info["device_display_name"] - if display_name is not None: - r["unsigned"]["device_display_name"] = display_name - result_dict[user_id][device_id] = r + result_dict[user_id][device_id] = device_info log_kv(results) return result_dict diff --git a/synapse/handlers/e2e_room_keys.py b/synapse/handlers/e2e_room_keys.py index a9d80f708..0cea445f0 100644 --- a/synapse/handlers/e2e_room_keys.py +++ b/synapse/handlers/e2e_room_keys.py @@ -352,8 +352,8 @@ class E2eRoomKeysHandler(object): A deferred of an empty dict. """ if "version" not in version_info: - raise SynapseError(400, "Missing version in body", Codes.MISSING_PARAM) - if version_info["version"] != version: + version_info["version"] = version + elif version_info["version"] != version: raise SynapseError( 400, "Version in body does not match", Codes.INVALID_PARAM ) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 538b16efd..50fc0fde2 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -2530,12 +2530,17 @@ class FederationHandler(BaseHandler): @defer.inlineCallbacks @log_function - def on_exchange_third_party_invite_request(self, origin, room_id, event_dict): + def on_exchange_third_party_invite_request(self, room_id, event_dict): """Handle an exchange_third_party_invite request from a remote server The remote server will call this when it wants to turn a 3pid invite into a normal m.room.member invite. + Args: + room_id (str): The ID of the room. + + event_dict (dict[str, Any]): Dictionary containing the event body. + Returns: Deferred: resolves (to None) """ @@ -2565,7 +2570,7 @@ class FederationHandler(BaseHandler): ) try: - self.auth.check_from_context(room_version, event, context) + yield self.auth.check_from_context(room_version, event, context) except AuthError as e: logger.warn("Denying third party invite %r because %s", event, e) raise e @@ -2594,7 +2599,12 @@ class FederationHandler(BaseHandler): original_invite_id, allow_none=True ) if original_invite: - display_name = original_invite.content["display_name"] + # If the m.room.third_party_invite event's content is empty, it means the + # invite has been revoked. In this case, we don't have to raise an error here + # because the auth check will fail on the invite (because it's not able to + # fetch public keys from the m.room.third_party_invite event's content, which + # is empty). + display_name = original_invite.content.get("display_name") event_dict["content"]["third_party_invite"]["display_name"] = display_name else: logger.info( diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py index 45db1c1c0..ba99ddf76 100644 --- a/synapse/handlers/identity.py +++ b/synapse/handlers/identity.py @@ -18,160 +18,150 @@ """Utilities for interacting with Identity Servers""" import logging +import urllib from canonicaljson import json +from signedjson.key import decode_verify_key_bytes +from signedjson.sign import verify_signed_json +from unpaddedbase64 import decode_base64 from twisted.internet import defer +from twisted.internet.error import TimeoutError from synapse.api.errors import ( + AuthError, CodeMessageException, Codes, HttpResponseException, SynapseError, ) +from synapse.config.emailconfig import ThreepidBehaviour +from synapse.http.client import SimpleHttpClient +from synapse.util.hash import sha256_and_url_safe_base64 from synapse.util.stringutils import random_string from ._base import BaseHandler logger = logging.getLogger(__name__) +id_server_scheme = "https://" + class IdentityHandler(BaseHandler): def __init__(self, hs): super(IdentityHandler, self).__init__(hs) - self.http_client = hs.get_simple_http_client() + self.http_client = SimpleHttpClient(hs) + # We create a blacklisting instance of SimpleHttpClient for contacting identity + # servers specified by clients + self.blacklisting_http_client = SimpleHttpClient( + hs, ip_blacklist=hs.config.federation_ip_range_blacklist + ) self.federation_http_client = hs.get_http_client() self.hs = hs - def _extract_items_from_creds_dict(self, creds): - """ - Retrieve entries from a "credentials" dictionary - - Args: - creds (dict[str, str]): Dictionary of credentials that contain the following keys: - * client_secret|clientSecret: A unique secret str provided by the client - * id_server|idServer: the domain of the identity server to query - * id_access_token: The access token to authenticate to the identity - server with. - - Returns: - tuple(str, str, str|None): A tuple containing the client_secret, the id_server, - and the id_access_token value if available. - """ - client_secret = creds.get("client_secret") or creds.get("clientSecret") - if not client_secret: - raise SynapseError( - 400, "No client_secret in creds", errcode=Codes.MISSING_PARAM - ) - - id_server = creds.get("id_server") or creds.get("idServer") - if not id_server: - raise SynapseError( - 400, "No id_server in creds", errcode=Codes.MISSING_PARAM - ) - - id_access_token = creds.get("id_access_token") - return client_secret, id_server, id_access_token - @defer.inlineCallbacks - def threepid_from_creds(self, creds, use_v2=True): + def threepid_from_creds(self, id_server, creds): """ - Retrieve and validate a threepid identitier from a "credentials" dictionary + Retrieve and validate a threepid identifier from a "credentials" dictionary against a + given identity server Args: - creds (dict[str, str]): Dictionary of credentials that contain the following keys: + id_server (str): The identity server to validate 3PIDs against. Must be a + complete URL including the protocol (http(s)://) + + creds (dict[str, str]): Dictionary containing the following keys: * client_secret|clientSecret: A unique secret str provided by the client - * id_server|idServer: the domain of the identity server to query - * id_access_token: The access token to authenticate to the identity - server with. Required if use_v2 is true - use_v2 (bool): Whether to use v2 Identity Service API endpoints + * sid: The ID of the validation session Returns: Deferred[dict[str,str|int]|None]: A dictionary consisting of response params to the /getValidated3pid endpoint of the Identity Service API, or None if the threepid was not found """ - client_secret, id_server, id_access_token = self._extract_items_from_creds_dict( - creds - ) - - # If an id_access_token is not supplied, force usage of v1 - if id_access_token is None: - use_v2 = False - - query_params = {"sid": creds["sid"], "client_secret": client_secret} - - # Decide which API endpoint URLs and query parameters to use - if use_v2: - url = "https://%s%s" % ( - id_server, - "/_matrix/identity/v2/3pid/getValidated3pid", + client_secret = creds.get("client_secret") or creds.get("clientSecret") + if not client_secret: + raise SynapseError( + 400, "Missing param client_secret in creds", errcode=Codes.MISSING_PARAM ) - query_params["id_access_token"] = id_access_token - else: - url = "https://%s%s" % ( - id_server, - "/_matrix/identity/api/v1/3pid/getValidated3pid", + session_id = creds.get("sid") + if not session_id: + raise SynapseError( + 400, "Missing param session_id in creds", errcode=Codes.MISSING_PARAM ) + query_params = {"sid": session_id, "client_secret": client_secret} + + url = id_server + "/_matrix/identity/api/v1/3pid/getValidated3pid" + try: data = yield self.http_client.get_json(url, query_params) - return data if "medium" in data else None + except TimeoutError: + raise SynapseError(500, "Timed out contacting identity server") except HttpResponseException as e: - if e.code != 404 or not use_v2: - # Generic failure - logger.info("getValidated3pid failed with Matrix error: %r", e) - raise e.to_synapse_error() + logger.info( + "%s returned %i for threepid validation for: %s", + id_server, + e.code, + creds, + ) + return None - # This identity server is too old to understand Identity Service API v2 - # Attempt v1 endpoint - logger.info("Got 404 when POSTing JSON %s, falling back to v1 URL", url) - return (yield self.threepid_from_creds(creds, use_v2=False)) + # Old versions of Sydent return a 200 http code even on a failed validation + # check. Thus, in addition to the HttpResponseException check above (which + # checks for non-200 errors), we need to make sure validation_session isn't + # actually an error, identified by the absence of a "medium" key + # See https://github.com/matrix-org/sydent/issues/215 for details + if "medium" in data: + return data + + logger.info("%s reported non-validated threepid: %s", id_server, creds) + return None @defer.inlineCallbacks - def bind_threepid(self, creds, mxid, use_v2=True): + def bind_threepid( + self, client_secret, sid, mxid, id_server, id_access_token=None, use_v2=True + ): """Bind a 3PID to an identity server Args: - creds (dict[str, str]): Dictionary of credentials that contain the following keys: - * client_secret|clientSecret: A unique secret str provided by the client - * id_server|idServer: the domain of the identity server to query - * id_access_token: The access token to authenticate to the identity - server with. Required if use_v2 is true + client_secret (str): A unique secret provided by the client + + sid (str): The ID of the validation session + mxid (str): The MXID to bind the 3PID to - use_v2 (bool): Whether to use v2 Identity Service API endpoints + + id_server (str): The domain of the identity server to query + + id_access_token (str): The access token to authenticate to the identity + server with, if necessary. Required if use_v2 is true + + use_v2 (bool): Whether to use v2 Identity Service API endpoints. Defaults to True Returns: Deferred[dict]: The response from the identity server """ - logger.debug("binding threepid %r to %s", creds, mxid) - - client_secret, id_server, id_access_token = self._extract_items_from_creds_dict( - creds - ) - - sid = creds.get("sid") - if not sid: - raise SynapseError( - 400, "No sid in three_pid_creds", errcode=Codes.MISSING_PARAM - ) + logger.debug("Proxying threepid bind request for %s to %s", mxid, id_server) # If an id_access_token is not supplied, force usage of v1 if id_access_token is None: use_v2 = False # Decide which API endpoint URLs to use + headers = {} bind_data = {"sid": sid, "client_secret": client_secret, "mxid": mxid} if use_v2: bind_url = "https://%s/_matrix/identity/v2/3pid/bind" % (id_server,) - bind_data["id_access_token"] = id_access_token + headers["Authorization"] = create_id_access_token_header(id_access_token) else: bind_url = "https://%s/_matrix/identity/api/v1/3pid/bind" % (id_server,) try: - data = yield self.http_client.post_json_get_json(bind_url, bind_data) - logger.debug("bound threepid %r to %s", creds, mxid) + # Use the blacklisting http client as this call is only to identity servers + # provided by a client + data = yield self.blacklisting_http_client.post_json_get_json( + bind_url, bind_data, headers=headers + ) # Remember where we bound the threepid yield self.store.add_user_bound_threepid( @@ -186,12 +176,17 @@ class IdentityHandler(BaseHandler): if e.code != 404 or not use_v2: logger.error("3PID bind failed with Matrix error: %r", e) raise e.to_synapse_error() + except TimeoutError: + raise SynapseError(500, "Timed out contacting identity server") except CodeMessageException as e: data = json.loads(e.msg) # XXX WAT? return data logger.info("Got 404 when POSTing JSON %s, falling back to v1 URL", bind_url) - return (yield self.bind_threepid(creds, mxid, use_v2=False)) + res = yield self.bind_threepid( + client_secret, sid, mxid, id_server, id_access_token, use_v2=False + ) + return res @defer.inlineCallbacks def try_unbind_threepid(self, mxid, threepid): @@ -267,7 +262,11 @@ class IdentityHandler(BaseHandler): headers = {b"Authorization": auth_headers} try: - yield self.http_client.post_json_get_json(url, content, headers) + # Use the blacklisting http client as this call is only to identity servers + # provided by a client + yield self.blacklisting_http_client.post_json_get_json( + url, content, headers + ) changed = True except HttpResponseException as e: changed = False @@ -276,7 +275,9 @@ class IdentityHandler(BaseHandler): logger.warn("Received %d response while unbinding threepid", e.code) else: logger.error("Failed to unbind threepid on identity server: %s", e) - raise SynapseError(502, "Failed to contact identity server") + raise SynapseError(500, "Failed to contact identity server") + except TimeoutError: + raise SynapseError(500, "Timed out contacting identity server") yield self.store.remove_user_bound_threepid( user_id=mxid, @@ -335,6 +336,15 @@ class IdentityHandler(BaseHandler): # Generate a session id session_id = random_string(16) + if next_link: + # Manipulate the next_link to add the sid, because the caller won't get + # it until we send a response, by which time we've sent the mail. + if "?" in next_link: + next_link += "&" + else: + next_link += "?" + next_link += "sid=" + urllib.parse.quote(session_id) + # Generate a new validation token token = random_string(32) @@ -409,6 +419,8 @@ class IdentityHandler(BaseHandler): except HttpResponseException as e: logger.info("Proxied requestToken failed: %r", e) raise e.to_synapse_error() + except TimeoutError: + raise SynapseError(500, "Timed out contacting identity server") @defer.inlineCallbacks def requestMsisdnToken( @@ -457,7 +469,476 @@ class IdentityHandler(BaseHandler): id_server + "/_matrix/identity/api/v1/validate/msisdn/requestToken", params, ) - return data except HttpResponseException as e: logger.info("Proxied requestToken failed: %r", e) raise e.to_synapse_error() + except TimeoutError: + raise SynapseError(500, "Timed out contacting identity server") + + assert self.hs.config.public_baseurl + + # we need to tell the client to send the token back to us, since it doesn't + # otherwise know where to send it, so add submit_url response parameter + # (see also MSC2078) + data["submit_url"] = ( + self.hs.config.public_baseurl + + "_matrix/client/unstable/add_threepid/msisdn/submit_token" + ) + return data + + @defer.inlineCallbacks + def validate_threepid_session(self, client_secret, sid): + """Validates a threepid session with only the client secret and session ID + Tries validating against any configured account_threepid_delegates as well as locally. + + Args: + client_secret (str): A secret provided by the client + + sid (str): The ID of the session + + Returns: + Dict[str, str|int] if validation was successful, otherwise None + """ + # XXX: We shouldn't need to keep wrapping and unwrapping this value + threepid_creds = {"client_secret": client_secret, "sid": sid} + + # We don't actually know which medium this 3PID is. Thus we first assume it's email, + # and if validation fails we try msisdn + validation_session = None + + # Try to validate as email + if self.hs.config.threepid_behaviour_email == ThreepidBehaviour.REMOTE: + # Ask our delegated email identity server + validation_session = yield self.threepid_from_creds( + self.hs.config.account_threepid_delegate_email, threepid_creds + ) + elif self.hs.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + # Get a validated session matching these details + validation_session = yield self.store.get_threepid_validation_session( + "email", client_secret, sid=sid, validated=True + ) + + if validation_session: + return validation_session + + # Try to validate as msisdn + if self.hs.config.account_threepid_delegate_msisdn: + # Ask our delegated msisdn identity server + validation_session = yield self.threepid_from_creds( + self.hs.config.account_threepid_delegate_msisdn, threepid_creds + ) + + return validation_session + + @defer.inlineCallbacks + def proxy_msisdn_submit_token(self, id_server, client_secret, sid, token): + """Proxy a POST submitToken request to an identity server for verification purposes + + Args: + id_server (str): The identity server URL to contact + + client_secret (str): Secret provided by the client + + sid (str): The ID of the session + + token (str): The verification token + + Raises: + SynapseError: If we failed to contact the identity server + + Returns: + Deferred[dict]: The response dict from the identity server + """ + body = {"client_secret": client_secret, "sid": sid, "token": token} + + try: + return ( + yield self.http_client.post_json_get_json( + id_server + "/_matrix/identity/api/v1/validate/msisdn/submitToken", + body, + ) + ) + except TimeoutError: + raise SynapseError(500, "Timed out contacting identity server") + except HttpResponseException as e: + logger.warning("Error contacting msisdn account_threepid_delegate: %s", e) + raise SynapseError(400, "Error contacting the identity server") + + @defer.inlineCallbacks + def lookup_3pid(self, id_server, medium, address, id_access_token=None): + """Looks up a 3pid in the passed identity server. + + Args: + id_server (str): The server name (including port, if required) + of the identity server to use. + medium (str): The type of the third party identifier (e.g. "email"). + address (str): The third party identifier (e.g. "foo@example.com"). + id_access_token (str|None): The access token to authenticate to the identity + server with + + Returns: + str|None: the matrix ID of the 3pid, or None if it is not recognized. + """ + if id_access_token is not None: + try: + results = yield self._lookup_3pid_v2( + id_server, id_access_token, medium, address + ) + return results + + except Exception as e: + # Catch HttpResponseExcept for a non-200 response code + # Check if this identity server does not know about v2 lookups + if isinstance(e, HttpResponseException) and e.code == 404: + # This is an old identity server that does not yet support v2 lookups + logger.warning( + "Attempted v2 lookup on v1 identity server %s. Falling " + "back to v1", + id_server, + ) + else: + logger.warning("Error when looking up hashing details: %s", e) + return None + + return (yield self._lookup_3pid_v1(id_server, medium, address)) + + @defer.inlineCallbacks + def _lookup_3pid_v1(self, id_server, medium, address): + """Looks up a 3pid in the passed identity server using v1 lookup. + + Args: + id_server (str): The server name (including port, if required) + of the identity server to use. + medium (str): The type of the third party identifier (e.g. "email"). + address (str): The third party identifier (e.g. "foo@example.com"). + + Returns: + str: the matrix ID of the 3pid, or None if it is not recognized. + """ + try: + data = yield self.blacklisting_http_client.get_json( + "%s%s/_matrix/identity/api/v1/lookup" % (id_server_scheme, id_server), + {"medium": medium, "address": address}, + ) + + if "mxid" in data: + if "signatures" not in data: + raise AuthError(401, "No signatures on 3pid binding") + yield self._verify_any_signature(data, id_server) + return data["mxid"] + except TimeoutError: + raise SynapseError(500, "Timed out contacting identity server") + except IOError as e: + logger.warning("Error from v1 identity server lookup: %s" % (e,)) + + return None + + @defer.inlineCallbacks + def _lookup_3pid_v2(self, id_server, id_access_token, medium, address): + """Looks up a 3pid in the passed identity server using v2 lookup. + + Args: + id_server (str): The server name (including port, if required) + of the identity server to use. + id_access_token (str): The access token to authenticate to the identity server with + medium (str): The type of the third party identifier (e.g. "email"). + address (str): The third party identifier (e.g. "foo@example.com"). + + Returns: + Deferred[str|None]: the matrix ID of the 3pid, or None if it is not recognised. + """ + # Check what hashing details are supported by this identity server + try: + hash_details = yield self.blacklisting_http_client.get_json( + "%s%s/_matrix/identity/v2/hash_details" % (id_server_scheme, id_server), + {"access_token": id_access_token}, + ) + except TimeoutError: + raise SynapseError(500, "Timed out contacting identity server") + + if not isinstance(hash_details, dict): + logger.warning( + "Got non-dict object when checking hash details of %s%s: %s", + id_server_scheme, + id_server, + hash_details, + ) + raise SynapseError( + 400, + "Non-dict object from %s%s during v2 hash_details request: %s" + % (id_server_scheme, id_server, hash_details), + ) + + # Extract information from hash_details + supported_lookup_algorithms = hash_details.get("algorithms") + lookup_pepper = hash_details.get("lookup_pepper") + if ( + not supported_lookup_algorithms + or not isinstance(supported_lookup_algorithms, list) + or not lookup_pepper + or not isinstance(lookup_pepper, str) + ): + raise SynapseError( + 400, + "Invalid hash details received from identity server %s%s: %s" + % (id_server_scheme, id_server, hash_details), + ) + + # Check if any of the supported lookup algorithms are present + if LookupAlgorithm.SHA256 in supported_lookup_algorithms: + # Perform a hashed lookup + lookup_algorithm = LookupAlgorithm.SHA256 + + # Hash address, medium and the pepper with sha256 + to_hash = "%s %s %s" % (address, medium, lookup_pepper) + lookup_value = sha256_and_url_safe_base64(to_hash) + + elif LookupAlgorithm.NONE in supported_lookup_algorithms: + # Perform a non-hashed lookup + lookup_algorithm = LookupAlgorithm.NONE + + # Combine together plaintext address and medium + lookup_value = "%s %s" % (address, medium) + + else: + logger.warning( + "None of the provided lookup algorithms of %s are supported: %s", + id_server, + supported_lookup_algorithms, + ) + raise SynapseError( + 400, + "Provided identity server does not support any v2 lookup " + "algorithms that this homeserver supports.", + ) + + # Authenticate with identity server given the access token from the client + headers = {"Authorization": create_id_access_token_header(id_access_token)} + + try: + lookup_results = yield self.blacklisting_http_client.post_json_get_json( + "%s%s/_matrix/identity/v2/lookup" % (id_server_scheme, id_server), + { + "addresses": [lookup_value], + "algorithm": lookup_algorithm, + "pepper": lookup_pepper, + }, + headers=headers, + ) + except TimeoutError: + raise SynapseError(500, "Timed out contacting identity server") + except Exception as e: + logger.warning("Error when performing a v2 3pid lookup: %s", e) + raise SynapseError( + 500, "Unknown error occurred during identity server lookup" + ) + + # Check for a mapping from what we looked up to an MXID + if "mappings" not in lookup_results or not isinstance( + lookup_results["mappings"], dict + ): + logger.warning("No results from 3pid lookup") + return None + + # Return the MXID if it's available, or None otherwise + mxid = lookup_results["mappings"].get(lookup_value) + return mxid + + @defer.inlineCallbacks + def _verify_any_signature(self, data, server_hostname): + if server_hostname not in data["signatures"]: + raise AuthError(401, "No signature from server %s" % (server_hostname,)) + for key_name, signature in data["signatures"][server_hostname].items(): + try: + key_data = yield self.blacklisting_http_client.get_json( + "%s%s/_matrix/identity/api/v1/pubkey/%s" + % (id_server_scheme, server_hostname, key_name) + ) + except TimeoutError: + raise SynapseError(500, "Timed out contacting identity server") + if "public_key" not in key_data: + raise AuthError( + 401, "No public key named %s from %s" % (key_name, server_hostname) + ) + verify_signed_json( + data, + server_hostname, + decode_verify_key_bytes( + key_name, decode_base64(key_data["public_key"]) + ), + ) + return + + @defer.inlineCallbacks + def ask_id_server_for_third_party_invite( + self, + requester, + id_server, + medium, + address, + room_id, + inviter_user_id, + room_alias, + room_avatar_url, + room_join_rules, + room_name, + inviter_display_name, + inviter_avatar_url, + id_access_token=None, + ): + """ + Asks an identity server for a third party invite. + + Args: + requester (Requester) + id_server (str): hostname + optional port for the identity server. + medium (str): The literal string "email". + address (str): The third party address being invited. + room_id (str): The ID of the room to which the user is invited. + inviter_user_id (str): The user ID of the inviter. + room_alias (str): An alias for the room, for cosmetic notifications. + room_avatar_url (str): The URL of the room's avatar, for cosmetic + notifications. + room_join_rules (str): The join rules of the email (e.g. "public"). + room_name (str): The m.room.name of the room. + inviter_display_name (str): The current display name of the + inviter. + inviter_avatar_url (str): The URL of the inviter's avatar. + id_access_token (str|None): The access token to authenticate to the identity + server with + + Returns: + A deferred tuple containing: + token (str): The token which must be signed to prove authenticity. + public_keys ([{"public_key": str, "key_validity_url": str}]): + public_key is a base64-encoded ed25519 public key. + fallback_public_key: One element from public_keys. + display_name (str): A user-friendly name to represent the invited + user. + """ + invite_config = { + "medium": medium, + "address": address, + "room_id": room_id, + "room_alias": room_alias, + "room_avatar_url": room_avatar_url, + "room_join_rules": room_join_rules, + "room_name": room_name, + "sender": inviter_user_id, + "sender_display_name": inviter_display_name, + "sender_avatar_url": inviter_avatar_url, + } + + # Add the identity service access token to the JSON body and use the v2 + # Identity Service endpoints if id_access_token is present + data = None + base_url = "%s%s/_matrix/identity" % (id_server_scheme, id_server) + + if id_access_token: + key_validity_url = "%s%s/_matrix/identity/v2/pubkey/isvalid" % ( + id_server_scheme, + id_server, + ) + + # Attempt a v2 lookup + url = base_url + "/v2/store-invite" + try: + data = yield self.blacklisting_http_client.post_json_get_json( + url, + invite_config, + {"Authorization": create_id_access_token_header(id_access_token)}, + ) + except TimeoutError: + raise SynapseError(500, "Timed out contacting identity server") + except HttpResponseException as e: + if e.code != 404: + logger.info("Failed to POST %s with JSON: %s", url, e) + raise e + + if data is None: + key_validity_url = "%s%s/_matrix/identity/api/v1/pubkey/isvalid" % ( + id_server_scheme, + id_server, + ) + url = base_url + "/api/v1/store-invite" + + try: + data = yield self.blacklisting_http_client.post_json_get_json( + url, invite_config + ) + except TimeoutError: + raise SynapseError(500, "Timed out contacting identity server") + except HttpResponseException as e: + logger.warning( + "Error trying to call /store-invite on %s%s: %s", + id_server_scheme, + id_server, + e, + ) + + if data is None: + # Some identity servers may only support application/x-www-form-urlencoded + # types. This is especially true with old instances of Sydent, see + # https://github.com/matrix-org/sydent/pull/170 + try: + data = yield self.blacklisting_http_client.post_urlencoded_get_json( + url, invite_config + ) + except HttpResponseException as e: + logger.warning( + "Error calling /store-invite on %s%s with fallback " + "encoding: %s", + id_server_scheme, + id_server, + e, + ) + raise e + + # TODO: Check for success + token = data["token"] + public_keys = data.get("public_keys", []) + if "public_key" in data: + fallback_public_key = { + "public_key": data["public_key"], + "key_validity_url": key_validity_url, + } + else: + fallback_public_key = public_keys[0] + + if not public_keys: + public_keys.append(fallback_public_key) + display_name = data["display_name"] + return token, public_keys, fallback_public_key, display_name + + +def create_id_access_token_header(id_access_token): + """Create an Authorization header for passing to SimpleHttpClient as the header value + of an HTTP request. + + Args: + id_access_token (str): An identity server access token. + + Returns: + list[str]: The ascii-encoded bearer token encased in a list. + """ + # Prefix with Bearer + bearer_token = "Bearer %s" % id_access_token + + # Encode headers to standard ascii + bearer_token.encode("ascii") + + # Return as a list as that's how SimpleHttpClient takes header values + return [bearer_token] + + +class LookupAlgorithm: + """ + Supported hashing algorithms when performing a 3PID lookup. + + SHA256 - Hashing an (address, medium, pepper) combo with sha256, then url-safe base64 + encoding + NONE - Not performing any hashing. Simply sending an (address, medium) combo in plaintext + """ + + SHA256 = "sha256" + NONE = "none" diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 111f7c7e2..0f8cce8ff 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -222,6 +222,13 @@ class MessageHandler(object): } +# The duration (in ms) after which rooms should be removed +# `_rooms_to_exclude_from_dummy_event_insertion` (with the effect that we will try +# to generate a dummy event for them once more) +# +_DUMMY_EVENT_ROOM_EXCLUSION_EXPIRY = 7 * 24 * 60 * 60 * 1000 + + class EventCreationHandler(object): def __init__(self, hs): self.hs = hs @@ -258,6 +265,13 @@ class EventCreationHandler(object): self.config.block_events_without_consent_error ) + # Rooms which should be excluded from dummy insertion. (For instance, + # those without local users who can send events into the room). + # + # map from room id to time-of-last-attempt. + # + self._rooms_to_exclude_from_dummy_event_insertion = {} # type: dict[str, int] + # we need to construct a ConsentURIBuilder here, as it checks that the necessary # config options, but *only* if we have a configuration for which we are # going to need it. @@ -729,7 +743,27 @@ class EventCreationHandler(object): assert not self.config.worker_app if ratelimit: - yield self.base_handler.ratelimit(requester) + # We check if this is a room admin redacting an event so that we + # can apply different ratelimiting. We do this by simply checking + # it's not a self-redaction (to avoid having to look up whether the + # user is actually admin or not). + is_admin_redaction = False + if event.type == EventTypes.Redaction: + original_event = yield self.store.get_event( + event.redacts, + check_redacted=False, + get_prev_content=False, + allow_rejected=False, + allow_none=True, + ) + + is_admin_redaction = ( + original_event and event.sender != original_event.sender + ) + + yield self.base_handler.ratelimit( + requester, is_admin_redaction=is_admin_redaction + ) yield self.base_handler.maybe_kick_guest_users(event, context) @@ -868,9 +902,11 @@ class EventCreationHandler(object): """Background task to send dummy events into rooms that have a large number of extremities """ - + self._expire_rooms_to_exclude_from_dummy_event_insertion() room_ids = yield self.store.get_rooms_with_many_extremities( - min_count=10, limit=5 + min_count=10, + limit=5, + room_id_filter=self._rooms_to_exclude_from_dummy_event_insertion.keys(), ) for room_id in room_ids: @@ -884,32 +920,61 @@ class EventCreationHandler(object): members = yield self.state.get_current_users_in_room( room_id, latest_event_ids=latest_event_ids ) + dummy_event_sent = False + for user_id in members: + if not self.hs.is_mine_id(user_id): + continue + requester = create_requester(user_id) + try: + event, context = yield self.create_event( + requester, + { + "type": "org.matrix.dummy_event", + "content": {}, + "room_id": room_id, + "sender": user_id, + }, + prev_events_and_hashes=prev_events_and_hashes, + ) - user_id = None - for member in members: - if self.hs.is_mine_id(member): - user_id = member + event.internal_metadata.proactively_send = False + + yield self.send_nonmember_event( + requester, event, context, ratelimit=False + ) + dummy_event_sent = True break + except ConsentNotGivenError: + logger.info( + "Failed to send dummy event into room %s for user %s due to " + "lack of consent. Will try another user" % (room_id, user_id) + ) + except AuthError: + logger.info( + "Failed to send dummy event into room %s for user %s due to " + "lack of power. Will try another user" % (room_id, user_id) + ) - if not user_id: - # We don't have a joined user. - # TODO: We should do something here to stop the room from - # appearing next time. - continue + if not dummy_event_sent: + # Did not find a valid user in the room, so remove from future attempts + # Exclusion is time limited, so the room will be rechecked in the future + # dependent on _DUMMY_EVENT_ROOM_EXCLUSION_EXPIRY + logger.info( + "Failed to send dummy event into room %s. Will exclude it from " + "future attempts until cache expires" % (room_id,) + ) + now = self.clock.time_msec() + self._rooms_to_exclude_from_dummy_event_insertion[room_id] = now - requester = create_requester(user_id) - - event, context = yield self.create_event( - requester, - { - "type": "org.matrix.dummy_event", - "content": {}, - "room_id": room_id, - "sender": user_id, - }, - prev_events_and_hashes=prev_events_and_hashes, + def _expire_rooms_to_exclude_from_dummy_event_insertion(self): + expire_before = self.clock.time_msec() - _DUMMY_EVENT_ROOM_EXCLUSION_EXPIRY + to_expire = set() + for room_id, time in self._rooms_to_exclude_from_dummy_event_insertion.items(): + if time < expire_before: + to_expire.add(room_id) + for room_id in to_expire: + logger.debug( + "Expiring room id %s from dummy event insertion exclusion cache", + room_id, ) - - event.internal_metadata.proactively_send = False - - yield self.send_nonmember_event(requester, event, context, ratelimit=False) + del self._rooms_to_exclude_from_dummy_event_insertion[room_id] diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index 053cf66b2..eda15bc62 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -24,6 +24,7 @@ The methods that define policy are: import logging from contextlib import contextmanager +from typing import Dict, Set from six import iteritems, itervalues @@ -179,8 +180,9 @@ class PresenceHandler(object): # we assume that all the sync requests on that process have stopped. # Stored as a dict from process_id to set of user_id, and a dict of # process_id to millisecond timestamp last updated. - self.external_process_to_current_syncs = {} - self.external_process_last_updated_ms = {} + self.external_process_to_current_syncs = {} # type: Dict[int, Set[str]] + self.external_process_last_updated_ms = {} # type: Dict[int, int] + self.external_sync_linearizer = Linearizer(name="external_sync_linearizer") # Start a LoopingCall in 30s that fires every 5s. @@ -349,10 +351,13 @@ class PresenceHandler(object): if now - last_update > EXTERNAL_PROCESS_EXPIRY ] for process_id in expired_process_ids: + # For each expired process drop tracking info and check the users + # that were syncing on that process to see if they need to be timed + # out. users_to_check.update( - self.external_process_last_updated_ms.pop(process_id, ()) + self.external_process_to_current_syncs.pop(process_id, ()) ) - self.external_process_last_update.pop(process_id) + self.external_process_last_updated_ms.pop(process_id) states = [ self.user_to_current_state.get(user_id, UserPresenceState.default(user_id)) @@ -803,17 +808,25 @@ class PresenceHandler(object): # Loop round handling deltas until we're up to date while True: with Measure(self.clock, "presence_delta"): - deltas = yield self.store.get_current_state_deltas(self._event_pos) - if not deltas: + room_max_stream_ordering = self.store.get_room_max_stream_ordering() + if self._event_pos == room_max_stream_ordering: return + logger.debug( + "Processing presence stats %s->%s", + self._event_pos, + room_max_stream_ordering, + ) + max_pos, deltas = yield self.store.get_current_state_deltas( + self._event_pos, room_max_stream_ordering + ) yield self._handle_state_delta(deltas) - self._event_pos = deltas[-1]["stream_id"] + self._event_pos = max_pos # Expose current event processing position to prometheus synapse.metrics.event_processing_positions.labels("presence").set( - self._event_pos + max_pos ) @defer.inlineCallbacks diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 975da57ff..53410f120 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -217,10 +217,9 @@ class RegistrationHandler(BaseHandler): else: # autogen a sequential user ID - attempts = 0 user = None while not user: - localpart = yield self._generate_user_id(attempts > 0) + localpart = yield self._generate_user_id() user = UserID(localpart, self.hs.hostname) user_id = user.to_string() yield self.check_user_id_not_appservice_exclusive(user_id) @@ -238,7 +237,6 @@ class RegistrationHandler(BaseHandler): # if user id is taken, just generate another user = None user_id = None - attempts += 1 if not self.hs.config.user_consent_at_registration: yield self._auto_join_rooms(user_id) @@ -275,16 +273,12 @@ class RegistrationHandler(BaseHandler): fake_requester = create_requester(user_id) # try to create the room if we're the first real user on the server. Note - # that an auto-generated support user is not a real user and will never be + # that an auto-generated support or bot user is not a real user and will never be # the user to create the room should_auto_create_rooms = False - is_support = yield self.store.is_support_user(user_id) - # There is an edge case where the first user is the support user, then - # the room is never created, though this seems unlikely and - # recoverable from given the support user being involved in the first - # place. - if self.hs.config.autocreate_auto_join_rooms and not is_support: - count = yield self.store.count_all_users() + is_real_user = yield self.store.is_real_user(user_id) + if self.hs.config.autocreate_auto_join_rooms and is_real_user: + count = yield self.store.count_real_users() should_auto_create_rooms = count == 1 for r in self.hs.config.auto_join_rooms: logger.info("Auto-joining %s to %s", user_id, r) @@ -383,10 +377,10 @@ class RegistrationHandler(BaseHandler): ) @defer.inlineCallbacks - def _generate_user_id(self, reseed=False): - if reseed or self._next_generated_user_id is None: + def _generate_user_id(self): + if self._next_generated_user_id is None: with (yield self._generate_user_id_linearizer.queue(())): - if reseed or self._next_generated_user_id is None: + if self._next_generated_user_id is None: self._next_generated_user_id = ( yield self.store.find_next_generated_user_id_localpart() ) diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index a509e11d6..2816bd8f8 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -28,6 +28,7 @@ from twisted.internet import defer from synapse.api.constants import EventTypes, JoinRules, RoomCreationPreset from synapse.api.errors import AuthError, Codes, NotFoundError, StoreError, SynapseError from synapse.api.room_versions import KNOWN_ROOM_VERSIONS +from synapse.http.endpoint import parse_and_validate_server_name from synapse.storage.state import StateFilter from synapse.types import RoomAlias, RoomID, RoomStreamToken, StreamToken, UserID from synapse.util import stringutils @@ -554,7 +555,8 @@ class RoomCreationHandler(BaseHandler): invite_list = config.get("invite", []) for i in invite_list: try: - UserID.from_string(i) + uid = UserID.from_string(i) + parse_and_validate_server_name(uid.domain) except Exception: raise SynapseError(400, "Invalid user_id: %s" % (i,)) @@ -579,8 +581,8 @@ class RoomCreationHandler(BaseHandler): room_id = yield self._generate_room_id(creator_id=user_id, is_public=is_public) + directory_handler = self.hs.get_handlers().directory_handler if room_alias: - directory_handler = self.hs.get_handlers().directory_handler yield directory_handler.create_association( requester=requester, room_id=room_id, @@ -665,6 +667,7 @@ class RoomCreationHandler(BaseHandler): for invite_3pid in invite_3pid_list: id_server = invite_3pid["id_server"] + id_access_token = invite_3pid.get("id_access_token") # optional address = invite_3pid["address"] medium = invite_3pid["medium"] yield self.hs.get_room_member_handler().do_3pid_invite( @@ -675,6 +678,7 @@ class RoomCreationHandler(BaseHandler): id_server, requester, txn_id=None, + id_access_token=id_access_token, ) result = {"room_id": room_id} diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py index a7e55f00e..c615206df 100644 --- a/synapse/handlers/room_list.py +++ b/synapse/handlers/room_list.py @@ -16,8 +16,7 @@ import logging from collections import namedtuple -from six import PY3, iteritems -from six.moves import range +from six import iteritems import msgpack from unpaddedbase64 import decode_base64, encode_base64 @@ -27,7 +26,6 @@ from twisted.internet import defer from synapse.api.constants import EventTypes, JoinRules from synapse.api.errors import Codes, HttpResponseException from synapse.types import ThirdPartyInstanceID -from synapse.util.async_helpers import concurrently_execute from synapse.util.caches.descriptors import cachedInlineCallbacks from synapse.util.caches.response_cache import ResponseCache @@ -37,7 +35,6 @@ logger = logging.getLogger(__name__) REMOTE_ROOM_LIST_POLL_INTERVAL = 60 * 1000 - # This is used to indicate we should only return rooms published to the main list. EMPTY_THIRD_PARTY_ID = ThirdPartyInstanceID(None, None) @@ -72,6 +69,8 @@ class RoomListHandler(BaseHandler): This can be (None, None) to indicate the main list, or a particular appservice and network id to use an appservice specific one. Setting to None returns all public rooms across all lists. + from_federation (bool): true iff the request comes from the federation + API """ if not self.enable_room_list_search: return defer.succeed({"chunk": [], "total_room_count_estimate": 0}) @@ -89,16 +88,8 @@ class RoomListHandler(BaseHandler): # appservice specific lists. logger.info("Bypassing cache as search request.") - # XXX: Quick hack to stop room directory queries taking too long. - # Timeout request after 60s. Probably want a more fundamental - # solution at some point - timeout = self.clock.time() + 60 return self._get_public_room_list( - limit, - since_token, - search_filter, - network_tuple=network_tuple, - timeout=timeout, + limit, since_token, search_filter, network_tuple=network_tuple ) key = (limit, since_token, network_tuple) @@ -119,7 +110,6 @@ class RoomListHandler(BaseHandler): search_filter=None, network_tuple=EMPTY_THIRD_PARTY_ID, from_federation=False, - timeout=None, ): """Generate a public room list. Args: @@ -132,240 +122,116 @@ class RoomListHandler(BaseHandler): Setting to None returns all public rooms across all lists. from_federation (bool): Whether this request originated from a federating server or a client. Used for room filtering. - timeout (int|None): Amount of seconds to wait for a response before - timing out. """ - if since_token and since_token != "END": - since_token = RoomListNextBatch.from_token(since_token) - else: - since_token = None - rooms_to_order_value = {} - rooms_to_num_joined = {} + # Pagination tokens work by storing the room ID sent in the last batch, + # plus the direction (forwards or backwards). Next batch tokens always + # go forwards, prev batch tokens always go backwards. - newly_visible = [] - newly_unpublished = [] if since_token: - stream_token = since_token.stream_ordering - current_public_id = yield self.store.get_current_public_room_stream_id() - public_room_stream_id = since_token.public_room_stream_id - newly_visible, newly_unpublished = yield self.store.get_public_room_changes( - public_room_stream_id, current_public_id, network_tuple=network_tuple - ) - else: - stream_token = yield self.store.get_room_max_stream_ordering() - public_room_stream_id = yield self.store.get_current_public_room_stream_id() + batch_token = RoomListNextBatch.from_token(since_token) - room_ids = yield self.store.get_public_room_ids_at_stream_id( - public_room_stream_id, network_tuple=network_tuple + bounds = (batch_token.last_joined_members, batch_token.last_room_id) + forwards = batch_token.direction_is_forward + else: + batch_token = None + bounds = None + + forwards = True + + # we request one more than wanted to see if there are more pages to come + probing_limit = limit + 1 if limit is not None else None + + results = yield self.store.get_largest_public_rooms( + network_tuple, + search_filter, + probing_limit, + bounds=bounds, + forwards=forwards, + ignore_non_federatable=from_federation, ) - # We want to return rooms in a particular order: the number of joined - # users. We then arbitrarily use the room_id as a tie breaker. + def build_room_entry(room): + entry = { + "room_id": room["room_id"], + "name": room["name"], + "topic": room["topic"], + "canonical_alias": room["canonical_alias"], + "num_joined_members": room["joined_members"], + "avatar_url": room["avatar"], + "world_readable": room["history_visibility"] == "world_readable", + "guest_can_join": room["guest_access"] == "can_join", + } - @defer.inlineCallbacks - def get_order_for_room(room_id): - # Most of the rooms won't have changed between the since token and - # now (especially if the since token is "now"). So, we can ask what - # the current users are in a room (that will hit a cache) and then - # check if the room has changed since the since token. (We have to - # do it in that order to avoid races). - # If things have changed then fall back to getting the current state - # at the since token. - joined_users = yield self.store.get_users_in_room(room_id) - if self.store.has_room_changed_since(room_id, stream_token): - latest_event_ids = yield self.store.get_forward_extremeties_for_room( - room_id, stream_token - ) + # Filter out Nones – rather omit the field altogether + return {k: v for k, v in entry.items() if v is not None} - if not latest_event_ids: - return + results = [build_room_entry(r) for r in results] - joined_users = yield self.state_handler.get_current_users_in_room( - room_id, latest_event_ids - ) + response = {} + num_results = len(results) + if limit is not None: + more_to_come = num_results == probing_limit - num_joined_users = len(joined_users) - rooms_to_num_joined[room_id] = num_joined_users + # Depending on direction we trim either the front or back. + if forwards: + results = results[:limit] + else: + results = results[-limit:] + else: + more_to_come = False - if num_joined_users == 0: - return + if num_results > 0: + final_entry = results[-1] + initial_entry = results[0] - # We want larger rooms to be first, hence negating num_joined_users - rooms_to_order_value[room_id] = (-num_joined_users, room_id) + if forwards: + if batch_token: + # If there was a token given then we assume that there + # must be previous results. + response["prev_batch"] = RoomListNextBatch( + last_joined_members=initial_entry["num_joined_members"], + last_room_id=initial_entry["room_id"], + direction_is_forward=False, + ).to_token() - logger.info( - "Getting ordering for %i rooms since %s", len(room_ids), stream_token + if more_to_come: + response["next_batch"] = RoomListNextBatch( + last_joined_members=final_entry["num_joined_members"], + last_room_id=final_entry["room_id"], + direction_is_forward=True, + ).to_token() + else: + if batch_token: + response["next_batch"] = RoomListNextBatch( + last_joined_members=final_entry["num_joined_members"], + last_room_id=final_entry["room_id"], + direction_is_forward=True, + ).to_token() + + if more_to_come: + response["prev_batch"] = RoomListNextBatch( + last_joined_members=initial_entry["num_joined_members"], + last_room_id=initial_entry["room_id"], + direction_is_forward=False, + ).to_token() + + for room in results: + # populate search result entries with additional fields, namely + # 'aliases' + room_id = room["room_id"] + + aliases = yield self.store.get_aliases_for_room(room_id) + if aliases: + room["aliases"] = aliases + + response["chunk"] = results + + response["total_room_count_estimate"] = yield self.store.count_public_rooms( + network_tuple, ignore_non_federatable=from_federation ) - yield concurrently_execute(get_order_for_room, room_ids, 10) - sorted_entries = sorted(rooms_to_order_value.items(), key=lambda e: e[1]) - sorted_rooms = [room_id for room_id, _ in sorted_entries] - - # `sorted_rooms` should now be a list of all public room ids that is - # stable across pagination. Therefore, we can use indices into this - # list as our pagination tokens. - - # Filter out rooms that we don't want to return - rooms_to_scan = [ - r - for r in sorted_rooms - if r not in newly_unpublished and rooms_to_num_joined[r] > 0 - ] - - total_room_count = len(rooms_to_scan) - - if since_token: - # Filter out rooms we've already returned previously - # `since_token.current_limit` is the index of the last room we - # sent down, so we exclude it and everything before/after it. - if since_token.direction_is_forward: - rooms_to_scan = rooms_to_scan[since_token.current_limit + 1 :] - else: - rooms_to_scan = rooms_to_scan[: since_token.current_limit] - rooms_to_scan.reverse() - - logger.info("After sorting and filtering, %i rooms remain", len(rooms_to_scan)) - - # _append_room_entry_to_chunk will append to chunk but will stop if - # len(chunk) > limit - # - # Normally we will generate enough results on the first iteration here, - # but if there is a search filter, _append_room_entry_to_chunk may - # filter some results out, in which case we loop again. - # - # We don't want to scan over the entire range either as that - # would potentially waste a lot of work. - # - # XXX if there is no limit, we may end up DoSing the server with - # calls to get_current_state_ids for every single room on the - # server. Surely we should cap this somehow? - # - if limit: - step = limit + 1 - else: - # step cannot be zero - step = len(rooms_to_scan) if len(rooms_to_scan) != 0 else 1 - - chunk = [] - for i in range(0, len(rooms_to_scan), step): - if timeout and self.clock.time() > timeout: - raise Exception("Timed out searching room directory") - - batch = rooms_to_scan[i : i + step] - logger.info("Processing %i rooms for result", len(batch)) - yield concurrently_execute( - lambda r: self._append_room_entry_to_chunk( - r, - rooms_to_num_joined[r], - chunk, - limit, - search_filter, - from_federation=from_federation, - ), - batch, - 5, - ) - logger.info("Now %i rooms in result", len(chunk)) - if len(chunk) >= limit + 1: - break - - chunk.sort(key=lambda e: (-e["num_joined_members"], e["room_id"])) - - # Work out the new limit of the batch for pagination, or None if we - # know there are no more results that would be returned. - # i.e., [since_token.current_limit..new_limit] is the batch of rooms - # we've returned (or the reverse if we paginated backwards) - # We tried to pull out limit + 1 rooms above, so if we have <= limit - # then we know there are no more results to return - new_limit = None - if chunk and (not limit or len(chunk) > limit): - - if not since_token or since_token.direction_is_forward: - if limit: - chunk = chunk[:limit] - last_room_id = chunk[-1]["room_id"] - else: - if limit: - chunk = chunk[-limit:] - last_room_id = chunk[0]["room_id"] - - new_limit = sorted_rooms.index(last_room_id) - - results = {"chunk": chunk, "total_room_count_estimate": total_room_count} - - if since_token: - results["new_rooms"] = bool(newly_visible) - - if not since_token or since_token.direction_is_forward: - if new_limit is not None: - results["next_batch"] = RoomListNextBatch( - stream_ordering=stream_token, - public_room_stream_id=public_room_stream_id, - current_limit=new_limit, - direction_is_forward=True, - ).to_token() - - if since_token: - results["prev_batch"] = since_token.copy_and_replace( - direction_is_forward=False, - current_limit=since_token.current_limit + 1, - ).to_token() - else: - if new_limit is not None: - results["prev_batch"] = RoomListNextBatch( - stream_ordering=stream_token, - public_room_stream_id=public_room_stream_id, - current_limit=new_limit, - direction_is_forward=False, - ).to_token() - - if since_token: - results["next_batch"] = since_token.copy_and_replace( - direction_is_forward=True, - current_limit=since_token.current_limit - 1, - ).to_token() - - return results - - @defer.inlineCallbacks - def _append_room_entry_to_chunk( - self, - room_id, - num_joined_users, - chunk, - limit, - search_filter, - from_federation=False, - ): - """Generate the entry for a room in the public room list and append it - to the `chunk` if it matches the search filter - - Args: - room_id (str): The ID of the room. - num_joined_users (int): The number of joined users in the room. - chunk (list) - limit (int|None): Maximum amount of rooms to display. Function will - return if length of chunk is greater than limit + 1. - search_filter (dict|None) - from_federation (bool): Whether this request originated from a - federating server or a client. Used for room filtering. - """ - if limit and len(chunk) > limit + 1: - # We've already got enough, so lets just drop it. - return - - result = yield self.generate_room_entry(room_id, num_joined_users) - if not result: - return - - if from_federation and not result.get("m.federate", True): - # This is a room that other servers cannot join. Do not show them - # this room. - return - - if _matches_room_entry(result, search_filter): - chunk.append(result) + return response @cachedInlineCallbacks(num_args=1, cache_context=True) def generate_room_entry( @@ -580,18 +446,15 @@ class RoomListNextBatch( namedtuple( "RoomListNextBatch", ( - "stream_ordering", # stream_ordering of the first public room list - "public_room_stream_id", # public room stream id for first public room list - "current_limit", # The number of previous rooms returned + "last_joined_members", # The count to get rooms after/before + "last_room_id", # The room_id to get rooms after/before "direction_is_forward", # Bool if this is a next_batch, false if prev_batch ), ) ): - KEY_DICT = { - "stream_ordering": "s", - "public_room_stream_id": "p", - "current_limit": "n", + "last_joined_members": "m", + "last_room_id": "r", "direction_is_forward": "d", } @@ -599,13 +462,7 @@ class RoomListNextBatch( @classmethod def from_token(cls, token): - if PY3: - # The argument raw=False is only available on new versions of - # msgpack, and only really needed on Python 3. Gate it behind - # a PY3 check to avoid causing issues on Debian-packaged versions. - decoded = msgpack.loads(decode_base64(token), raw=False) - else: - decoded = msgpack.loads(decode_base64(token)) + decoded = msgpack.loads(decode_base64(token), raw=False) return RoomListNextBatch( **{cls.REVERSE_KEY_DICT[key]: val for key, val in decoded.items()} ) diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 093f2ea36..380e2fad5 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -20,15 +20,11 @@ import logging from six.moves import http_client -from signedjson.key import decode_verify_key_bytes -from signedjson.sign import verify_signed_json -from unpaddedbase64 import decode_base64 - from twisted.internet import defer from synapse import types from synapse.api.constants import EventTypes, Membership -from synapse.api.errors import AuthError, Codes, HttpResponseException, SynapseError +from synapse.api.errors import AuthError, Codes, SynapseError from synapse.types import RoomID, UserID from synapse.util.async_helpers import Linearizer from synapse.util.distributor import user_joined_room, user_left_room @@ -37,8 +33,6 @@ from ._base import BaseHandler logger = logging.getLogger(__name__) -id_server_scheme = "https://" - class RoomMemberHandler(object): # TODO(paul): This handler currently contains a messy conflation of @@ -59,10 +53,10 @@ class RoomMemberHandler(object): self.auth = hs.get_auth() self.state_handler = hs.get_state_handler() self.config = hs.config - self.simple_http_client = hs.get_simple_http_client() self.federation_handler = hs.get_handlers().federation_handler self.directory_handler = hs.get_handlers().directory_handler + self.identity_handler = hs.get_handlers().identity_handler self.registration_handler = hs.get_registration_handler() self.profile_handler = hs.get_profile_handler() self.event_creation_handler = hs.get_event_creation_handler() @@ -100,7 +94,7 @@ class RoomMemberHandler(object): raise NotImplementedError() @abc.abstractmethod - def _remote_reject_invite(self, remote_room_hosts, room_id, target): + def _remote_reject_invite(self, requester, remote_room_hosts, room_id, target): """Attempt to reject an invite for a room this server is not in. If we fail to do so we locally mark the invite as rejected. @@ -209,23 +203,11 @@ class RoomMemberHandler(object): prev_member_event = yield self.store.get_event(prev_member_event_id) newly_joined = prev_member_event.membership != Membership.JOIN if newly_joined: + # Copy over user state if we're joining an upgraded room + yield self.copy_user_state_if_room_upgrade( + room_id, requester.user.to_string() + ) yield self._user_joined_room(target, room_id) - - # Copy over direct message status and room tags if this is a join - # on an upgraded room - - # Check if this is an upgraded room - predecessor = yield self.store.get_room_predecessor(room_id) - - if predecessor: - # It is an upgraded room. Copy over old tags - self.copy_room_tags_and_direct_to_room( - predecessor["room_id"], room_id, user_id - ) - # Move over old push rules - self.store.move_push_rules_from_room_to_room_for_user( - predecessor["room_id"], room_id, user_id - ) elif event.membership == Membership.LEAVE: if prev_member_event_id: prev_member_event = yield self.store.get_event(prev_member_event_id) @@ -469,10 +451,16 @@ class RoomMemberHandler(object): if requester.is_guest: content["kind"] = "guest" - ret = yield self._remote_join( + remote_join_response = yield self._remote_join( requester, remote_room_hosts, room_id, target, content ) - return ret + + # Copy over user state if this is a join on an remote upgraded room + yield self.copy_user_state_if_room_upgrade( + room_id, requester.user.to_string() + ) + + return remote_join_response elif effective_membership_state == Membership.LEAVE: if not is_host_in_room: @@ -510,9 +498,39 @@ class RoomMemberHandler(object): return res @defer.inlineCallbacks - def send_membership_event( - self, requester, event, context, remote_room_hosts=None, ratelimit=True - ): + def copy_user_state_if_room_upgrade(self, new_room_id, user_id): + """Copy user-specific information when they join a new room if that new room is the + result of a room upgrade + + Args: + new_room_id (str): The ID of the room the user is joining + user_id (str): The ID of the user + + Returns: + Deferred + """ + # Check if the new room is an upgraded room + predecessor = yield self.store.get_room_predecessor(new_room_id) + if not predecessor: + return + + logger.debug( + "Found predecessor for %s: %s. Copying over room tags and push " "rules", + new_room_id, + predecessor, + ) + + # It is an upgraded room. Copy over old tags + yield self.copy_room_tags_and_direct_to_room( + predecessor["room_id"], new_room_id, user_id + ) + # Copy over push rules + yield self.store.copy_push_rules_from_room_to_room_for_user( + predecessor["room_id"], new_room_id, user_id + ) + + @defer.inlineCallbacks + def send_membership_event(self, requester, event, context, ratelimit=True): """ Change the membership status of a user in a room. @@ -522,16 +540,10 @@ class RoomMemberHandler(object): act as the sender, will be skipped. event (SynapseEvent): The membership event. context: The context of the event. - is_guest (bool): Whether the sender is a guest. - room_hosts ([str]): Homeservers which are likely to already be in - the room, and could be danced with in order to join this - homeserver for the first time. ratelimit (bool): Whether to rate limit this request. Raises: SynapseError if there was a problem changing the membership. """ - remote_room_hosts = remote_room_hosts or [] - target_user = UserID.from_string(event.state_key) room_id = event.room_id @@ -634,7 +646,7 @@ class RoomMemberHandler(object): servers.remove(room_alias.domain) servers.insert(0, room_alias.domain) - return (RoomID.from_string(room_id), servers) + return RoomID.from_string(room_id), servers @defer.inlineCallbacks def _get_inviter(self, user_id, room_id): @@ -646,7 +658,15 @@ class RoomMemberHandler(object): @defer.inlineCallbacks def do_3pid_invite( - self, room_id, inviter, medium, address, id_server, requester, txn_id + self, + room_id, + inviter, + medium, + address, + id_server, + requester, + txn_id, + id_access_token=None, ): if self.config.block_non_admin_invites: is_requester_admin = yield self.auth.is_server_admin(requester.user) @@ -669,7 +689,14 @@ class RoomMemberHandler(object): Codes.FORBIDDEN, ) - invitee = yield self._lookup_3pid(id_server, medium, address) + if not self._enable_lookup: + raise SynapseError( + 403, "Looking up third-party identifiers is denied from this server" + ) + + invitee = yield self.identity_handler.lookup_3pid( + id_server, medium, address, id_access_token + ) if invitee: yield self.update_membership( @@ -677,67 +704,27 @@ class RoomMemberHandler(object): ) else: yield self._make_and_store_3pid_invite( - requester, id_server, medium, address, room_id, inviter, txn_id=txn_id + requester, + id_server, + medium, + address, + room_id, + inviter, + txn_id=txn_id, + id_access_token=id_access_token, ) - @defer.inlineCallbacks - def _lookup_3pid(self, id_server, medium, address): - """Looks up a 3pid in the passed identity server. - - Args: - id_server (str): The server name (including port, if required) - of the identity server to use. - medium (str): The type of the third party identifier (e.g. "email"). - address (str): The third party identifier (e.g. "foo@example.com"). - - Returns: - str: the matrix ID of the 3pid, or None if it is not recognized. - """ - if not self._enable_lookup: - raise SynapseError( - 403, "Looking up third-party identifiers is denied from this server" - ) - try: - data = yield self.simple_http_client.get_json( - "%s%s/_matrix/identity/api/v1/lookup" % (id_server_scheme, id_server), - {"medium": medium, "address": address}, - ) - - if "mxid" in data: - if "signatures" not in data: - raise AuthError(401, "No signatures on 3pid binding") - yield self._verify_any_signature(data, id_server) - return data["mxid"] - - except IOError as e: - logger.warn("Error from identity server lookup: %s" % (e,)) - return None - - @defer.inlineCallbacks - def _verify_any_signature(self, data, server_hostname): - if server_hostname not in data["signatures"]: - raise AuthError(401, "No signature from server %s" % (server_hostname,)) - for key_name, signature in data["signatures"][server_hostname].items(): - key_data = yield self.simple_http_client.get_json( - "%s%s/_matrix/identity/api/v1/pubkey/%s" - % (id_server_scheme, server_hostname, key_name) - ) - if "public_key" not in key_data: - raise AuthError( - 401, "No public key named %s from %s" % (key_name, server_hostname) - ) - verify_signed_json( - data, - server_hostname, - decode_verify_key_bytes( - key_name, decode_base64(key_data["public_key"]) - ), - ) - return - @defer.inlineCallbacks def _make_and_store_3pid_invite( - self, requester, id_server, medium, address, room_id, user, txn_id + self, + requester, + id_server, + medium, + address, + room_id, + user, + txn_id, + id_access_token=None, ): room_state = yield self.state_handler.get_current_state(room_id) @@ -773,7 +760,7 @@ class RoomMemberHandler(object): room_avatar_url = room_avatar_event.content.get("url", "") token, public_keys, fallback_public_key, display_name = ( - yield self._ask_id_server_for_third_party_invite( + yield self.identity_handler.ask_id_server_for_third_party_invite( requester=requester, id_server=id_server, medium=medium, @@ -786,6 +773,7 @@ class RoomMemberHandler(object): room_name=room_name, inviter_display_name=inviter_display_name, inviter_avatar_url=inviter_avatar_url, + id_access_token=id_access_token, ) ) @@ -808,103 +796,6 @@ class RoomMemberHandler(object): txn_id=txn_id, ) - @defer.inlineCallbacks - def _ask_id_server_for_third_party_invite( - self, - requester, - id_server, - medium, - address, - room_id, - inviter_user_id, - room_alias, - room_avatar_url, - room_join_rules, - room_name, - inviter_display_name, - inviter_avatar_url, - ): - """ - Asks an identity server for a third party invite. - - Args: - requester (Requester) - id_server (str): hostname + optional port for the identity server. - medium (str): The literal string "email". - address (str): The third party address being invited. - room_id (str): The ID of the room to which the user is invited. - inviter_user_id (str): The user ID of the inviter. - room_alias (str): An alias for the room, for cosmetic notifications. - room_avatar_url (str): The URL of the room's avatar, for cosmetic - notifications. - room_join_rules (str): The join rules of the email (e.g. "public"). - room_name (str): The m.room.name of the room. - inviter_display_name (str): The current display name of the - inviter. - inviter_avatar_url (str): The URL of the inviter's avatar. - - Returns: - A deferred tuple containing: - token (str): The token which must be signed to prove authenticity. - public_keys ([{"public_key": str, "key_validity_url": str}]): - public_key is a base64-encoded ed25519 public key. - fallback_public_key: One element from public_keys. - display_name (str): A user-friendly name to represent the invited - user. - """ - - is_url = "%s%s/_matrix/identity/api/v1/store-invite" % ( - id_server_scheme, - id_server, - ) - - invite_config = { - "medium": medium, - "address": address, - "room_id": room_id, - "room_alias": room_alias, - "room_avatar_url": room_avatar_url, - "room_join_rules": room_join_rules, - "room_name": room_name, - "sender": inviter_user_id, - "sender_display_name": inviter_display_name, - "sender_avatar_url": inviter_avatar_url, - } - - try: - data = yield self.simple_http_client.post_json_get_json( - is_url, invite_config - ) - except HttpResponseException as e: - # Some identity servers may only support application/x-www-form-urlencoded - # types. This is especially true with old instances of Sydent, see - # https://github.com/matrix-org/sydent/pull/170 - logger.info( - "Failed to POST %s with JSON, falling back to urlencoded form: %s", - is_url, - e, - ) - data = yield self.simple_http_client.post_urlencoded_get_json( - is_url, invite_config - ) - - # TODO: Check for success - token = data["token"] - public_keys = data.get("public_keys", []) - if "public_key" in data: - fallback_public_key = { - "public_key": data["public_key"], - "key_validity_url": "%s%s/_matrix/identity/api/v1/pubkey/isvalid" - % (id_server_scheme, id_server), - } - else: - fallback_public_key = public_keys[0] - - if not public_keys: - public_keys.append(fallback_public_key) - display_name = data["display_name"] - return token, public_keys, fallback_public_key, display_name - @defer.inlineCallbacks def _is_host_in_room(self, current_state_ids): # Have we just created the room, and is this about to be the very @@ -1057,7 +948,7 @@ class RoomMemberMasterHandler(RoomMemberHandler): # The 'except' clause is very broad, but we need to # capture everything from DNS failures upwards # - logger.warn("Failed to reject invite: %s", e) + logger.warning("Failed to reject invite: %s", e) yield self.store.locally_reject_invite(target.to_string(), room_id) return {} diff --git a/synapse/handlers/saml_handler.py b/synapse/handlers/saml_handler.py index a1ce6929c..cc9e6b9bd 100644 --- a/synapse/handlers/saml_handler.py +++ b/synapse/handlers/saml_handler.py @@ -21,6 +21,8 @@ from saml2.client import Saml2Client from synapse.api.errors import SynapseError from synapse.http.servlet import parse_string from synapse.rest.client.v1.login import SSOAuthHandler +from synapse.types import UserID, map_username_to_mxid_localpart +from synapse.util.async_helpers import Linearizer logger = logging.getLogger(__name__) @@ -29,12 +31,26 @@ class SamlHandler: def __init__(self, hs): self._saml_client = Saml2Client(hs.config.saml2_sp_config) self._sso_auth_handler = SSOAuthHandler(hs) + self._registration_handler = hs.get_registration_handler() + + self._clock = hs.get_clock() + self._datastore = hs.get_datastore() + self._hostname = hs.hostname + self._saml2_session_lifetime = hs.config.saml2_session_lifetime + self._mxid_source_attribute = hs.config.saml2_mxid_source_attribute + self._grandfathered_mxid_source_attribute = ( + hs.config.saml2_grandfathered_mxid_source_attribute + ) + self._mxid_mapper = hs.config.saml2_mxid_mapper + + # identifier for the external_ids table + self._auth_provider_id = "saml" # a map from saml session id to Saml2SessionData object self._outstanding_requests_dict = {} - self._clock = hs.get_clock() - self._saml2_session_lifetime = hs.config.saml2_session_lifetime + # a lock on the mappings + self._mapping_lock = Linearizer(name="saml_mapping", clock=self._clock) def handle_redirect_request(self, client_redirect_url): """Handle an incoming request to /login/sso/redirect @@ -60,7 +76,7 @@ class SamlHandler: # this shouldn't happen! raise Exception("prepare_for_authenticate didn't return a Location header") - def handle_saml_response(self, request): + async def handle_saml_response(self, request): """Handle an incoming request to /_matrix/saml2/authn_response Args: @@ -77,6 +93,10 @@ class SamlHandler: # the dict. self.expire_sessions() + user_id = await self._map_saml_response_to_user(resp_bytes) + self._sso_auth_handler.complete_sso_login(user_id, request, relay_state) + + async def _map_saml_response_to_user(self, resp_bytes): try: saml2_auth = self._saml_client.parse_authn_request_response( resp_bytes, @@ -91,18 +111,88 @@ class SamlHandler: logger.warning("SAML2 response was not signed") raise SynapseError(400, "SAML2 response was not signed") - if "uid" not in saml2_auth.ava: + logger.info("SAML2 response: %s", saml2_auth.origxml) + logger.info("SAML2 mapped attributes: %s", saml2_auth.ava) + + try: + remote_user_id = saml2_auth.ava["uid"][0] + except KeyError: logger.warning("SAML2 response lacks a 'uid' attestation") raise SynapseError(400, "uid not in SAML2 response") + try: + mxid_source = saml2_auth.ava[self._mxid_source_attribute][0] + except KeyError: + logger.warning( + "SAML2 response lacks a '%s' attestation", self._mxid_source_attribute + ) + raise SynapseError( + 400, "%s not in SAML2 response" % (self._mxid_source_attribute,) + ) + self._outstanding_requests_dict.pop(saml2_auth.in_response_to, None) - username = saml2_auth.ava["uid"][0] displayName = saml2_auth.ava.get("displayName", [None])[0] - return self._sso_auth_handler.on_successful_auth( - username, request, relay_state, user_display_name=displayName - ) + with (await self._mapping_lock.queue(self._auth_provider_id)): + # first of all, check if we already have a mapping for this user + logger.info( + "Looking for existing mapping for user %s:%s", + self._auth_provider_id, + remote_user_id, + ) + registered_user_id = await self._datastore.get_user_by_external_id( + self._auth_provider_id, remote_user_id + ) + if registered_user_id is not None: + logger.info("Found existing mapping %s", registered_user_id) + return registered_user_id + + # backwards-compatibility hack: see if there is an existing user with a + # suitable mapping from the uid + if ( + self._grandfathered_mxid_source_attribute + and self._grandfathered_mxid_source_attribute in saml2_auth.ava + ): + attrval = saml2_auth.ava[self._grandfathered_mxid_source_attribute][0] + user_id = UserID( + map_username_to_mxid_localpart(attrval), self._hostname + ).to_string() + logger.info( + "Looking for existing account based on mapped %s %s", + self._grandfathered_mxid_source_attribute, + user_id, + ) + + users = await self._datastore.get_users_by_id_case_insensitive(user_id) + if users: + registered_user_id = list(users.keys())[0] + logger.info("Grandfathering mapping to %s", registered_user_id) + await self._datastore.record_user_external_id( + self._auth_provider_id, remote_user_id, registered_user_id + ) + return registered_user_id + + # figure out a new mxid for this user + base_mxid_localpart = self._mxid_mapper(mxid_source) + + suffix = 0 + while True: + localpart = base_mxid_localpart + (str(suffix) if suffix else "") + if not await self._datastore.get_users_by_id_case_insensitive( + UserID(localpart, self._hostname).to_string() + ): + break + suffix += 1 + logger.info("Allocating mxid for new user with localpart %s", localpart) + + registered_user_id = await self._registration_handler.register_user( + localpart=localpart, default_display_name=displayName + ) + await self._datastore.record_user_external_id( + self._auth_provider_id, remote_user_id, registered_user_id + ) + return registered_user_id def expire_sessions(self): expire_before = self._clock.time_msec() - self._saml2_session_lifetime diff --git a/synapse/handlers/stats.py b/synapse/handlers/stats.py index 3c265f371..466daf920 100644 --- a/synapse/handlers/stats.py +++ b/synapse/handlers/stats.py @@ -84,17 +84,26 @@ class StatsHandler(StateDeltasHandler): # Loop round handling deltas until we're up to date while True: - deltas = yield self.store.get_current_state_deltas(self.pos) + # Be sure to read the max stream_ordering *before* checking if there are any outstanding + # deltas, since there is otherwise a chance that we could miss updates which arrive + # after we check the deltas. + room_max_stream_ordering = self.store.get_room_max_stream_ordering() + if self.pos == room_max_stream_ordering: + break + + logger.debug( + "Processing room stats %s->%s", self.pos, room_max_stream_ordering + ) + max_pos, deltas = yield self.store.get_current_state_deltas( + self.pos, room_max_stream_ordering + ) if deltas: logger.debug("Handling %d state deltas", len(deltas)) room_deltas, user_deltas = yield self._handle_deltas(deltas) - - max_pos = deltas[-1]["stream_id"] else: room_deltas = {} user_deltas = {} - max_pos = yield self.store.get_room_max_stream_ordering() # Then count deltas for total_events and total_event_bytes. room_count, user_count = yield self.store.get_changes_room_total_events_and_bytes( @@ -117,10 +126,9 @@ class StatsHandler(StateDeltasHandler): stream_id=max_pos, ) - event_processing_positions.labels("stats").set(max_pos) + logger.debug("Handled room stats to %s -> %s", self.pos, max_pos) - if self.pos == max_pos: - break + event_processing_positions.labels("stats").set(max_pos) self.pos = max_pos @@ -287,6 +295,7 @@ class StatsHandler(StateDeltasHandler): room_state["guest_access"] = event_content.get("guest_access") for room_id, state in room_to_state_updates.items(): + logger.info("Updating room_stats_state for %s: %s", room_id, state) yield self.store.update_room_state(room_id, state) return room_to_stats_deltas, user_to_stats_deltas diff --git a/synapse/handlers/ui_auth/__init__.py b/synapse/handlers/ui_auth/__init__.py new file mode 100644 index 000000000..824f37f8f --- /dev/null +++ b/synapse/handlers/ui_auth/__init__.py @@ -0,0 +1,22 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""This module implements user-interactive auth verification. + +TODO: move more stuff out of AuthHandler in here. + +""" + +from synapse.handlers.ui_auth.checkers import INTERACTIVE_AUTH_CHECKERS # noqa: F401 diff --git a/synapse/handlers/ui_auth/checkers.py b/synapse/handlers/ui_auth/checkers.py new file mode 100644 index 000000000..29aa1e5aa --- /dev/null +++ b/synapse/handlers/ui_auth/checkers.py @@ -0,0 +1,247 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import logging + +from canonicaljson import json + +from twisted.internet import defer +from twisted.web.client import PartialDownloadError + +from synapse.api.constants import LoginType +from synapse.api.errors import Codes, LoginError, SynapseError +from synapse.config.emailconfig import ThreepidBehaviour + +logger = logging.getLogger(__name__) + + +class UserInteractiveAuthChecker: + """Abstract base class for an interactive auth checker""" + + def __init__(self, hs): + pass + + def is_enabled(self): + """Check if the configuration of the homeserver allows this checker to work + + Returns: + bool: True if this login type is enabled. + """ + + def check_auth(self, authdict, clientip): + """Given the authentication dict from the client, attempt to check this step + + Args: + authdict (dict): authentication dictionary from the client + clientip (str): The IP address of the client. + + Raises: + SynapseError if authentication failed + + Returns: + Deferred: the result of authentication (to pass back to the client?) + """ + raise NotImplementedError() + + +class DummyAuthChecker(UserInteractiveAuthChecker): + AUTH_TYPE = LoginType.DUMMY + + def is_enabled(self): + return True + + def check_auth(self, authdict, clientip): + return defer.succeed(True) + + +class TermsAuthChecker(UserInteractiveAuthChecker): + AUTH_TYPE = LoginType.TERMS + + def is_enabled(self): + return True + + def check_auth(self, authdict, clientip): + return defer.succeed(True) + + +class RecaptchaAuthChecker(UserInteractiveAuthChecker): + AUTH_TYPE = LoginType.RECAPTCHA + + def __init__(self, hs): + super().__init__(hs) + self._enabled = bool(hs.config.recaptcha_private_key) + self._http_client = hs.get_simple_http_client() + self._url = hs.config.recaptcha_siteverify_api + self._secret = hs.config.recaptcha_private_key + + def is_enabled(self): + return self._enabled + + @defer.inlineCallbacks + def check_auth(self, authdict, clientip): + try: + user_response = authdict["response"] + except KeyError: + # Client tried to provide captcha but didn't give the parameter: + # bad request. + raise LoginError( + 400, "Captcha response is required", errcode=Codes.CAPTCHA_NEEDED + ) + + logger.info( + "Submitting recaptcha response %s with remoteip %s", user_response, clientip + ) + + # TODO: get this from the homeserver rather than creating a new one for + # each request + try: + resp_body = yield self._http_client.post_urlencoded_get_json( + self._url, + args={ + "secret": self._secret, + "response": user_response, + "remoteip": clientip, + }, + ) + except PartialDownloadError as pde: + # Twisted is silly + data = pde.response + resp_body = json.loads(data) + + if "success" in resp_body: + # Note that we do NOT check the hostname here: we explicitly + # intend the CAPTCHA to be presented by whatever client the + # user is using, we just care that they have completed a CAPTCHA. + logger.info( + "%s reCAPTCHA from hostname %s", + "Successful" if resp_body["success"] else "Failed", + resp_body.get("hostname"), + ) + if resp_body["success"]: + return True + raise LoginError(401, "", errcode=Codes.UNAUTHORIZED) + + +class _BaseThreepidAuthChecker: + def __init__(self, hs): + self.hs = hs + self.store = hs.get_datastore() + + @defer.inlineCallbacks + def _check_threepid(self, medium, authdict): + if "threepid_creds" not in authdict: + raise LoginError(400, "Missing threepid_creds", Codes.MISSING_PARAM) + + threepid_creds = authdict["threepid_creds"] + + identity_handler = self.hs.get_handlers().identity_handler + + logger.info("Getting validated threepid. threepidcreds: %r", (threepid_creds,)) + + # msisdns are currently always ThreepidBehaviour.REMOTE + if medium == "msisdn": + if not self.hs.config.account_threepid_delegate_msisdn: + raise SynapseError( + 400, "Phone number verification is not enabled on this homeserver" + ) + threepid = yield identity_handler.threepid_from_creds( + self.hs.config.account_threepid_delegate_msisdn, threepid_creds + ) + elif medium == "email": + if self.hs.config.threepid_behaviour_email == ThreepidBehaviour.REMOTE: + assert self.hs.config.account_threepid_delegate_email + threepid = yield identity_handler.threepid_from_creds( + self.hs.config.account_threepid_delegate_email, threepid_creds + ) + elif self.hs.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + threepid = None + row = yield self.store.get_threepid_validation_session( + medium, + threepid_creds["client_secret"], + sid=threepid_creds["sid"], + validated=True, + ) + + if row: + threepid = { + "medium": row["medium"], + "address": row["address"], + "validated_at": row["validated_at"], + } + + # Valid threepid returned, delete from the db + yield self.store.delete_threepid_session(threepid_creds["sid"]) + else: + raise SynapseError( + 400, "Email address verification is not enabled on this homeserver" + ) + else: + # this can't happen! + raise AssertionError("Unrecognized threepid medium: %s" % (medium,)) + + if not threepid: + raise LoginError(401, "", errcode=Codes.UNAUTHORIZED) + + if threepid["medium"] != medium: + raise LoginError( + 401, + "Expecting threepid of type '%s', got '%s'" + % (medium, threepid["medium"]), + errcode=Codes.UNAUTHORIZED, + ) + + threepid["threepid_creds"] = authdict["threepid_creds"] + + return threepid + + +class EmailIdentityAuthChecker(UserInteractiveAuthChecker, _BaseThreepidAuthChecker): + AUTH_TYPE = LoginType.EMAIL_IDENTITY + + def __init__(self, hs): + UserInteractiveAuthChecker.__init__(self, hs) + _BaseThreepidAuthChecker.__init__(self, hs) + + def is_enabled(self): + return self.hs.config.threepid_behaviour_email in ( + ThreepidBehaviour.REMOTE, + ThreepidBehaviour.LOCAL, + ) + + def check_auth(self, authdict, clientip): + return self._check_threepid("email", authdict) + + +class MsisdnAuthChecker(UserInteractiveAuthChecker, _BaseThreepidAuthChecker): + AUTH_TYPE = LoginType.MSISDN + + def __init__(self, hs): + UserInteractiveAuthChecker.__init__(self, hs) + _BaseThreepidAuthChecker.__init__(self, hs) + + def is_enabled(self): + return bool(self.hs.config.account_threepid_delegate_msisdn) + + def check_auth(self, authdict, clientip): + return self._check_threepid("msisdn", authdict) + + +INTERACTIVE_AUTH_CHECKERS = [ + DummyAuthChecker, + TermsAuthChecker, + RecaptchaAuthChecker, + EmailIdentityAuthChecker, + MsisdnAuthChecker, +] +"""A list of UserInteractiveAuthChecker classes""" diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index e53669e40..624f05ab5 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -138,21 +138,28 @@ class UserDirectoryHandler(StateDeltasHandler): # Loop round handling deltas until we're up to date while True: with Measure(self.clock, "user_dir_delta"): - deltas = yield self.store.get_current_state_deltas(self.pos) - if not deltas: + room_max_stream_ordering = self.store.get_room_max_stream_ordering() + if self.pos == room_max_stream_ordering: return + logger.debug( + "Processing user stats %s->%s", self.pos, room_max_stream_ordering + ) + max_pos, deltas = yield self.store.get_current_state_deltas( + self.pos, room_max_stream_ordering + ) + logger.info("Handling %d state deltas", len(deltas)) yield self._handle_deltas(deltas) - self.pos = deltas[-1]["stream_id"] + self.pos = max_pos # Expose current event processing position to prometheus synapse.metrics.event_processing_positions.labels("user_dir").set( - self.pos + max_pos ) - yield self.store.update_user_directory_stream_pos(self.pos) + yield self.store.update_user_directory_stream_pos(max_pos) @defer.inlineCallbacks def _handle_deltas(self, deltas): diff --git a/synapse/http/__init__.py b/synapse/http/__init__.py index 3acf772cd..3880ce0d9 100644 --- a/synapse/http/__init__.py +++ b/synapse/http/__init__.py @@ -42,11 +42,13 @@ def cancelled_to_request_timed_out_error(value, timeout): ACCESS_TOKEN_RE = re.compile(r"(\?.*access(_|%5[Ff])token=)[^&]*(.*)$") +CLIENT_SECRET_RE = re.compile(r"(\?.*client(_|%5[Ff])secret=)[^&]*(.*)$") def redact_uri(uri): - """Strips access tokens from the uri replaces with """ - return ACCESS_TOKEN_RE.sub(r"\1\3", uri) + """Strips sensitive information from the uri replaces with """ + uri = ACCESS_TOKEN_RE.sub(r"\1\3", uri) + return CLIENT_SECRET_RE.sub(r"\1\3", uri) class QuieterFileBodyProducer(FileBodyProducer): diff --git a/synapse/http/client.py b/synapse/http/client.py index 51765ae3c..cdf828a4f 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -327,7 +327,7 @@ class SimpleHttpClient(object): Args: uri (str): args (dict[str, str|List[str]]): query params - headers (dict[str, List[str]]|None): If not None, a map from + headers (dict[str|bytes, List[str|bytes]]|None): If not None, a map from header name to a list of values for that header Returns: @@ -371,7 +371,7 @@ class SimpleHttpClient(object): Args: uri (str): post_json (object): - headers (dict[str, List[str]]|None): If not None, a map from + headers (dict[str|bytes, List[str|bytes]]|None): If not None, a map from header name to a list of values for that header Returns: @@ -414,7 +414,7 @@ class SimpleHttpClient(object): None. **Note**: The value of each key is assumed to be an iterable and *not* a string. - headers (dict[str, List[str]]|None): If not None, a map from + headers (dict[str|bytes, List[str|bytes]]|None): If not None, a map from header name to a list of values for that header Returns: Deferred: Succeeds when we get *any* 2xx HTTP response, with the @@ -438,7 +438,7 @@ class SimpleHttpClient(object): None. **Note**: The value of each key is assumed to be an iterable and *not* a string. - headers (dict[str, List[str]]|None): If not None, a map from + headers (dict[str|bytes, List[str|bytes]]|None): If not None, a map from header name to a list of values for that header Returns: Deferred: Succeeds when we get *any* 2xx HTTP response, with the @@ -482,7 +482,7 @@ class SimpleHttpClient(object): None. **Note**: The value of each key is assumed to be an iterable and *not* a string. - headers (dict[str, List[str]]|None): If not None, a map from + headers (dict[str|bytes, List[str|bytes]]|None): If not None, a map from header name to a list of values for that header Returns: Deferred: Succeeds when we get *any* 2xx HTTP response, with the @@ -516,7 +516,7 @@ class SimpleHttpClient(object): Args: url (str): The URL to GET output_stream (file): File to write the response body to. - headers (dict[str, List[str]]|None): If not None, a map from + headers (dict[str|bytes, List[str|bytes]]|None): If not None, a map from header name to a list of values for that header Returns: A (int,dict,string,int) tuple of the file length, dict of the response diff --git a/synapse/http/federation/matrix_federation_agent.py b/synapse/http/federation/matrix_federation_agent.py index feae7de5b..647d26dc5 100644 --- a/synapse/http/federation/matrix_federation_agent.py +++ b/synapse/http/federation/matrix_federation_agent.py @@ -217,7 +217,7 @@ class MatrixHostnameEndpoint(object): self._tls_options = None else: self._tls_options = tls_client_options_factory.get_options( - self._parsed_uri.host.decode("ascii") + self._parsed_uri.host ) self._srv_resolver = srv_resolver diff --git a/synapse/http/server.py b/synapse/http/server.py index cb9158fe1..2ccb210fd 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -388,7 +388,7 @@ class DirectServeResource(resource.Resource): if not callback: return super().render(request) - resp = callback(request) + resp = trace_servlet(self.__class__.__name__)(callback)(request) # If it's a coroutine, turn it into a Deferred if isinstance(resp, types.CoroutineType): diff --git a/synapse/logging/_structured.py b/synapse/logging/_structured.py index 0367d6dfc..3220e985a 100644 --- a/synapse/logging/_structured.py +++ b/synapse/logging/_structured.py @@ -18,6 +18,7 @@ import os.path import sys import typing import warnings +from typing import List import attr from constantly import NamedConstant, Names, ValueConstant, Values @@ -33,7 +34,6 @@ from twisted.logger import ( LogLevelFilterPredicate, LogPublisher, eventAsText, - globalLogBeginner, jsonFileLogObserver, ) @@ -134,7 +134,7 @@ class PythonStdlibToTwistedLogger(logging.Handler): ) -def SynapseFileLogObserver(outFile: typing.io.TextIO) -> FileLogObserver: +def SynapseFileLogObserver(outFile: typing.IO[str]) -> FileLogObserver: """ A log observer that formats events like the traditional log formatter and sends them to `outFile`. @@ -265,7 +265,7 @@ def setup_structured_logging( hs, config, log_config: dict, - logBeginner: LogBeginner = globalLogBeginner, + logBeginner: LogBeginner, redirect_stdlib_logging: bool = True, ) -> LogPublisher: """ @@ -286,7 +286,7 @@ def setup_structured_logging( if "drains" not in log_config: raise ConfigError("The logging configuration requires a list of drains.") - observers = [] + observers = [] # type: List[ILogObserver] for observer in parse_drain_configs(log_config["drains"]): # Pipe drains diff --git a/synapse/logging/_terse_json.py b/synapse/logging/_terse_json.py index 7f1e8f23f..0ebbde06f 100644 --- a/synapse/logging/_terse_json.py +++ b/synapse/logging/_terse_json.py @@ -21,10 +21,11 @@ import sys from collections import deque from ipaddress import IPv4Address, IPv6Address, ip_address from math import floor -from typing.io import TextIO +from typing import IO import attr from simplejson import dumps +from zope.interface import implementer from twisted.application.internet import ClientService from twisted.internet.endpoints import ( @@ -33,7 +34,7 @@ from twisted.internet.endpoints import ( TCP6ClientEndpoint, ) from twisted.internet.protocol import Factory, Protocol -from twisted.logger import FileLogObserver, Logger +from twisted.logger import FileLogObserver, ILogObserver, Logger from twisted.python.failure import Failure @@ -129,7 +130,7 @@ def flatten_event(event: dict, metadata: dict, include_time: bool = False): return new_event -def TerseJSONToConsoleLogObserver(outFile: TextIO, metadata: dict) -> FileLogObserver: +def TerseJSONToConsoleLogObserver(outFile: IO[str], metadata: dict) -> FileLogObserver: """ A log observer that formats events to a flattened JSON representation. @@ -146,6 +147,7 @@ def TerseJSONToConsoleLogObserver(outFile: TextIO, metadata: dict) -> FileLogObs @attr.s +@implementer(ILogObserver) class TerseJSONToTCPLogObserver(object): """ An IObserver that writes JSON logs to a TCP target. diff --git a/synapse/logging/context.py b/synapse/logging/context.py index 63379bfb9..370000e37 100644 --- a/synapse/logging/context.py +++ b/synapse/logging/context.py @@ -1,4 +1,5 @@ # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2019 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -42,13 +43,17 @@ try: # exception. resource.getrusage(RUSAGE_THREAD) + is_thread_resource_usage_supported = True + def get_thread_resource_usage(): return resource.getrusage(RUSAGE_THREAD) except Exception: # If the system doesn't support resource.getrusage(RUSAGE_THREAD) then we - # won't track resource usage by returning None. + # won't track resource usage. + is_thread_resource_usage_supported = False + def get_thread_resource_usage(): return None @@ -359,7 +364,11 @@ class LoggingContext(object): # When we stop, let's record the cpu used since we started if not self.usage_start: - logger.warning("Called stop on logcontext %s without calling start", self) + # Log a warning on platforms that support thread usage tracking + if is_thread_resource_usage_supported: + logger.warning( + "Called stop on logcontext %s without calling start", self + ) return utime_delta, stime_delta = self._get_cputime() diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py index 724625301..0638cec42 100644 --- a/synapse/logging/opentracing.py +++ b/synapse/logging/opentracing.py @@ -169,7 +169,9 @@ import contextlib import inspect import logging import re +import types from functools import wraps +from typing import Dict from canonicaljson import json @@ -223,8 +225,8 @@ try: from jaeger_client import Config as JaegerConfig from synapse.logging.scopecontextmanager import LogContextScopeManager except ImportError: - JaegerConfig = None - LogContextScopeManager = None + JaegerConfig = None # type: ignore + LogContextScopeManager = None # type: ignore logger = logging.getLogger(__name__) @@ -547,7 +549,7 @@ def inject_active_span_twisted_headers(headers, destination, check_destination=T return span = opentracing.tracer.active_span - carrier = {} + carrier = {} # type: Dict[str, str] opentracing.tracer.inject(span, opentracing.Format.HTTP_HEADERS, carrier) for key, value in carrier.items(): @@ -584,7 +586,7 @@ def inject_active_span_byte_dict(headers, destination, check_destination=True): span = opentracing.tracer.active_span - carrier = {} + carrier = {} # type: Dict[str, str] opentracing.tracer.inject(span, opentracing.Format.HTTP_HEADERS, carrier) for key, value in carrier.items(): @@ -639,7 +641,7 @@ def get_active_span_text_map(destination=None): if destination and not whitelisted_homeserver(destination): return {} - carrier = {} + carrier = {} # type: Dict[str, str] opentracing.tracer.inject( opentracing.tracer.active_span, opentracing.Format.TEXT_MAP, carrier ) @@ -653,7 +655,7 @@ def active_span_context_as_string(): Returns: The active span context encoded as a string. """ - carrier = {} + carrier = {} # type: Dict[str, str] if opentracing: opentracing.tracer.inject( opentracing.tracer.active_span, opentracing.Format.TEXT_MAP, carrier @@ -777,8 +779,7 @@ def trace_servlet(servlet_name, extract_context=False): return func @wraps(func) - @defer.inlineCallbacks - def _trace_servlet_inner(request, *args, **kwargs): + async def _trace_servlet_inner(request, *args, **kwargs): request_tags = { "request_id": request.get_request_id(), tags.SPAN_KIND: tags.SPAN_KIND_RPC_SERVER, @@ -795,8 +796,14 @@ def trace_servlet(servlet_name, extract_context=False): scope = start_active_span(servlet_name, tags=request_tags) with scope: - result = yield defer.maybeDeferred(func, request, *args, **kwargs) - return result + result = func(request, *args, **kwargs) + + if not isinstance(result, (types.CoroutineType, defer.Deferred)): + # Some servlets aren't async and just return results + # directly, so we handle that here. + return result + + return await result return _trace_servlet_inner diff --git a/synapse/logging/utils.py b/synapse/logging/utils.py index 7df0fa608..6073fc272 100644 --- a/synapse/logging/utils.py +++ b/synapse/logging/utils.py @@ -119,7 +119,11 @@ def trace_function(f): logger = logging.getLogger(name) level = logging.DEBUG - s = inspect.currentframe().f_back + frame = inspect.currentframe() + if frame is None: + raise Exception("Can't get current frame!") + + s = frame.f_back to_print = [ "\t%s:%s %s. Args: args=%s, kwargs=%s" @@ -144,7 +148,7 @@ def trace_function(f): pathname=pathname, lineno=lineno, msg=msg, - args=None, + args=tuple(), exc_info=None, ) @@ -157,7 +161,12 @@ def trace_function(f): def get_previous_frames(): - s = inspect.currentframe().f_back.f_back + + frame = inspect.currentframe() + if frame is None: + raise Exception("Can't get current frame!") + + s = frame.f_back.f_back to_return = [] while s: if s.f_globals["__name__"].startswith("synapse"): @@ -174,7 +183,10 @@ def get_previous_frames(): def get_previous_frame(ignore=[]): - s = inspect.currentframe().f_back.f_back + frame = inspect.currentframe() + if frame is None: + raise Exception("Can't get current frame!") + s = frame.f_back.f_back while s: if s.f_globals["__name__"].startswith("synapse"): diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index 488280b4a..0b45e1f52 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -20,6 +20,7 @@ import os import platform import threading import time +from typing import Dict, Union import six @@ -29,20 +30,20 @@ from prometheus_client.core import REGISTRY, GaugeMetricFamily, HistogramMetricF from twisted.internet import reactor +import synapse from synapse.metrics._exposition import ( MetricsResource, generate_latest, start_http_server, ) +from synapse.util.versionstring import get_version_string logger = logging.getLogger(__name__) METRICS_PREFIX = "/_synapse/metrics" running_on_pypy = platform.python_implementation() == "PyPy" -all_metrics = [] -all_collectors = [] -all_gauges = {} +all_gauges = {} # type: Dict[str, Union[LaterGauge, InFlightGauge, BucketCollector]] HAVE_PROC_SELF_STAT = os.path.exists("/proc/self/stat") @@ -124,7 +125,7 @@ class InFlightGauge(object): ) # Counts number of in flight blocks for a given set of label values - self._registrations = {} + self._registrations = {} # type: Dict # Protects access to _registrations self._lock = threading.Lock() @@ -225,7 +226,7 @@ class BucketCollector(object): # Fetch the data -- this must be synchronous! data = self.data_collector() - buckets = {} + buckets = {} # type: Dict[float, int] res = [] for x in data.keys(): @@ -385,6 +386,16 @@ event_processing_last_ts = Gauge("synapse_event_processing_last_ts", "", ["name" # finished being processed. event_processing_lag = Gauge("synapse_event_processing_lag", "", ["name"]) +# Build info of the running server. +build_info = Gauge( + "synapse_build_info", "Build information", ["pythonversion", "version", "osversion"] +) +build_info.labels( + " ".join([platform.python_implementation(), platform.python_version()]), + get_version_string(synapse), + " ".join([platform.system(), platform.release()]), +).set(1) + last_ticked = time.time() diff --git a/synapse/metrics/_exposition.py b/synapse/metrics/_exposition.py index 1933ecd3e..a24810319 100644 --- a/synapse/metrics/_exposition.py +++ b/synapse/metrics/_exposition.py @@ -36,7 +36,9 @@ from twisted.web.resource import Resource try: from prometheus_client.samples import Sample except ImportError: - Sample = namedtuple("Sample", ["name", "labels", "value", "timestamp", "exemplar"]) + Sample = namedtuple( # type: ignore[no-redef] # noqa + "Sample", ["name", "labels", "value", "timestamp", "exemplar"] + ) CONTENT_TYPE_LATEST = str("text/plain; version=0.0.4; charset=utf-8") diff --git a/synapse/metrics/background_process_metrics.py b/synapse/metrics/background_process_metrics.py index edd6b42db..c53d2a0d4 100644 --- a/synapse/metrics/background_process_metrics.py +++ b/synapse/metrics/background_process_metrics.py @@ -15,6 +15,8 @@ import logging import threading +from asyncio import iscoroutine +from functools import wraps import six @@ -173,7 +175,7 @@ def run_as_background_process(desc, func, *args, **kwargs): Args: desc (str): a description for this background process type - func: a function, which may return a Deferred + func: a function, which may return a Deferred or a coroutine args: positional args for func kwargs: keyword args for func @@ -197,7 +199,17 @@ def run_as_background_process(desc, func, *args, **kwargs): _background_processes.setdefault(desc, set()).add(proc) try: - yield func(*args, **kwargs) + result = func(*args, **kwargs) + + # We probably don't have an ensureDeferred in our call stack to handle + # coroutine results, so we need to ensureDeferred here. + # + # But we need this check because ensureDeferred doesn't like being + # called on immediate values (as opposed to Deferreds or coroutines). + if iscoroutine(result): + result = defer.ensureDeferred(result) + + return (yield result) except Exception: logger.exception("Background process '%s' threw an exception", desc) finally: @@ -208,3 +220,20 @@ def run_as_background_process(desc, func, *args, **kwargs): with PreserveLoggingContext(): return run() + + +def wrap_as_background_process(desc): + """Decorator that wraps a function that gets called as a background + process. + + Equivalent of calling the function with `run_as_background_process` + """ + + def wrap_as_background_process_inner(func): + @wraps(func) + def wrap_as_background_process_inner_2(*args, **kwargs): + return run_as_background_process(desc, func, *args, **kwargs) + + return wrap_as_background_process_inner_2 + + return wrap_as_background_process_inner diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py index bd5d53af9..629958780 100644 --- a/synapse/push/httppusher.py +++ b/synapse/push/httppusher.py @@ -22,6 +22,7 @@ from prometheus_client import Counter from twisted.internet import defer from twisted.internet.error import AlreadyCalled, AlreadyCancelled +from synapse.logging import opentracing from synapse.metrics.background_process_metrics import run_as_background_process from synapse.push import PusherConfigException @@ -194,7 +195,17 @@ class HttpPusher(object): ) for push_action in unprocessed: - processed = yield self._process_one(push_action) + with opentracing.start_active_span( + "http-push", + tags={ + "authenticated_entity": self.user_id, + "event_id": push_action["event_id"], + "app_id": self.app_id, + "app_display_name": self.app_display_name, + }, + ): + processed = yield self._process_one(push_action) + if processed: http_push_processed_counter.inc() self.backoff_delay = HttpPusher.INITIAL_BACKOFF_SEC diff --git a/synapse/push/mailer.py b/synapse/push/mailer.py index 3dfd52784..5b16ab4ae 100644 --- a/synapse/push/mailer.py +++ b/synapse/push/mailer.py @@ -136,10 +136,11 @@ class Mailer(object): group together multiple email sending attempts sid (str): The generated session ID """ + params = {"token": token, "client_secret": client_secret, "sid": sid} link = ( self.hs.config.public_baseurl - + "_matrix/client/unstable/password_reset/email/submit_token" - "?token=%s&client_secret=%s&sid=%s" % (token, client_secret, sid) + + "_matrix/client/unstable/password_reset/email/submit_token?%s" + % urllib.parse.urlencode(params) ) template_vars = {"link": link} @@ -163,10 +164,11 @@ class Mailer(object): group together multiple email sending attempts sid (str): The generated session ID """ + params = {"token": token, "client_secret": client_secret, "sid": sid} link = ( self.hs.config.public_baseurl - + "_matrix/client/unstable/registration/email/submit_token" - "?token=%s&client_secret=%s&sid=%s" % (token, client_secret, sid) + + "_matrix/client/unstable/registration/email/submit_token?%s" + % urllib.parse.urlencode(params) ) template_vars = {"link": link} @@ -177,6 +179,35 @@ class Mailer(object): template_vars, ) + @defer.inlineCallbacks + def send_add_threepid_mail(self, email_address, token, client_secret, sid): + """Send an email with a validation link to a user for adding a 3pid to their account + + Args: + email_address (str): Email address we're sending the validation link to + + token (str): Unique token generated by the server to verify the email was received + + client_secret (str): Unique token generated by the client to group together + multiple email sending attempts + + sid (str): The generated session ID + """ + params = {"token": token, "client_secret": client_secret, "sid": sid} + link = ( + self.hs.config.public_baseurl + + "_matrix/client/unstable/add_threepid/email/submit_token?%s" + % urllib.parse.urlencode(params) + ) + + template_vars = {"link": link} + + yield self.send_email( + email_address, + "[%s] Validate Your Email" % self.hs.config.server_name, + template_vars, + ) + @defer.inlineCallbacks def send_notification_mail( self, app_id, user_id, email_address, push_actions, reason @@ -280,7 +311,7 @@ class Mailer(object): multipart_msg.attach(text_part) multipart_msg.attach(html_part) - logger.info("Sending email notification to %s" % email_address) + logger.info("Sending email to %s" % email_address) yield make_deferred_yieldable( self.sendmail( diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py index ec0ac547c..aa7da1c54 100644 --- a/synapse/python_dependencies.py +++ b/synapse/python_dependencies.py @@ -15,6 +15,7 @@ # limitations under the License. import logging +from typing import List, Set from pkg_resources import ( DistributionNotFound, @@ -72,6 +73,7 @@ REQUIREMENTS = [ "netaddr>=0.7.18", "Jinja2>=2.9", "bleach>=1.4.3", + "typing-extensions>=3.7.4", ] CONDITIONAL_REQUIREMENTS = { @@ -97,7 +99,7 @@ CONDITIONAL_REQUIREMENTS = { "jwt": ["pyjwt>=1.6.4"], } -ALL_OPTIONAL_REQUIREMENTS = set() +ALL_OPTIONAL_REQUIREMENTS = set() # type: Set[str] for name, optional_deps in CONDITIONAL_REQUIREMENTS.items(): # Exclude systemd as it's a system-based requirement. @@ -143,16 +145,26 @@ def check_requirements(for_feature=None): deps_needed.append(dependency) errors.append( "Needed %s, got %s==%s" - % (dependency, e.dist.project_name, e.dist.version) + % ( + dependency, + e.dist.project_name, # type: ignore[attr-defined] # noqa + e.dist.version, # type: ignore[attr-defined] # noqa + ) ) except DistributionNotFound: deps_needed.append(dependency) - errors.append("Needed %s but it was not installed" % (dependency,)) + if for_feature: + errors.append( + "Needed %s for the '%s' feature but it was not installed" + % (dependency, for_feature) + ) + else: + errors.append("Needed %s but it was not installed" % (dependency,)) if not for_feature: # Check the optional dependencies are up to date. We allow them to not be # installed. - OPTS = sum(CONDITIONAL_REQUIREMENTS.values(), []) + OPTS = sum(CONDITIONAL_REQUIREMENTS.values(), []) # type: List[str] for dependency in OPTS: try: @@ -161,15 +173,19 @@ def check_requirements(for_feature=None): deps_needed.append(dependency) errors.append( "Needed optional %s, got %s==%s" - % (dependency, e.dist.project_name, e.dist.version) + % ( + dependency, + e.dist.project_name, # type: ignore[attr-defined] # noqa + e.dist.version, # type: ignore[attr-defined] # noqa + ) ) except DistributionNotFound: # If it's not found, we don't care pass if deps_needed: - for e in errors: - logging.error(e) + for err in errors: + logging.error(err) raise DependencyException(deps_needed) diff --git a/synapse/res/templates/add_threepid.html b/synapse/res/templates/add_threepid.html new file mode 100644 index 000000000..cc4ab07e0 --- /dev/null +++ b/synapse/res/templates/add_threepid.html @@ -0,0 +1,9 @@ + + +

A request to add an email address to your Matrix account has been received. If this was you, please click the link below to confirm adding this email:

+ + {{ link }} + +

If this was not you, you can safely ignore this email. Thank you.

+ + diff --git a/synapse/res/templates/add_threepid.txt b/synapse/res/templates/add_threepid.txt new file mode 100644 index 000000000..a60c1ff65 --- /dev/null +++ b/synapse/res/templates/add_threepid.txt @@ -0,0 +1,6 @@ +A request to add an email address to your Matrix account has been received. If this was you, +please click the link below to confirm adding this email: + +{{ link }} + +If this was not you, you can safely ignore this email. Thank you. diff --git a/synapse/res/templates/add_threepid_failure.html b/synapse/res/templates/add_threepid_failure.html new file mode 100644 index 000000000..441d11c84 --- /dev/null +++ b/synapse/res/templates/add_threepid_failure.html @@ -0,0 +1,8 @@ + + + +

The request failed for the following reason: {{ failure_reason }}.

+ +

No changes have been made to your account.

+ + diff --git a/synapse/res/templates/add_threepid_success.html b/synapse/res/templates/add_threepid_success.html new file mode 100644 index 000000000..fbd6e4018 --- /dev/null +++ b/synapse/res/templates/add_threepid_success.html @@ -0,0 +1,6 @@ + + + +

Your email has now been validated, please return to your client. You may now close this window.

+ + diff --git a/synapse/rest/admin/__init__.py b/synapse/rest/admin/__init__.py index 81b6bd881..939418ee2 100644 --- a/synapse/rest/admin/__init__.py +++ b/synapse/rest/admin/__init__.py @@ -23,8 +23,6 @@ import re from six import text_type from six.moves import http_client -from twisted.internet import defer - import synapse from synapse.api.constants import Membership, UserTypes from synapse.api.errors import Codes, NotFoundError, SynapseError @@ -46,6 +44,7 @@ from synapse.rest.admin.purge_room_servlet import PurgeRoomServlet from synapse.rest.admin.server_notice_servlet import SendServerNoticeServlet from synapse.rest.admin.users import UserAdminServlet from synapse.types import UserID, create_requester +from synapse.util.async_helpers import maybe_awaitable from synapse.util.versionstring import get_version_string logger = logging.getLogger(__name__) @@ -59,15 +58,14 @@ class UsersRestServlet(RestServlet): self.auth = hs.get_auth() self.handlers = hs.get_handlers() - @defer.inlineCallbacks - def on_GET(self, request, user_id): + async def on_GET(self, request, user_id): target_user = UserID.from_string(user_id) - yield assert_requester_is_admin(self.auth, request) + await assert_requester_is_admin(self.auth, request) if not self.hs.is_mine(target_user): raise SynapseError(400, "Can only users a local user") - ret = yield self.handlers.admin_handler.get_users() + ret = await self.handlers.admin_handler.get_users() return 200, ret @@ -122,8 +120,7 @@ class UserRegisterServlet(RestServlet): self.nonces[nonce] = int(self.reactor.seconds()) return 200, {"nonce": nonce} - @defer.inlineCallbacks - def on_POST(self, request): + async def on_POST(self, request): self._clear_old_nonces() if not self.hs.config.registration_shared_secret: @@ -204,14 +201,14 @@ class UserRegisterServlet(RestServlet): register = RegisterRestServlet(self.hs) - user_id = yield register.registration_handler.register_user( + user_id = await register.registration_handler.register_user( localpart=body["username"].lower(), password=body["password"], admin=bool(admin), user_type=user_type, ) - result = yield register._create_registration_details(user_id, body) + result = await register._create_registration_details(user_id, body) return 200, result @@ -223,19 +220,18 @@ class WhoisRestServlet(RestServlet): self.auth = hs.get_auth() self.handlers = hs.get_handlers() - @defer.inlineCallbacks - def on_GET(self, request, user_id): + async def on_GET(self, request, user_id): target_user = UserID.from_string(user_id) - requester = yield self.auth.get_user_by_req(request) + requester = await self.auth.get_user_by_req(request) auth_user = requester.user if target_user != auth_user: - yield assert_user_is_admin(self.auth, auth_user) + await assert_user_is_admin(self.auth, auth_user) if not self.hs.is_mine(target_user): raise SynapseError(400, "Can only whois a local user") - ret = yield self.handlers.admin_handler.get_whois(target_user) + ret = await self.handlers.admin_handler.get_whois(target_user) return 200, ret @@ -255,9 +251,8 @@ class PurgeHistoryRestServlet(RestServlet): self.store = hs.get_datastore() self.auth = hs.get_auth() - @defer.inlineCallbacks - def on_POST(self, request, room_id, event_id): - yield assert_requester_is_admin(self.auth, request) + async def on_POST(self, request, room_id, event_id): + await assert_requester_is_admin(self.auth, request) body = parse_json_object_from_request(request, allow_empty_body=True) @@ -270,12 +265,12 @@ class PurgeHistoryRestServlet(RestServlet): event_id = body.get("purge_up_to_event_id") if event_id is not None: - event = yield self.store.get_event(event_id) + event = await self.store.get_event(event_id) if event.room_id != room_id: raise SynapseError(400, "Event is for wrong room.") - token = yield self.store.get_topological_token_for_event(event_id) + token = await self.store.get_topological_token_for_event(event_id) logger.info("[purge] purging up to token %s (event_id %s)", token, event_id) elif "purge_up_to_ts" in body: @@ -285,12 +280,10 @@ class PurgeHistoryRestServlet(RestServlet): 400, "purge_up_to_ts must be an int", errcode=Codes.BAD_JSON ) - stream_ordering = (yield self.store.find_first_stream_ordering_after_ts(ts)) + stream_ordering = await self.store.find_first_stream_ordering_after_ts(ts) - r = ( - yield self.store.get_room_event_after_stream_ordering( - room_id, stream_ordering - ) + r = await self.store.get_room_event_after_stream_ordering( + room_id, stream_ordering ) if not r: logger.warn( @@ -318,7 +311,7 @@ class PurgeHistoryRestServlet(RestServlet): errcode=Codes.BAD_JSON, ) - purge_id = yield self.pagination_handler.start_purge_history( + purge_id = self.pagination_handler.start_purge_history( room_id, token, delete_local_events=delete_local_events ) @@ -339,9 +332,8 @@ class PurgeHistoryStatusRestServlet(RestServlet): self.pagination_handler = hs.get_pagination_handler() self.auth = hs.get_auth() - @defer.inlineCallbacks - def on_GET(self, request, purge_id): - yield assert_requester_is_admin(self.auth, request) + async def on_GET(self, request, purge_id): + await assert_requester_is_admin(self.auth, request) purge_status = self.pagination_handler.get_purge_status(purge_id) if purge_status is None: @@ -357,9 +349,8 @@ class DeactivateAccountRestServlet(RestServlet): self._deactivate_account_handler = hs.get_deactivate_account_handler() self.auth = hs.get_auth() - @defer.inlineCallbacks - def on_POST(self, request, target_user_id): - yield assert_requester_is_admin(self.auth, request) + async def on_POST(self, request, target_user_id): + await assert_requester_is_admin(self.auth, request) body = parse_json_object_from_request(request, allow_empty_body=True) erase = body.get("erase", False) if not isinstance(erase, bool): @@ -371,7 +362,7 @@ class DeactivateAccountRestServlet(RestServlet): UserID.from_string(target_user_id) - result = yield self._deactivate_account_handler.deactivate_account( + result = await self._deactivate_account_handler.deactivate_account( target_user_id, erase ) if result: @@ -405,10 +396,9 @@ class ShutdownRoomRestServlet(RestServlet): self.room_member_handler = hs.get_room_member_handler() self.auth = hs.get_auth() - @defer.inlineCallbacks - def on_POST(self, request, room_id): - requester = yield self.auth.get_user_by_req(request) - yield assert_user_is_admin(self.auth, requester.user) + async def on_POST(self, request, room_id): + requester = await self.auth.get_user_by_req(request) + await assert_user_is_admin(self.auth, requester.user) content = parse_json_object_from_request(request) assert_params_in_dict(content, ["new_room_user_id"]) @@ -419,7 +409,7 @@ class ShutdownRoomRestServlet(RestServlet): message = content.get("message", self.DEFAULT_MESSAGE) room_name = content.get("room_name", "Content Violation Notification") - info = yield self._room_creation_handler.create_room( + info = await self._room_creation_handler.create_room( room_creator_requester, config={ "preset": "public_chat", @@ -438,9 +428,9 @@ class ShutdownRoomRestServlet(RestServlet): # This will work even if the room is already blocked, but that is # desirable in case the first attempt at blocking the room failed below. - yield self.store.block_room(room_id, requester_user_id) + await self.store.block_room(room_id, requester_user_id) - users = yield self.state.get_current_users_in_room(room_id) + users = await self.state.get_current_users_in_room(room_id) kicked_users = [] failed_to_kick_users = [] for user_id in users: @@ -451,7 +441,7 @@ class ShutdownRoomRestServlet(RestServlet): try: target_requester = create_requester(user_id) - yield self.room_member_handler.update_membership( + await self.room_member_handler.update_membership( requester=target_requester, target=target_requester.user, room_id=room_id, @@ -461,9 +451,9 @@ class ShutdownRoomRestServlet(RestServlet): require_consent=False, ) - yield self.room_member_handler.forget(target_requester.user, room_id) + await self.room_member_handler.forget(target_requester.user, room_id) - yield self.room_member_handler.update_membership( + await self.room_member_handler.update_membership( requester=target_requester, target=target_requester.user, room_id=new_room_id, @@ -480,7 +470,7 @@ class ShutdownRoomRestServlet(RestServlet): ) failed_to_kick_users.append(user_id) - yield self.event_creation_handler.create_and_send_nonmember_event( + await self.event_creation_handler.create_and_send_nonmember_event( room_creator_requester, { "type": "m.room.message", @@ -491,9 +481,11 @@ class ShutdownRoomRestServlet(RestServlet): ratelimit=False, ) - aliases_for_room = yield self.store.get_aliases_for_room(room_id) + aliases_for_room = await maybe_awaitable( + self.store.get_aliases_for_room(room_id) + ) - yield self.store.update_aliases_for_room( + await self.store.update_aliases_for_room( room_id, new_room_id, requester_user_id ) @@ -532,13 +524,12 @@ class ResetPasswordRestServlet(RestServlet): self.auth = hs.get_auth() self._set_password_handler = hs.get_set_password_handler() - @defer.inlineCallbacks - def on_POST(self, request, target_user_id): + async def on_POST(self, request, target_user_id): """Post request to allow an administrator reset password for a user. This needs user to have administrator access in Synapse. """ - requester = yield self.auth.get_user_by_req(request) - yield assert_user_is_admin(self.auth, requester.user) + requester = await self.auth.get_user_by_req(request) + await assert_user_is_admin(self.auth, requester.user) UserID.from_string(target_user_id) @@ -546,7 +537,7 @@ class ResetPasswordRestServlet(RestServlet): assert_params_in_dict(params, ["new_password"]) new_password = params["new_password"] - yield self._set_password_handler.set_password( + await self._set_password_handler.set_password( target_user_id, new_password, requester ) return 200, {} @@ -572,12 +563,11 @@ class GetUsersPaginatedRestServlet(RestServlet): self.auth = hs.get_auth() self.handlers = hs.get_handlers() - @defer.inlineCallbacks - def on_GET(self, request, target_user_id): + async def on_GET(self, request, target_user_id): """Get request to get specific number of users from Synapse. This needs user to have administrator access in Synapse. """ - yield assert_requester_is_admin(self.auth, request) + await assert_requester_is_admin(self.auth, request) target_user = UserID.from_string(target_user_id) @@ -590,11 +580,10 @@ class GetUsersPaginatedRestServlet(RestServlet): logger.info("limit: %s, start: %s", limit, start) - ret = yield self.handlers.admin_handler.get_users_paginate(order, start, limit) + ret = await self.handlers.admin_handler.get_users_paginate(order, start, limit) return 200, ret - @defer.inlineCallbacks - def on_POST(self, request, target_user_id): + async def on_POST(self, request, target_user_id): """Post request to get specific number of users from Synapse.. This needs user to have administrator access in Synapse. Example: @@ -608,7 +597,7 @@ class GetUsersPaginatedRestServlet(RestServlet): Returns: 200 OK with json object {list[dict[str, Any]], count} or empty object. """ - yield assert_requester_is_admin(self.auth, request) + await assert_requester_is_admin(self.auth, request) UserID.from_string(target_user_id) order = "name" # order by name in user table @@ -618,7 +607,7 @@ class GetUsersPaginatedRestServlet(RestServlet): start = params["start"] logger.info("limit: %s, start: %s", limit, start) - ret = yield self.handlers.admin_handler.get_users_paginate(order, start, limit) + ret = await self.handlers.admin_handler.get_users_paginate(order, start, limit) return 200, ret @@ -641,13 +630,12 @@ class SearchUsersRestServlet(RestServlet): self.auth = hs.get_auth() self.handlers = hs.get_handlers() - @defer.inlineCallbacks - def on_GET(self, request, target_user_id): + async def on_GET(self, request, target_user_id): """Get request to search user table for specific users according to search term. This needs user to have a administrator access in Synapse. """ - yield assert_requester_is_admin(self.auth, request) + await assert_requester_is_admin(self.auth, request) target_user = UserID.from_string(target_user_id) @@ -661,7 +649,7 @@ class SearchUsersRestServlet(RestServlet): term = parse_string(request, "term", required=True) logger.info("term: %s ", term) - ret = yield self.handlers.admin_handler.search_users(term) + ret = await self.handlers.admin_handler.search_users(term) return 200, ret @@ -676,15 +664,14 @@ class DeleteGroupAdminRestServlet(RestServlet): self.is_mine_id = hs.is_mine_id self.auth = hs.get_auth() - @defer.inlineCallbacks - def on_POST(self, request, group_id): - requester = yield self.auth.get_user_by_req(request) - yield assert_user_is_admin(self.auth, requester.user) + async def on_POST(self, request, group_id): + requester = await self.auth.get_user_by_req(request) + await assert_user_is_admin(self.auth, requester.user) if not self.is_mine_id(group_id): raise SynapseError(400, "Can only delete local groups") - yield self.group_server.delete_group(group_id, requester.user.to_string()) + await self.group_server.delete_group(group_id, requester.user.to_string()) return 200, {} @@ -700,16 +687,15 @@ class AccountValidityRenewServlet(RestServlet): self.account_activity_handler = hs.get_account_validity_handler() self.auth = hs.get_auth() - @defer.inlineCallbacks - def on_POST(self, request): - yield assert_requester_is_admin(self.auth, request) + async def on_POST(self, request): + await assert_requester_is_admin(self.auth, request) body = parse_json_object_from_request(request) if "user_id" not in body: raise SynapseError(400, "Missing property 'user_id' in the request body") - expiration_ts = yield self.account_activity_handler.renew_account_for_user( + expiration_ts = await self.account_activity_handler.renew_account_for_user( body["user_id"], body.get("expiration_ts"), not body.get("enable_renewal_emails", True), diff --git a/synapse/rest/admin/_base.py b/synapse/rest/admin/_base.py index 5a9b08d3e..afd064720 100644 --- a/synapse/rest/admin/_base.py +++ b/synapse/rest/admin/_base.py @@ -15,8 +15,6 @@ import re -from twisted.internet import defer - from synapse.api.errors import AuthError @@ -42,8 +40,7 @@ def historical_admin_path_patterns(path_regex): ) -@defer.inlineCallbacks -def assert_requester_is_admin(auth, request): +async def assert_requester_is_admin(auth, request): """Verify that the requester is an admin user WARNING: MAKE SURE YOU YIELD ON THE RESULT! @@ -58,12 +55,11 @@ def assert_requester_is_admin(auth, request): Raises: AuthError if the requester is not an admin """ - requester = yield auth.get_user_by_req(request) - yield assert_user_is_admin(auth, requester.user) + requester = await auth.get_user_by_req(request) + await assert_user_is_admin(auth, requester.user) -@defer.inlineCallbacks -def assert_user_is_admin(auth, user_id): +async def assert_user_is_admin(auth, user_id): """Verify that the given user is an admin user WARNING: MAKE SURE YOU YIELD ON THE RESULT! @@ -79,6 +75,6 @@ def assert_user_is_admin(auth, user_id): AuthError if the user is not an admin """ - is_admin = yield auth.is_server_admin(user_id) + is_admin = await auth.is_server_admin(user_id) if not is_admin: raise AuthError(403, "You are not a server admin") diff --git a/synapse/rest/admin/media.py b/synapse/rest/admin/media.py index ed7086d09..fa833e54c 100644 --- a/synapse/rest/admin/media.py +++ b/synapse/rest/admin/media.py @@ -16,8 +16,6 @@ import logging -from twisted.internet import defer - from synapse.api.errors import AuthError from synapse.http.servlet import RestServlet, parse_integer from synapse.rest.admin._base import ( @@ -40,12 +38,11 @@ class QuarantineMediaInRoom(RestServlet): self.store = hs.get_datastore() self.auth = hs.get_auth() - @defer.inlineCallbacks - def on_POST(self, request, room_id): - requester = yield self.auth.get_user_by_req(request) - yield assert_user_is_admin(self.auth, requester.user) + async def on_POST(self, request, room_id): + requester = await self.auth.get_user_by_req(request) + await assert_user_is_admin(self.auth, requester.user) - num_quarantined = yield self.store.quarantine_media_ids_in_room( + num_quarantined = await self.store.quarantine_media_ids_in_room( room_id, requester.user.to_string() ) @@ -62,14 +59,13 @@ class ListMediaInRoom(RestServlet): self.store = hs.get_datastore() self.auth = hs.get_auth() - @defer.inlineCallbacks - def on_GET(self, request, room_id): - requester = yield self.auth.get_user_by_req(request) - is_admin = yield self.auth.is_server_admin(requester.user) + async def on_GET(self, request, room_id): + requester = await self.auth.get_user_by_req(request) + is_admin = await self.auth.is_server_admin(requester.user) if not is_admin: raise AuthError(403, "You are not a server admin") - local_mxcs, remote_mxcs = yield self.store.get_media_mxcs_in_room(room_id) + local_mxcs, remote_mxcs = await self.store.get_media_mxcs_in_room(room_id) return 200, {"local": local_mxcs, "remote": remote_mxcs} @@ -81,14 +77,13 @@ class PurgeMediaCacheRestServlet(RestServlet): self.media_repository = hs.get_media_repository() self.auth = hs.get_auth() - @defer.inlineCallbacks - def on_POST(self, request): - yield assert_requester_is_admin(self.auth, request) + async def on_POST(self, request): + await assert_requester_is_admin(self.auth, request) before_ts = parse_integer(request, "before_ts", required=True) logger.info("before_ts: %r", before_ts) - ret = yield self.media_repository.delete_old_remote_media(before_ts) + ret = await self.media_repository.delete_old_remote_media(before_ts) return 200, ret diff --git a/synapse/rest/admin/server_notice_servlet.py b/synapse/rest/admin/server_notice_servlet.py index ae2cbe2e0..6e9a87412 100644 --- a/synapse/rest/admin/server_notice_servlet.py +++ b/synapse/rest/admin/server_notice_servlet.py @@ -14,8 +14,6 @@ # limitations under the License. import re -from twisted.internet import defer - from synapse.api.constants import EventTypes from synapse.api.errors import SynapseError from synapse.http.servlet import ( @@ -69,9 +67,8 @@ class SendServerNoticeServlet(RestServlet): self.__class__.__name__, ) - @defer.inlineCallbacks - def on_POST(self, request, txn_id=None): - yield assert_requester_is_admin(self.auth, request) + async def on_POST(self, request, txn_id=None): + await assert_requester_is_admin(self.auth, request) body = parse_json_object_from_request(request) assert_params_in_dict(body, ("user_id", "content")) event_type = body.get("type", EventTypes.Message) @@ -85,7 +82,7 @@ class SendServerNoticeServlet(RestServlet): if not self.hs.is_mine_id(user_id): raise SynapseError(400, "Server notices can only be sent to local users") - event = yield self.snm.send_notice( + event = await self.snm.send_notice( user_id=body["user_id"], type=event_type, state_key=state_key, diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py index 9720a3bab..d5d124a0d 100644 --- a/synapse/rest/admin/users.py +++ b/synapse/rest/admin/users.py @@ -14,8 +14,6 @@ # limitations under the License. import re -from twisted.internet import defer - from synapse.api.errors import SynapseError from synapse.http.servlet import ( RestServlet, @@ -59,24 +57,22 @@ class UserAdminServlet(RestServlet): self.auth = hs.get_auth() self.handlers = hs.get_handlers() - @defer.inlineCallbacks - def on_GET(self, request, user_id): - yield assert_requester_is_admin(self.auth, request) + async def on_GET(self, request, user_id): + await assert_requester_is_admin(self.auth, request) target_user = UserID.from_string(user_id) if not self.hs.is_mine(target_user): raise SynapseError(400, "Only local users can be admins of this homeserver") - is_admin = yield self.handlers.admin_handler.get_user_server_admin(target_user) + is_admin = await self.handlers.admin_handler.get_user_server_admin(target_user) is_admin = bool(is_admin) return 200, {"admin": is_admin} - @defer.inlineCallbacks - def on_PUT(self, request, user_id): - requester = yield self.auth.get_user_by_req(request) - yield assert_user_is_admin(self.auth, requester.user) + async def on_PUT(self, request, user_id): + requester = await self.auth.get_user_by_req(request) + await assert_user_is_admin(self.auth, requester.user) auth_user = requester.user target_user = UserID.from_string(user_id) @@ -93,7 +89,7 @@ class UserAdminServlet(RestServlet): if target_user == auth_user and not set_admin_to: raise SynapseError(400, "You may not demote yourself.") - yield self.handlers.admin_handler.set_user_server_admin( + await self.handlers.admin_handler.set_user_server_admin( target_user, set_admin_to ) diff --git a/synapse/rest/client/v1/login.py b/synapse/rest/client/v1/login.py index 25a1b6709..8414af08c 100644 --- a/synapse/rest/client/v1/login.py +++ b/synapse/rest/client/v1/login.py @@ -29,6 +29,7 @@ from synapse.http.servlet import ( parse_json_object_from_request, parse_string, ) +from synapse.http.site import SynapseRequest from synapse.rest.client.v2_alpha._base import client_patterns from synapse.rest.well_known import WellKnownBuilder from synapse.types import UserID, map_username_to_mxid_localpart @@ -376,6 +377,7 @@ class CasTicketServlet(RestServlet): super(CasTicketServlet, self).__init__() self.cas_server_url = hs.config.cas_server_url self.cas_service_url = hs.config.cas_service_url + self.cas_displayname_attribute = hs.config.cas_displayname_attribute self.cas_required_attributes = hs.config.cas_required_attributes self._sso_auth_handler = SSOAuthHandler(hs) self._http_client = hs.get_simple_http_client() @@ -399,6 +401,7 @@ class CasTicketServlet(RestServlet): def handle_cas_response(self, request, cas_response_body, client_redirect_url): user, attributes = self.parse_cas_response(cas_response_body) + displayname = attributes.pop(self.cas_displayname_attribute, None) for required_attribute, required_value in self.cas_required_attributes.items(): # If required attribute was not in CAS Response - Forbidden @@ -413,7 +416,7 @@ class CasTicketServlet(RestServlet): raise LoginError(401, "Unauthorized", errcode=Codes.UNAUTHORIZED) return self._sso_auth_handler.on_successful_auth( - user, request, client_redirect_url + user, request, client_redirect_url, displayname ) def parse_cas_response(self, cas_response_body): @@ -507,6 +510,19 @@ class SSOAuthHandler(object): localpart=localpart, default_display_name=user_display_name ) + self.complete_sso_login(registered_user_id, request, client_redirect_url) + + def complete_sso_login( + self, registered_user_id: str, request: SynapseRequest, client_redirect_url: str + ): + """Having figured out a mxid for this user, complete the HTTP request + + Args: + registered_user_id: + request: + client_redirect_url: + """ + login_token = self._macaroon_gen.generate_short_term_login_token( registered_user_id ) diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 358225902..9c1d41421 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -39,6 +39,7 @@ from synapse.http.servlet import ( parse_json_object_from_request, parse_string, ) +from synapse.logging.opentracing import set_tag from synapse.rest.client.transactions import HttpTransactionCache from synapse.rest.client.v2_alpha._base import client_patterns from synapse.storage.state import StateFilter @@ -81,6 +82,7 @@ class RoomCreateRestServlet(TransactionRestServlet): ) def on_PUT(self, request, txn_id): + set_tag("txn_id", txn_id) return self.txns.fetch_or_execute_request(request, self.on_POST, request) @defer.inlineCallbacks @@ -181,6 +183,9 @@ class RoomStateEventRestServlet(TransactionRestServlet): def on_PUT(self, request, room_id, event_type, state_key, txn_id=None): requester = yield self.auth.get_user_by_req(request) + if txn_id: + set_tag("txn_id", txn_id) + content = parse_json_object_from_request(request) event_dict = { @@ -209,6 +214,7 @@ class RoomStateEventRestServlet(TransactionRestServlet): ret = {} if event: + set_tag("event_id", event.event_id) ret = {"event_id": event.event_id} return 200, ret @@ -244,12 +250,15 @@ class RoomSendEventRestServlet(TransactionRestServlet): requester, event_dict, txn_id=txn_id ) + set_tag("event_id", event.event_id) return 200, {"event_id": event.event_id} def on_GET(self, request, room_id, event_type, txn_id): return 200, "Not implemented" def on_PUT(self, request, room_id, event_type, txn_id): + set_tag("txn_id", txn_id) + return self.txns.fetch_or_execute_request( request, self.on_POST, request, room_id, event_type, txn_id ) @@ -310,6 +319,8 @@ class JoinRoomAliasServlet(TransactionRestServlet): return 200, {"room_id": room_id} def on_PUT(self, request, room_identifier, txn_id): + set_tag("txn_id", txn_id) + return self.txns.fetch_or_execute_request( request, self.on_POST, request, room_identifier, txn_id ) @@ -350,6 +361,10 @@ class PublicRoomListRestServlet(TransactionRestServlet): limit = parse_integer(request, "limit", 0) since_token = parse_string(request, "since", None) + if limit == 0: + # zero is a special value which corresponds to no limit. + limit = None + handler = self.hs.get_room_list_handler() if server: data = yield handler.get_remote_public_room_list( @@ -387,6 +402,10 @@ class PublicRoomListRestServlet(TransactionRestServlet): else: network_tuple = ThirdPartyInstanceID.from_string(third_party_instance_id) + if limit == 0: + # zero is a special value which corresponds to no limit. + limit = None + handler = self.hs.get_room_list_handler() if server: data = yield handler.get_remote_public_room_list( @@ -655,6 +674,8 @@ class RoomForgetRestServlet(TransactionRestServlet): return 200, {} def on_PUT(self, request, room_id, txn_id): + set_tag("txn_id", txn_id) + return self.txns.fetch_or_execute_request( request, self.on_POST, request, room_id, txn_id ) @@ -701,6 +722,7 @@ class RoomMembershipRestServlet(TransactionRestServlet): content["id_server"], requester, txn_id, + content.get("id_access_token"), ) return 200, {} @@ -737,6 +759,8 @@ class RoomMembershipRestServlet(TransactionRestServlet): return True def on_PUT(self, request, room_id, membership_action, txn_id): + set_tag("txn_id", txn_id) + return self.txns.fetch_or_execute_request( request, self.on_POST, request, room_id, membership_action, txn_id ) @@ -770,9 +794,12 @@ class RoomRedactEventRestServlet(TransactionRestServlet): txn_id=txn_id, ) + set_tag("event_id", event.event_id) return 200, {"event_id": event.event_id} def on_PUT(self, request, room_id, event_id, txn_id): + set_tag("txn_id", txn_id) + return self.txns.fetch_or_execute_request( request, self.on_POST, request, room_id, event_id, txn_id ) diff --git a/synapse/rest/client/v2_alpha/account.py b/synapse/rest/client/v2_alpha/account.py index 785d01ea5..80cf7126a 100644 --- a/synapse/rest/client/v2_alpha/account.py +++ b/synapse/rest/client/v2_alpha/account.py @@ -103,16 +103,9 @@ class EmailPasswordRequestTokenRestServlet(RestServlet): raise SynapseError(400, "Email not found", Codes.THREEPID_NOT_FOUND) if self.config.threepid_behaviour_email == ThreepidBehaviour.REMOTE: - # Have the configured identity server handle the request - if not self.hs.config.account_threepid_delegate_email: - logger.warn( - "No upstream email account_threepid_delegate configured on the server to " - "handle this request" - ) - raise SynapseError( - 400, "Password reset by email is not supported on this homeserver" - ) + assert self.hs.config.account_threepid_delegate_email + # Have the configured identity server handle the request ret = yield self.identity_handler.requestEmailToken( self.hs.config.account_threepid_delegate_email, email, @@ -136,71 +129,11 @@ class EmailPasswordRequestTokenRestServlet(RestServlet): return 200, ret -class MsisdnPasswordRequestTokenRestServlet(RestServlet): - PATTERNS = client_patterns("/account/password/msisdn/requestToken$") - - def __init__(self, hs): - super(MsisdnPasswordRequestTokenRestServlet, self).__init__() - self.hs = hs - self.datastore = self.hs.get_datastore() - self.identity_handler = hs.get_handlers().identity_handler - - @defer.inlineCallbacks - def on_POST(self, request): - body = parse_json_object_from_request(request) - - assert_params_in_dict( - body, ["client_secret", "country", "phone_number", "send_attempt"] - ) - client_secret = body["client_secret"] - country = body["country"] - phone_number = body["phone_number"] - send_attempt = body["send_attempt"] - next_link = body.get("next_link") # Optional param - - msisdn = phone_number_to_msisdn(country, phone_number) - - if not check_3pid_allowed(self.hs, "msisdn", msisdn): - raise SynapseError( - 403, - "Account phone numbers are not authorized on this server", - Codes.THREEPID_DENIED, - ) - - existing_user_id = yield self.datastore.get_user_id_by_threepid( - "msisdn", msisdn - ) - - if existing_user_id is None: - raise SynapseError(400, "MSISDN not found", Codes.THREEPID_NOT_FOUND) - - if not self.hs.config.account_threepid_delegate_msisdn: - logger.warn( - "No upstream msisdn account_threepid_delegate configured on the server to " - "handle this request" - ) - raise SynapseError( - 400, - "Password reset by phone number is not supported on this homeserver", - ) - - ret = yield self.identity_handler.requestMsisdnToken( - self.hs.config.account_threepid_delegate_msisdn, - country, - phone_number, - client_secret, - send_attempt, - next_link, - ) - - return 200, ret - - class PasswordResetSubmitTokenServlet(RestServlet): """Handles 3PID validation token submission""" PATTERNS = client_patterns( - "/password_reset/(?P[^/]*)/submit_token/*$", releases=(), unstable=True + "/password_reset/(?P[^/]*)/submit_token$", releases=(), unstable=True ) def __init__(self, hs): @@ -214,6 +147,11 @@ class PasswordResetSubmitTokenServlet(RestServlet): self.config = hs.config self.clock = hs.get_clock() self.store = hs.get_datastore() + if self.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + self.failure_email_template, = load_jinja2_templates( + self.config.email_template_dir, + [self.config.email_password_reset_template_failure_html], + ) @defer.inlineCallbacks def on_GET(self, request, medium): @@ -261,34 +199,12 @@ class PasswordResetSubmitTokenServlet(RestServlet): request.setResponseCode(e.code) # Show a failure page with a reason - html_template, = load_jinja2_templates( - self.config.email_template_dir, - [self.config.email_password_reset_template_failure_html], - ) - template_vars = {"failure_reason": e.msg} - html = html_template.render(**template_vars) + html = self.failure_email_template.render(**template_vars) request.write(html.encode("utf-8")) finish_request(request) - @defer.inlineCallbacks - def on_POST(self, request, medium): - if medium != "email": - raise SynapseError( - 400, "This medium is currently not supported for password resets" - ) - - body = parse_json_object_from_request(request) - assert_params_in_dict(body, ["sid", "client_secret", "token"]) - - valid, _ = yield self.store.validate_threepid_session( - body["sid"], body["client_secret"], body["token"], self.clock.time_msec() - ) - response_code = 200 if valid else 400 - - return response_code, {"success": valid} - class PasswordRestServlet(RestServlet): PATTERNS = client_patterns("/account/password$") @@ -325,9 +241,7 @@ class PasswordRestServlet(RestServlet): else: requester = None result, params, _ = yield self.auth_handler.check_auth( - [[LoginType.EMAIL_IDENTITY], [LoginType.MSISDN]], - body, - self.hs.get_ip_from_request(request), + [[LoginType.EMAIL_IDENTITY]], body, self.hs.get_ip_from_request(request) ) if LoginType.EMAIL_IDENTITY in result: @@ -416,13 +330,35 @@ class EmailThreepidRequestTokenRestServlet(RestServlet): self.identity_handler = hs.get_handlers().identity_handler self.store = self.hs.get_datastore() + if self.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + template_html, template_text = load_jinja2_templates( + self.config.email_template_dir, + [ + self.config.email_add_threepid_template_html, + self.config.email_add_threepid_template_text, + ], + public_baseurl=self.config.public_baseurl, + ) + self.mailer = Mailer( + hs=self.hs, + app_name=self.config.email_app_name, + template_html=template_html, + template_text=template_text, + ) + @defer.inlineCallbacks def on_POST(self, request): + if self.config.threepid_behaviour_email == ThreepidBehaviour.OFF: + if self.config.local_threepid_handling_disabled_due_to_email_config: + logger.warn( + "Adding emails have been disabled due to lack of an email config" + ) + raise SynapseError( + 400, "Adding an email to your account is disabled on this server" + ) + body = parse_json_object_from_request(request) - assert_params_in_dict( - body, ["id_server", "client_secret", "email", "send_attempt"] - ) - id_server = "https://" + body["id_server"] # Assume https + assert_params_in_dict(body, ["client_secret", "email", "send_attempt"]) client_secret = body["client_secret"] email = body["email"] send_attempt = body["send_attempt"] @@ -442,9 +378,30 @@ class EmailThreepidRequestTokenRestServlet(RestServlet): if existing_user_id is not None: raise SynapseError(400, "Email is already in use", Codes.THREEPID_IN_USE) - ret = yield self.identity_handler.requestEmailToken( - id_server, email, client_secret, send_attempt, next_link - ) + if self.config.threepid_behaviour_email == ThreepidBehaviour.REMOTE: + assert self.hs.config.account_threepid_delegate_email + + # Have the configured identity server handle the request + ret = yield self.identity_handler.requestEmailToken( + self.hs.config.account_threepid_delegate_email, + email, + client_secret, + send_attempt, + next_link, + ) + else: + # Send threepid validation emails from Synapse + sid = yield self.identity_handler.send_threepid_validation( + email, + client_secret, + send_attempt, + self.mailer.send_add_threepid_mail, + next_link, + ) + + # Wrap the session id in a JSON object + ret = {"sid": sid} + return 200, ret @@ -461,10 +418,8 @@ class MsisdnThreepidRequestTokenRestServlet(RestServlet): def on_POST(self, request): body = parse_json_object_from_request(request) assert_params_in_dict( - body, - ["id_server", "client_secret", "country", "phone_number", "send_attempt"], + body, ["client_secret", "country", "phone_number", "send_attempt"] ) - id_server = "https://" + body["id_server"] # Assume https client_secret = body["client_secret"] country = body["country"] phone_number = body["phone_number"] @@ -485,12 +440,146 @@ class MsisdnThreepidRequestTokenRestServlet(RestServlet): if existing_user_id is not None: raise SynapseError(400, "MSISDN is already in use", Codes.THREEPID_IN_USE) + if not self.hs.config.account_threepid_delegate_msisdn: + logger.warn( + "No upstream msisdn account_threepid_delegate configured on the server to " + "handle this request" + ) + raise SynapseError( + 400, + "Adding phone numbers to user account is not supported by this homeserver", + ) + ret = yield self.identity_handler.requestMsisdnToken( - id_server, country, phone_number, client_secret, send_attempt, next_link + self.hs.config.account_threepid_delegate_msisdn, + country, + phone_number, + client_secret, + send_attempt, + next_link, ) + return 200, ret +class AddThreepidEmailSubmitTokenServlet(RestServlet): + """Handles 3PID validation token submission for adding an email to a user's account""" + + PATTERNS = client_patterns( + "/add_threepid/email/submit_token$", releases=(), unstable=True + ) + + def __init__(self, hs): + """ + Args: + hs (synapse.server.HomeServer): server + """ + super().__init__() + self.config = hs.config + self.clock = hs.get_clock() + self.store = hs.get_datastore() + if self.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + self.failure_email_template, = load_jinja2_templates( + self.config.email_template_dir, + [self.config.email_add_threepid_template_failure_html], + ) + + @defer.inlineCallbacks + def on_GET(self, request): + if self.config.threepid_behaviour_email == ThreepidBehaviour.OFF: + if self.config.local_threepid_handling_disabled_due_to_email_config: + logger.warn( + "Adding emails have been disabled due to lack of an email config" + ) + raise SynapseError( + 400, "Adding an email to your account is disabled on this server" + ) + elif self.config.threepid_behaviour_email == ThreepidBehaviour.REMOTE: + raise SynapseError( + 400, + "This homeserver is not validating threepids. Use an identity server " + "instead.", + ) + + sid = parse_string(request, "sid", required=True) + client_secret = parse_string(request, "client_secret", required=True) + token = parse_string(request, "token", required=True) + + # Attempt to validate a 3PID session + try: + # Mark the session as valid + next_link = yield self.store.validate_threepid_session( + sid, client_secret, token, self.clock.time_msec() + ) + + # Perform a 302 redirect if next_link is set + if next_link: + if next_link.startswith("file:///"): + logger.warn( + "Not redirecting to next_link as it is a local file: address" + ) + else: + request.setResponseCode(302) + request.setHeader("Location", next_link) + finish_request(request) + return None + + # Otherwise show the success template + html = self.config.email_add_threepid_template_success_html_content + request.setResponseCode(200) + except ThreepidValidationError as e: + request.setResponseCode(e.code) + + # Show a failure page with a reason + template_vars = {"failure_reason": e.msg} + html = self.failure_email_template.render(**template_vars) + + request.write(html.encode("utf-8")) + finish_request(request) + + +class AddThreepidMsisdnSubmitTokenServlet(RestServlet): + """Handles 3PID validation token submission for adding a phone number to a user's + account + """ + + PATTERNS = client_patterns( + "/add_threepid/msisdn/submit_token$", releases=(), unstable=True + ) + + def __init__(self, hs): + """ + Args: + hs (synapse.server.HomeServer): server + """ + super().__init__() + self.config = hs.config + self.clock = hs.get_clock() + self.store = hs.get_datastore() + self.identity_handler = hs.get_handlers().identity_handler + + @defer.inlineCallbacks + def on_POST(self, request): + if not self.config.account_threepid_delegate_msisdn: + raise SynapseError( + 400, + "This homeserver is not validating phone numbers. Use an identity server " + "instead.", + ) + + body = parse_json_object_from_request(request) + assert_params_in_dict(body, ["client_secret", "sid", "token"]) + + # Proxy submit_token request to msisdn threepid delegate + response = yield self.identity_handler.proxy_msisdn_submit_token( + self.config.account_threepid_delegate_msisdn, + body["client_secret"], + body["sid"], + body["token"], + ) + return 200, response + + class ThreepidRestServlet(RestServlet): PATTERNS = client_patterns("/account/3pid$") @@ -512,6 +601,8 @@ class ThreepidRestServlet(RestServlet): @defer.inlineCallbacks def on_POST(self, request): + requester = yield self.auth.get_user_by_req(request) + user_id = requester.user.to_string() body = parse_json_object_from_request(request) threepid_creds = body.get("threePidCreds") or body.get("three_pid_creds") @@ -519,33 +610,96 @@ class ThreepidRestServlet(RestServlet): raise SynapseError( 400, "Missing param three_pid_creds", Codes.MISSING_PARAM ) + assert_params_in_dict(threepid_creds, ["client_secret", "sid"]) + + client_secret = threepid_creds["client_secret"] + sid = threepid_creds["sid"] + + validation_session = yield self.identity_handler.validate_threepid_session( + client_secret, sid + ) + if validation_session: + yield self.auth_handler.add_threepid( + user_id, + validation_session["medium"], + validation_session["address"], + validation_session["validated_at"], + ) + return 200, {} + + raise SynapseError( + 400, "No validated 3pid session found", Codes.THREEPID_AUTH_FAILED + ) + + +class ThreepidAddRestServlet(RestServlet): + PATTERNS = client_patterns("/account/3pid/add$", releases=(), unstable=True) + + def __init__(self, hs): + super(ThreepidAddRestServlet, self).__init__() + self.hs = hs + self.identity_handler = hs.get_handlers().identity_handler + self.auth = hs.get_auth() + self.auth_handler = hs.get_auth_handler() + + @defer.inlineCallbacks + def on_POST(self, request): + requester = yield self.auth.get_user_by_req(request) + user_id = requester.user.to_string() + body = parse_json_object_from_request(request) + + assert_params_in_dict(body, ["client_secret", "sid"]) + client_secret = body["client_secret"] + sid = body["sid"] + + validation_session = yield self.identity_handler.validate_threepid_session( + client_secret, sid + ) + if validation_session: + yield self.auth_handler.add_threepid( + user_id, + validation_session["medium"], + validation_session["address"], + validation_session["validated_at"], + ) + return 200, {} + + raise SynapseError( + 400, "No validated 3pid session found", Codes.THREEPID_AUTH_FAILED + ) + + +class ThreepidBindRestServlet(RestServlet): + PATTERNS = client_patterns("/account/3pid/bind$", releases=(), unstable=True) + + def __init__(self, hs): + super(ThreepidBindRestServlet, self).__init__() + self.hs = hs + self.identity_handler = hs.get_handlers().identity_handler + self.auth = hs.get_auth() + + @defer.inlineCallbacks + def on_POST(self, request): + body = parse_json_object_from_request(request) + + assert_params_in_dict(body, ["id_server", "sid", "client_secret"]) + id_server = body["id_server"] + sid = body["sid"] + client_secret = body["client_secret"] + id_access_token = body.get("id_access_token") # optional requester = yield self.auth.get_user_by_req(request) user_id = requester.user.to_string() - threepid = yield self.identity_handler.threepid_from_creds(threepid_creds) - - if not threepid: - raise SynapseError(400, "Failed to auth 3pid", Codes.THREEPID_AUTH_FAILED) - - for reqd in ["medium", "address", "validated_at"]: - if reqd not in threepid: - logger.warn("Couldn't add 3pid: invalid response from ID server") - raise SynapseError(500, "Invalid response from ID Server") - - yield self.auth_handler.add_threepid( - user_id, threepid["medium"], threepid["address"], threepid["validated_at"] + yield self.identity_handler.bind_threepid( + client_secret, sid, user_id, id_server, id_access_token ) - if "bind" in body and body["bind"]: - logger.debug("Binding threepid %s to %s", threepid, user_id) - yield self.identity_handler.bind_threepid(threepid_creds, user_id) - return 200, {} class ThreepidUnbindRestServlet(RestServlet): - PATTERNS = client_patterns("/account/3pid/unbind$") + PATTERNS = client_patterns("/account/3pid/unbind$", releases=(), unstable=True) def __init__(self, hs): super(ThreepidUnbindRestServlet, self).__init__() @@ -627,13 +781,16 @@ class WhoamiRestServlet(RestServlet): def register_servlets(hs, http_server): EmailPasswordRequestTokenRestServlet(hs).register(http_server) - MsisdnPasswordRequestTokenRestServlet(hs).register(http_server) PasswordResetSubmitTokenServlet(hs).register(http_server) PasswordRestServlet(hs).register(http_server) DeactivateAccountRestServlet(hs).register(http_server) EmailThreepidRequestTokenRestServlet(hs).register(http_server) MsisdnThreepidRequestTokenRestServlet(hs).register(http_server) + AddThreepidEmailSubmitTokenServlet(hs).register(http_server) + AddThreepidMsisdnSubmitTokenServlet(hs).register(http_server) ThreepidRestServlet(hs).register(http_server) + ThreepidAddRestServlet(hs).register(http_server) + ThreepidBindRestServlet(hs).register(http_server) ThreepidUnbindRestServlet(hs).register(http_server) ThreepidDeleteRestServlet(hs).register(http_server) WhoamiRestServlet(hs).register(http_server) diff --git a/synapse/rest/client/v2_alpha/filter.py b/synapse/rest/client/v2_alpha/filter.py index c6ddf24c8..17a8bc736 100644 --- a/synapse/rest/client/v2_alpha/filter.py +++ b/synapse/rest/client/v2_alpha/filter.py @@ -17,7 +17,7 @@ import logging from twisted.internet import defer -from synapse.api.errors import AuthError, Codes, StoreError, SynapseError +from synapse.api.errors import AuthError, NotFoundError, StoreError, SynapseError from synapse.http.servlet import RestServlet, parse_json_object_from_request from synapse.types import UserID @@ -52,13 +52,15 @@ class GetFilterRestServlet(RestServlet): raise SynapseError(400, "Invalid filter_id") try: - filter = yield self.filtering.get_user_filter( + filter_collection = yield self.filtering.get_user_filter( user_localpart=target_user.localpart, filter_id=filter_id ) + except StoreError as e: + if e.code != 404: + raise + raise NotFoundError("No such filter") - return 200, filter.get_filter_json() - except (KeyError, StoreError): - raise SynapseError(400, "No such filter", errcode=Codes.NOT_FOUND) + return 200, filter_collection.get_filter_json() class CreateFilterRestServlet(RestServlet): diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index 5c7a5f357..4f24a124a 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -16,6 +16,7 @@ import hmac import logging +from typing import List, Union from six import string_types @@ -31,9 +32,14 @@ from synapse.api.errors import ( ThreepidValidationError, UnrecognizedRequestError, ) +from synapse.config import ConfigError +from synapse.config.captcha import CaptchaConfig +from synapse.config.consent_config import ConsentConfig from synapse.config.emailconfig import ThreepidBehaviour from synapse.config.ratelimiting import FederationRateLimitConfig +from synapse.config.registration import RegistrationConfig from synapse.config.server import is_threepid_reserved +from synapse.handlers.auth import AuthHandler from synapse.http.server import finish_request from synapse.http.servlet import ( RestServlet, @@ -131,15 +137,9 @@ class EmailRegisterRequestTokenRestServlet(RestServlet): raise SynapseError(400, "Email is already in use", Codes.THREEPID_IN_USE) if self.config.threepid_behaviour_email == ThreepidBehaviour.REMOTE: - if not self.hs.config.account_threepid_delegate_email: - logger.warn( - "No upstream email account_threepid_delegate configured on the server to " - "handle this request" - ) - raise SynapseError( - 400, "Registration by email is not supported on this homeserver" - ) + assert self.hs.config.account_threepid_delegate_email + # Have the configured identity server handle the request ret = yield self.identity_handler.requestEmailToken( self.hs.config.account_threepid_delegate_email, email, @@ -246,6 +246,18 @@ class RegistrationSubmitTokenServlet(RestServlet): self.clock = hs.get_clock() self.store = hs.get_datastore() + if self.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + self.failure_email_template, = load_jinja2_templates( + self.config.email_template_dir, + [self.config.email_registration_template_failure_html], + ) + + if self.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + self.failure_email_template, = load_jinja2_templates( + self.config.email_template_dir, + [self.config.email_registration_template_failure_html], + ) + @defer.inlineCallbacks def on_GET(self, request, medium): if medium != "email": @@ -289,17 +301,11 @@ class RegistrationSubmitTokenServlet(RestServlet): request.setResponseCode(200) except ThreepidValidationError as e: - # Show a failure page with a reason request.setResponseCode(e.code) # Show a failure page with a reason - html_template, = load_jinja2_templates( - self.config.email_template_dir, - [self.config.email_registration_template_failure_html], - ) - template_vars = {"failure_reason": e.msg} - html = html_template.render(**template_vars) + html = self.failure_email_template.render(**template_vars) request.write(html.encode("utf-8")) finish_request(request) @@ -334,6 +340,11 @@ class UsernameAvailabilityRestServlet(RestServlet): @defer.inlineCallbacks def on_GET(self, request): + if not self.hs.config.enable_registration: + raise SynapseError( + 403, "Registration has been disabled", errcode=Codes.FORBIDDEN + ) + ip = self.hs.get_ip_from_request(request) with self.ratelimiter.ratelimit(ip) as wait_deferred: yield wait_deferred @@ -366,6 +377,10 @@ class RegisterRestServlet(RestServlet): self.ratelimiter = hs.get_registration_ratelimiter() self.clock = hs.get_clock() + self._registration_flows = _calculate_registration_flows( + hs.config, self.auth_handler + ) + @interactive_auth_handler @defer.inlineCallbacks def on_POST(self, request): @@ -486,69 +501,8 @@ class RegisterRestServlet(RestServlet): assigned_user_id=registered_user_id, ) - # FIXME: need a better error than "no auth flow found" for scenarios - # where we required 3PID for registration but the user didn't give one - require_email = "email" in self.hs.config.registrations_require_3pid - require_msisdn = "msisdn" in self.hs.config.registrations_require_3pid - - show_msisdn = True - if self.hs.config.disable_msisdn_registration: - show_msisdn = False - require_msisdn = False - - flows = [] - if self.hs.config.enable_registration_captcha: - # only support 3PIDless registration if no 3PIDs are required - if not require_email and not require_msisdn: - # Also add a dummy flow here, otherwise if a client completes - # recaptcha first we'll assume they were going for this flow - # and complete the request, when they could have been trying to - # complete one of the flows with email/msisdn auth. - flows.extend([[LoginType.RECAPTCHA, LoginType.DUMMY]]) - # only support the email-only flow if we don't require MSISDN 3PIDs - if not require_msisdn: - flows.extend([[LoginType.RECAPTCHA, LoginType.EMAIL_IDENTITY]]) - - if show_msisdn: - # only support the MSISDN-only flow if we don't require email 3PIDs - if not require_email: - flows.extend([[LoginType.RECAPTCHA, LoginType.MSISDN]]) - # always let users provide both MSISDN & email - flows.extend( - [[LoginType.RECAPTCHA, LoginType.MSISDN, LoginType.EMAIL_IDENTITY]] - ) - else: - # only support 3PIDless registration if no 3PIDs are required - if not require_email and not require_msisdn: - flows.extend([[LoginType.DUMMY]]) - # only support the email-only flow if we don't require MSISDN 3PIDs - if not require_msisdn: - flows.extend([[LoginType.EMAIL_IDENTITY]]) - - if show_msisdn: - # only support the MSISDN-only flow if we don't require email 3PIDs - if not require_email or require_msisdn: - flows.extend([[LoginType.MSISDN]]) - # always let users provide both MSISDN & email - flows.extend([[LoginType.MSISDN, LoginType.EMAIL_IDENTITY]]) - - # Append m.login.terms to all flows if we're requiring consent - if self.hs.config.user_consent_at_registration: - new_flows = [] - for flow in flows: - inserted = False - # m.login.terms should go near the end but before msisdn or email auth - for i, stage in enumerate(flow): - if stage == LoginType.EMAIL_IDENTITY or stage == LoginType.MSISDN: - flow.insert(i, LoginType.TERMS) - inserted = True - break - if not inserted: - flow.append(LoginType.TERMS) - flows.extend(new_flows) - auth_result, params, session_id = yield self.auth_handler.check_auth( - flows, body, self.hs.get_ip_from_request(request) + self._registration_flows, body, self.hs.get_ip_from_request(request) ) # Check that we're not trying to register a denied 3pid. @@ -711,6 +665,83 @@ class RegisterRestServlet(RestServlet): ) +def _calculate_registration_flows( + # technically `config` has to provide *all* of these interfaces, not just one + config: Union[RegistrationConfig, ConsentConfig, CaptchaConfig], + auth_handler: AuthHandler, +) -> List[List[str]]: + """Get a suitable flows list for registration + + Args: + config: server configuration + auth_handler: authorization handler + + Returns: a list of supported flows + """ + # FIXME: need a better error than "no auth flow found" for scenarios + # where we required 3PID for registration but the user didn't give one + require_email = "email" in config.registrations_require_3pid + require_msisdn = "msisdn" in config.registrations_require_3pid + + show_msisdn = True + show_email = True + + if config.disable_msisdn_registration: + show_msisdn = False + require_msisdn = False + + enabled_auth_types = auth_handler.get_enabled_auth_types() + if LoginType.EMAIL_IDENTITY not in enabled_auth_types: + show_email = False + if require_email: + raise ConfigError( + "Configuration requires email address at registration, but email " + "validation is not configured" + ) + + if LoginType.MSISDN not in enabled_auth_types: + show_msisdn = False + if require_msisdn: + raise ConfigError( + "Configuration requires msisdn at registration, but msisdn " + "validation is not configured" + ) + + flows = [] + + # only support 3PIDless registration if no 3PIDs are required + if not require_email and not require_msisdn: + # Add a dummy step here, otherwise if a client completes + # recaptcha first we'll assume they were going for this flow + # and complete the request, when they could have been trying to + # complete one of the flows with email/msisdn auth. + flows.append([LoginType.DUMMY]) + + # only support the email-only flow if we don't require MSISDN 3PIDs + if show_email and not require_msisdn: + flows.append([LoginType.EMAIL_IDENTITY]) + + # only support the MSISDN-only flow if we don't require email 3PIDs + if show_msisdn and not require_email: + flows.append([LoginType.MSISDN]) + + if show_email and show_msisdn: + # always let users provide both MSISDN & email + flows.append([LoginType.MSISDN, LoginType.EMAIL_IDENTITY]) + + # Prepend m.login.terms to all flows if we're requiring consent + if config.user_consent_at_registration: + for flow in flows: + flow.insert(0, LoginType.TERMS) + + # Prepend recaptcha to all flows if we're requiring captcha + if config.enable_registration_captcha: + for flow in flows: + flow.insert(0, LoginType.RECAPTCHA) + + return flows + + def register_servlets(hs, http_server): EmailRegisterRequestTokenRestServlet(hs).register(http_server) MsisdnRegisterRequestTokenRestServlet(hs).register(http_server) diff --git a/synapse/rest/client/v2_alpha/room_keys.py b/synapse/rest/client/v2_alpha/room_keys.py index df4f44cd3..d59678643 100644 --- a/synapse/rest/client/v2_alpha/room_keys.py +++ b/synapse/rest/client/v2_alpha/room_keys.py @@ -375,7 +375,7 @@ class RoomKeysVersionServlet(RestServlet): "ed25519:something": "hijklmnop" } }, - "version": "42" + "version": "12345" } HTTP/1.1 200 OK diff --git a/synapse/rest/client/v2_alpha/sync.py b/synapse/rest/client/v2_alpha/sync.py index c98c5a380..a883c8add 100644 --- a/synapse/rest/client/v2_alpha/sync.py +++ b/synapse/rest/client/v2_alpha/sync.py @@ -21,7 +21,7 @@ from canonicaljson import json from twisted.internet import defer from synapse.api.constants import PresenceState -from synapse.api.errors import SynapseError +from synapse.api.errors import Codes, StoreError, SynapseError from synapse.api.filtering import DEFAULT_FILTER_COLLECTION, FilterCollection from synapse.events.utils import ( format_event_for_client_v2_without_room_id, @@ -119,25 +119,32 @@ class SyncRestServlet(RestServlet): request_key = (user, timeout, since, filter_id, full_state, device_id) - if filter_id: - if filter_id.startswith("{"): - try: - filter_object = json.loads(filter_id) - set_timeline_upper_limit( - filter_object, self.hs.config.filter_timeline_limit - ) - except Exception: - raise SynapseError(400, "Invalid filter JSON") - self.filtering.check_valid_filter(filter_object) - filter = FilterCollection(filter_object) - else: - filter = yield self.filtering.get_user_filter(user.localpart, filter_id) + if filter_id is None: + filter_collection = DEFAULT_FILTER_COLLECTION + elif filter_id.startswith("{"): + try: + filter_object = json.loads(filter_id) + set_timeline_upper_limit( + filter_object, self.hs.config.filter_timeline_limit + ) + except Exception: + raise SynapseError(400, "Invalid filter JSON") + self.filtering.check_valid_filter(filter_object) + filter_collection = FilterCollection(filter_object) else: - filter = DEFAULT_FILTER_COLLECTION + try: + filter_collection = yield self.filtering.get_user_filter( + user.localpart, filter_id + ) + except StoreError as err: + if err.code != 404: + raise + # fix up the description and errcode to be more useful + raise SynapseError(400, "No such filter", errcode=Codes.INVALID_PARAM) sync_config = SyncConfig( user=user, - filter_collection=filter, + filter_collection=filter_collection, is_guest=requester.is_guest, request_key=request_key, device_id=device_id, @@ -171,7 +178,7 @@ class SyncRestServlet(RestServlet): time_now = self.clock.time_msec() response_content = yield self.encode_response( - time_now, sync_result, requester.access_token_id, filter + time_now, sync_result, requester.access_token_id, filter_collection ) return 200, response_content diff --git a/synapse/rest/client/versions.py b/synapse/rest/client/versions.py index 0058b6b45..1044ae7b4 100644 --- a/synapse/rest/client/versions.py +++ b/synapse/rest/client/versions.py @@ -48,7 +48,24 @@ class VersionsRestServlet(RestServlet): "r0.5.0", ], # as per MSC1497: - "unstable_features": {"m.lazy_load_members": True}, + "unstable_features": { + "m.lazy_load_members": True, + # as per MSC2190, as amended by MSC2264 + # to be removed in r0.6.0 + "m.id_access_token": True, + # Advertise to clients that they need not include an `id_server` + # parameter during registration or password reset, as Synapse now decides + # itself which identity server to use (or none at all). + # + # This is also used by a client when they wish to bind a 3PID to their + # account, but not bind it to an identity server, the endpoint for which + # also requires `id_server`. If the homeserver is handling 3PID + # verification itself, there is no need to ask the user for `id_server` to + # be supplied. + "m.require_identity_server": False, + # as per MSC2290 + "m.separate_add_and_bind": True, + }, }, ) diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py index 5fefee4dd..65bbf0007 100644 --- a/synapse/rest/media/v1/_base.py +++ b/synapse/rest/media/v1/_base.py @@ -195,7 +195,7 @@ def respond_with_responder(request, responder, media_type, file_size, upload_nam respond_404(request) return - logger.debug("Responding to media request with responder %s") + logger.debug("Responding to media request with responder %s", responder) add_file_headers(request, media_type, file_size, upload_name) try: with responder: diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 7a56cd4b6..0c68c3aad 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -270,7 +270,7 @@ class PreviewUrlResource(DirectServeResource): logger.debug("Calculated OG for %s as %s" % (url, og)) - jsonog = json.dumps(og).encode("utf8") + jsonog = json.dumps(og) # store OG in history-aware DB cache yield self.store.store_url_cache( @@ -283,7 +283,7 @@ class PreviewUrlResource(DirectServeResource): media_info["created_ts"], ) - return jsonog + return jsonog.encode("utf8") @defer.inlineCallbacks def _download_url(self, url, user): diff --git a/synapse/rest/media/v1/thumbnailer.py b/synapse/rest/media/v1/thumbnailer.py index c995d7e04..8cf415e29 100644 --- a/synapse/rest/media/v1/thumbnailer.py +++ b/synapse/rest/media/v1/thumbnailer.py @@ -82,13 +82,21 @@ class Thumbnailer(object): else: return (max_height * self.width) // self.height, max_height + def _resize(self, width, height): + # 1-bit or 8-bit color palette images need converting to RGB + # otherwise they will be scaled using nearest neighbour which + # looks awful + if self.image.mode in ["1", "P"]: + self.image = self.image.convert("RGB") + return self.image.resize((width, height), Image.ANTIALIAS) + def scale(self, width, height, output_type): """Rescales the image to the given dimensions. Returns: BytesIO: the bytes of the encoded image ready to be written to disk """ - scaled = self.image.resize((width, height), Image.ANTIALIAS) + scaled = self._resize(width, height) return self._encode_image(scaled, output_type) def crop(self, width, height, output_type): @@ -107,13 +115,13 @@ class Thumbnailer(object): """ if width * self.height > height * self.width: scaled_height = (width * self.height) // self.width - scaled_image = self.image.resize((width, scaled_height), Image.ANTIALIAS) + scaled_image = self._resize(width, scaled_height) crop_top = (scaled_height - height) // 2 crop_bottom = height + crop_top cropped = scaled_image.crop((0, crop_top, width, crop_bottom)) else: scaled_width = (height * self.width) // self.height - scaled_image = self.image.resize((scaled_width, height), Image.ANTIALIAS) + scaled_image = self._resize(scaled_width, height) crop_left = (scaled_width - width) // 2 crop_right = width + crop_left cropped = scaled_image.crop((crop_left, 0, crop_right, height)) diff --git a/synapse/rest/media/v1/upload_resource.py b/synapse/rest/media/v1/upload_resource.py index 5d76bbdf6..83d005812 100644 --- a/synapse/rest/media/v1/upload_resource.py +++ b/synapse/rest/media/v1/upload_resource.py @@ -17,7 +17,7 @@ import logging from twisted.web.server import NOT_DONE_YET -from synapse.api.errors import SynapseError +from synapse.api.errors import Codes, SynapseError from synapse.http.server import ( DirectServeResource, respond_with_json, @@ -56,7 +56,11 @@ class UploadResource(DirectServeResource): if content_length is None: raise SynapseError(msg="Request must specify a Content-Length", code=400) if int(content_length) > self.max_upload_size: - raise SynapseError(msg="Upload request body is too large", code=413) + raise SynapseError( + msg="Upload request body is too large", + code=413, + errcode=Codes.TOO_LARGE, + ) upload_name = parse_string(request, b"filename", encoding=None) if upload_name: diff --git a/synapse/server.py b/synapse/server.py index 9e28dba2b..1fcc7375d 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -221,6 +221,7 @@ class HomeServer(object): self.clock = Clock(reactor) self.distributor = Distributor() self.ratelimiter = Ratelimiter() + self.admin_redaction_ratelimiter = Ratelimiter() self.registration_ratelimiter = Ratelimiter() self.datastore = None @@ -279,6 +280,9 @@ class HomeServer(object): def get_registration_ratelimiter(self): return self.registration_ratelimiter + def get_admin_redaction_ratelimiter(self): + return self.admin_redaction_ratelimiter + def build_federation_client(self): return FederationClient(self) diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index 2b0f4c79e..dc9f5a900 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -33,7 +33,7 @@ from synapse.state import v1, v2 from synapse.util.async_helpers import Linearizer from synapse.util.caches import get_cache_factor_for from synapse.util.caches.expiringcache import ExpiringCache -from synapse.util.metrics import Measure +from synapse.util.metrics import Measure, measure_func logger = logging.getLogger(__name__) @@ -191,11 +191,22 @@ class StateHandler(object): return joined_users @defer.inlineCallbacks - def get_current_hosts_in_room(self, room_id, latest_event_ids=None): - if not latest_event_ids: - latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id) - logger.debug("calling resolve_state_groups from get_current_hosts_in_room") - entry = yield self.resolve_state_groups_for_events(room_id, latest_event_ids) + def get_current_hosts_in_room(self, room_id): + event_ids = yield self.store.get_latest_event_ids_in_room(room_id) + return (yield self.get_hosts_in_room_at_events(room_id, event_ids)) + + @defer.inlineCallbacks + def get_hosts_in_room_at_events(self, room_id, event_ids): + """Get the hosts that were in a room at the given event ids + + Args: + room_id (str): + event_ids (list[str]): + + Returns: + Deferred[list[str]]: the hosts in the room at the given events + """ + entry = yield self.resolve_state_groups_for_events(room_id, event_ids) joined_hosts = yield self.store.get_joined_hosts(room_id, entry) return joined_hosts @@ -344,6 +355,7 @@ class StateHandler(object): return context + @measure_func() @defer.inlineCallbacks def resolve_state_groups_for_events(self, room_id, event_ids): """ Given a list of event_ids this method fetches the state at each diff --git a/synapse/static/client/login/js/login.js b/synapse/static/client/login/js/login.js index e02663f50..276c271bb 100644 --- a/synapse/static/client/login/js/login.js +++ b/synapse/static/client/login/js/login.js @@ -62,7 +62,7 @@ var show_login = function() { $("#sso_flow").show(); } - if (!matrixLogin.serverAcceptsPassword && !matrixLogin.serverAcceptsCas) { + if (!matrixLogin.serverAcceptsPassword && !matrixLogin.serverAcceptsCas && !matrixLogin.serverAcceptsSso) { $("#no_login_types").show(); } }; diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index abe16334e..f5906fcd5 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -20,6 +20,7 @@ import random import sys import threading import time +from typing import Iterable, Tuple from six import PY2, iteritems, iterkeys, itervalues from six.moves import builtins, intern, range @@ -30,7 +31,7 @@ from prometheus_client import Histogram from twisted.internet import defer from synapse.api.errors import StoreError -from synapse.logging.context import LoggingContext, PreserveLoggingContext +from synapse.logging.context import LoggingContext, make_deferred_yieldable from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage.engines import PostgresEngine, Sqlite3Engine from synapse.types import get_domain_from_id @@ -550,8 +551,9 @@ class SQLBaseStore(object): return func(conn, *args, **kwargs) - with PreserveLoggingContext(): - result = yield self._db_pool.runWithConnection(inner_func, *args, **kwargs) + result = yield make_deferred_yieldable( + self._db_pool.runWithConnection(inner_func, *args, **kwargs) + ) return result @@ -1162,19 +1164,18 @@ class SQLBaseStore(object): if not iterable: return [] - sql = "SELECT %s FROM %s" % (", ".join(retcols), table) - - clauses = [] - values = [] - clauses.append("%s IN (%s)" % (column, ",".join("?" for _ in iterable))) - values.extend(iterable) + clause, values = make_in_list_sql_clause(txn.database_engine, column, iterable) + clauses = [clause] for key, value in iteritems(keyvalues): clauses.append("%s = ?" % (key,)) values.append(value) - if clauses: - sql = "%s WHERE %s" % (sql, " AND ".join(clauses)) + sql = "SELECT %s FROM %s WHERE %s" % ( + ", ".join(retcols), + table, + " AND ".join(clauses), + ) txn.execute(sql, values) return cls.cursor_to_dict(txn) @@ -1323,10 +1324,8 @@ class SQLBaseStore(object): sql = "DELETE FROM %s" % table - clauses = [] - values = [] - clauses.append("%s IN (%s)" % (column, ",".join("?" for _ in iterable))) - values.extend(iterable) + clause, values = make_in_list_sql_clause(txn.database_engine, column, iterable) + clauses = [clause] for key, value in iteritems(keyvalues): clauses.append("%s = ?" % (key,)) @@ -1693,3 +1692,30 @@ def db_to_json(db_content): except Exception: logging.warning("Tried to decode '%r' as JSON and failed", db_content) raise + + +def make_in_list_sql_clause( + database_engine, column: str, iterable: Iterable +) -> Tuple[str, Iterable]: + """Returns an SQL clause that checks the given column is in the iterable. + + On SQLite this expands to `column IN (?, ?, ...)`, whereas on Postgres + it expands to `column = ANY(?)`. While both DBs support the `IN` form, + using the `ANY` form on postgres means that it views queries with + different length iterables as the same, helping the query stats. + + Args: + database_engine + column: Name of the column + iterable: The values to check the column against. + + Returns: + A tuple of SQL query and the args + """ + + if database_engine.supports_using_any_list: + # This should hopefully be faster, but also makes postgres query + # stats easier to understand. + return "%s = ANY(?)" % (column,), [list(iterable)] + else: + return "%s IN (%s)" % (column, ",".join("?" for _ in iterable)), list(iterable) diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py index e5f0668f0..80b57a948 100644 --- a/synapse/storage/background_updates.py +++ b/synapse/storage/background_updates.py @@ -140,7 +140,7 @@ class BackgroundUpdateStore(SQLBaseStore): "background_updates", keyvalues=None, retcol="1", - desc="check_background_updates", + desc="has_completed_background_updates", ) if not updates: self._all_done = True @@ -148,6 +148,26 @@ class BackgroundUpdateStore(SQLBaseStore): return False + async def has_completed_background_update(self, update_name) -> bool: + """Check if the given background update has finished running. + """ + + if self._all_done: + return True + + if update_name in self._background_update_queue: + return False + + update_exists = await self._simple_select_one_onecol( + "background_updates", + keyvalues={"update_name": update_name}, + retcol="1", + desc="has_completed_background_update", + allow_none=True, + ) + + return not update_exists + @defer.inlineCallbacks def do_next_background_update(self, desired_duration_ms): """Does some amount of work on the next queued background update @@ -218,7 +238,7 @@ class BackgroundUpdateStore(SQLBaseStore): duration_ms = time_stop - time_start logger.info( - "Updating %r. Updated %r items in %rms." + "Running background update %r. Processed %r items in %rms." " (total_rate=%r/ms, current_rate=%r/ms, total_updated=%r, batch_size=%r)", update_name, items_updated, diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index 6db8c5407..067820a5d 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -19,7 +19,7 @@ from six import iteritems from twisted.internet import defer -from synapse.metrics.background_process_metrics import run_as_background_process +from synapse.metrics.background_process_metrics import wrap_as_background_process from synapse.util.caches import CACHE_SIZE_FACTOR from . import background_updates @@ -33,14 +33,9 @@ logger = logging.getLogger(__name__) LAST_SEEN_GRANULARITY = 120 * 1000 -class ClientIpStore(background_updates.BackgroundUpdateStore): +class ClientIpBackgroundUpdateStore(background_updates.BackgroundUpdateStore): def __init__(self, db_conn, hs): - - self.client_ip_last_seen = Cache( - name="client_ip_last_seen", keylen=4, max_entries=50000 * CACHE_SIZE_FACTOR - ) - - super(ClientIpStore, self).__init__(db_conn, hs) + super(ClientIpBackgroundUpdateStore, self).__init__(db_conn, hs) self.register_background_index_update( "user_ips_device_index", @@ -85,14 +80,9 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): "user_ips_drop_nonunique_index", self._remove_user_ip_nonunique ) - # (user_id, access_token, ip,) -> (user_agent, device_id, last_seen) - self._batch_row_update = {} - - self._client_ip_looper = self._clock.looping_call( - self._update_client_ips_batch, 5 * 1000 - ) - self.hs.get_reactor().addSystemEventTrigger( - "before", "shutdown", self._update_client_ips_batch + # Update the last seen info in devices. + self.register_background_update_handler( + "devices_last_seen", self._devices_last_seen_update ) @defer.inlineCallbacks @@ -293,6 +283,110 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): return batch_size + @defer.inlineCallbacks + def _devices_last_seen_update(self, progress, batch_size): + """Background update to insert last seen info into devices table + """ + + last_user_id = progress.get("last_user_id", "") + last_device_id = progress.get("last_device_id", "") + + def _devices_last_seen_update_txn(txn): + # This consists of two queries: + # + # 1. The sub-query searches for the next N devices and joins + # against user_ips to find the max last_seen associated with + # that device. + # 2. The outer query then joins again against user_ips on + # user/device/last_seen. This *should* hopefully only + # return one row, but if it does return more than one then + # we'll just end up updating the same device row multiple + # times, which is fine. + + if self.database_engine.supports_tuple_comparison: + where_clause = "(user_id, device_id) > (?, ?)" + where_args = [last_user_id, last_device_id] + else: + # We explicitly do a `user_id >= ? AND (...)` here to ensure + # that an index is used, as doing `user_id > ? OR (user_id = ? AND ...)` + # makes it hard for query optimiser to tell that it can use the + # index on user_id + where_clause = "user_id >= ? AND (user_id > ? OR device_id > ?)" + where_args = [last_user_id, last_user_id, last_device_id] + + sql = """ + SELECT + last_seen, ip, user_agent, user_id, device_id + FROM ( + SELECT + user_id, device_id, MAX(u.last_seen) AS last_seen + FROM devices + INNER JOIN user_ips AS u USING (user_id, device_id) + WHERE %(where_clause)s + GROUP BY user_id, device_id + ORDER BY user_id ASC, device_id ASC + LIMIT ? + ) c + INNER JOIN user_ips AS u USING (user_id, device_id, last_seen) + """ % { + "where_clause": where_clause + } + txn.execute(sql, where_args + [batch_size]) + + rows = txn.fetchall() + if not rows: + return 0 + + sql = """ + UPDATE devices + SET last_seen = ?, ip = ?, user_agent = ? + WHERE user_id = ? AND device_id = ? + """ + txn.execute_batch(sql, rows) + + _, _, _, user_id, device_id = rows[-1] + self._background_update_progress_txn( + txn, + "devices_last_seen", + {"last_user_id": user_id, "last_device_id": device_id}, + ) + + return len(rows) + + updated = yield self.runInteraction( + "_devices_last_seen_update", _devices_last_seen_update_txn + ) + + if not updated: + yield self._end_background_update("devices_last_seen") + + return updated + + +class ClientIpStore(ClientIpBackgroundUpdateStore): + def __init__(self, db_conn, hs): + + self.client_ip_last_seen = Cache( + name="client_ip_last_seen", keylen=4, max_entries=50000 * CACHE_SIZE_FACTOR + ) + + super(ClientIpStore, self).__init__(db_conn, hs) + + self.user_ips_max_age = hs.config.user_ips_max_age + + # (user_id, access_token, ip,) -> (user_agent, device_id, last_seen) + self._batch_row_update = {} + + self._client_ip_looper = self._clock.looping_call( + self._update_client_ips_batch, 5 * 1000 + ) + self.hs.get_reactor().addSystemEventTrigger( + "before", "shutdown", self._update_client_ips_batch + ) + + if self.user_ips_max_age: + self._clock.looping_call(self._prune_old_user_ips, 5 * 1000) + @defer.inlineCallbacks def insert_client_ip( self, user_id, access_token, ip, user_agent, device_id, now=None @@ -314,20 +408,19 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): self._batch_row_update[key] = (user_agent, device_id, now) + @wrap_as_background_process("update_client_ips") def _update_client_ips_batch(self): # If the DB pool has already terminated, don't try updating if not self.hs.get_db_pool().running: return - def update(): - to_update = self._batch_row_update - self._batch_row_update = {} - return self.runInteraction( - "_update_client_ips_batch", self._update_client_ips_batch_txn, to_update - ) + to_update = self._batch_row_update + self._batch_row_update = {} - return run_as_background_process("update_client_ips", update) + return self.runInteraction( + "_update_client_ips_batch", self._update_client_ips_batch_txn, to_update + ) def _update_client_ips_batch_txn(self, txn, to_update): if "user_ips" in self._unsafe_to_upsert_tables or ( @@ -354,6 +447,21 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): }, lock=False, ) + + # Technically an access token might not be associated with + # a device so we need to check. + if device_id: + self._simple_upsert_txn( + txn, + table="devices", + keyvalues={"user_id": user_id, "device_id": device_id}, + values={ + "user_agent": user_agent, + "last_seen": last_seen, + "ip": ip, + }, + lock=False, + ) except Exception as e: # Failed to upsert, log and continue logger.error("Failed to insert client IP %r: %r", entry, e) @@ -372,19 +480,14 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): keys giving the column names """ - res = yield self.runInteraction( - "get_last_client_ip_by_device", - self._get_last_client_ip_by_device_txn, - user_id, - device_id, - retcols=( - "user_id", - "access_token", - "ip", - "user_agent", - "device_id", - "last_seen", - ), + keyvalues = {"user_id": user_id} + if device_id is not None: + keyvalues["device_id"] = device_id + + res = yield self._simple_select_list( + table="devices", + keyvalues=keyvalues, + retcols=("user_id", "ip", "user_agent", "device_id", "last_seen"), ) ret = {(d["user_id"], d["device_id"]): d for d in res} @@ -403,42 +506,6 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): } return ret - @classmethod - def _get_last_client_ip_by_device_txn(cls, txn, user_id, device_id, retcols): - where_clauses = [] - bindings = [] - if device_id is None: - where_clauses.append("user_id = ?") - bindings.extend((user_id,)) - else: - where_clauses.append("(user_id = ? AND device_id = ?)") - bindings.extend((user_id, device_id)) - - if not where_clauses: - return [] - - inner_select = ( - "SELECT MAX(last_seen) mls, user_id, device_id FROM user_ips " - "WHERE %(where)s " - "GROUP BY user_id, device_id" - ) % {"where": " OR ".join(where_clauses)} - - sql = ( - "SELECT %(retcols)s FROM user_ips " - "JOIN (%(inner_select)s) ips ON" - " user_ips.last_seen = ips.mls AND" - " user_ips.user_id = ips.user_id AND" - " (user_ips.device_id = ips.device_id OR" - " (user_ips.device_id IS NULL AND ips.device_id IS NULL)" - " )" - ) % { - "retcols": ",".join("user_ips." + c for c in retcols), - "inner_select": inner_select, - } - - txn.execute(sql, bindings) - return cls.cursor_to_dict(txn) - @defer.inlineCallbacks def get_user_ip_and_agents(self, user): user_id = user.to_string() @@ -470,3 +537,45 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): } for (access_token, ip), (user_agent, last_seen) in iteritems(results) ) + + @wrap_as_background_process("prune_old_user_ips") + async def _prune_old_user_ips(self): + """Removes entries in user IPs older than the configured period. + """ + + if self.user_ips_max_age is None: + # Nothing to do + return + + if not await self.has_completed_background_update("devices_last_seen"): + # Only start pruning if we have finished populating the devices + # last seen info. + return + + # We do a slightly funky SQL delete to ensure we don't try and delete + # too much at once (as the table may be very large from before we + # started pruning). + # + # This works by finding the max last_seen that is less than the given + # time, but has no more than N rows before it, deleting all rows with + # a lesser last_seen time. (We COALESCE so that the sub-SELECT always + # returns exactly one row). + sql = """ + DELETE FROM user_ips + WHERE last_seen <= ( + SELECT COALESCE(MAX(last_seen), -1) + FROM ( + SELECT last_seen FROM user_ips + WHERE last_seen <= ? + ORDER BY last_seen ASC + LIMIT 5000 + ) AS u + ) + """ + + timestamp = self.clock.time_msec() - self.user_ips_max_age + + def _prune_old_user_ips_txn(txn): + txn.execute(sql, (timestamp,)) + + await self.runInteraction("_prune_old_user_ips", _prune_old_user_ips_txn) diff --git a/synapse/storage/deviceinbox.py b/synapse/storage/deviceinbox.py index 6b7458304..f04aad074 100644 --- a/synapse/storage/deviceinbox.py +++ b/synapse/storage/deviceinbox.py @@ -20,7 +20,7 @@ from canonicaljson import json from twisted.internet import defer from synapse.logging.opentracing import log_kv, set_tag, trace -from synapse.storage._base import SQLBaseStore +from synapse.storage._base import SQLBaseStore, make_in_list_sql_clause from synapse.storage.background_updates import BackgroundUpdateStore from synapse.util.caches.expiringcache import ExpiringCache @@ -208,11 +208,11 @@ class DeviceInboxWorkerStore(SQLBaseStore): ) -class DeviceInboxStore(DeviceInboxWorkerStore, BackgroundUpdateStore): +class DeviceInboxBackgroundUpdateStore(BackgroundUpdateStore): DEVICE_INBOX_STREAM_ID = "device_inbox_stream_drop" def __init__(self, db_conn, hs): - super(DeviceInboxStore, self).__init__(db_conn, hs) + super(DeviceInboxBackgroundUpdateStore, self).__init__(db_conn, hs) self.register_background_index_update( "device_inbox_stream_index", @@ -225,6 +225,26 @@ class DeviceInboxStore(DeviceInboxWorkerStore, BackgroundUpdateStore): self.DEVICE_INBOX_STREAM_ID, self._background_drop_index_device_inbox ) + @defer.inlineCallbacks + def _background_drop_index_device_inbox(self, progress, batch_size): + def reindex_txn(conn): + txn = conn.cursor() + txn.execute("DROP INDEX IF EXISTS device_inbox_stream_id") + txn.close() + + yield self.runWithConnection(reindex_txn) + + yield self._end_background_update(self.DEVICE_INBOX_STREAM_ID) + + return 1 + + +class DeviceInboxStore(DeviceInboxWorkerStore, DeviceInboxBackgroundUpdateStore): + DEVICE_INBOX_STREAM_ID = "device_inbox_stream_drop" + + def __init__(self, db_conn, hs): + super(DeviceInboxStore, self).__init__(db_conn, hs) + # Map of (user_id, device_id) to the last stream_id that has been # deleted up to. This is so that we can no op deletions. self._last_device_delete_cache = ExpiringCache( @@ -358,15 +378,15 @@ class DeviceInboxStore(DeviceInboxWorkerStore, BackgroundUpdateStore): else: if not devices: continue - sql = ( - "SELECT device_id FROM devices" - " WHERE user_id = ? AND device_id IN (" - + ",".join("?" * len(devices)) - + ")" + + clause, args = make_in_list_sql_clause( + txn.database_engine, "device_id", devices ) + sql = "SELECT device_id FROM devices WHERE user_id = ? AND " + clause + # TODO: Maybe this needs to be done in batches if there are # too many local devices for a given user. - txn.execute(sql, [user_id] + devices) + txn.execute(sql, [user_id] + list(args)) for row in txn: # Only insert into the local inbox if the device exists on # this server @@ -435,16 +455,3 @@ class DeviceInboxStore(DeviceInboxWorkerStore, BackgroundUpdateStore): return self.runInteraction( "get_all_new_device_messages", get_all_new_device_messages_txn ) - - @defer.inlineCallbacks - def _background_drop_index_device_inbox(self, progress, batch_size): - def reindex_txn(conn): - txn = conn.cursor() - txn.execute("DROP INDEX IF EXISTS device_inbox_stream_id") - txn.close() - - yield self.runWithConnection(reindex_txn) - - yield self._end_background_update(self.DEVICE_INBOX_STREAM_ID) - - return 1 diff --git a/synapse/storage/devices.py b/synapse/storage/devices.py index 2db3ef400..f7a354234 100644 --- a/synapse/storage/devices.py +++ b/synapse/storage/devices.py @@ -30,7 +30,12 @@ from synapse.logging.opentracing import ( whitelisted_homeserver, ) from synapse.metrics.background_process_metrics import run_as_background_process -from synapse.storage._base import Cache, SQLBaseStore, db_to_json +from synapse.storage._base import ( + Cache, + SQLBaseStore, + db_to_json, + make_in_list_sql_clause, +) from synapse.storage.background_updates import BackgroundUpdateStore from synapse.util import batch_iter from synapse.util.caches.descriptors import cached, cachedInlineCallbacks, cachedList @@ -487,11 +492,14 @@ class DeviceWorkerStore(SQLBaseStore): sql = """ SELECT DISTINCT user_id FROM device_lists_stream WHERE stream_id > ? - AND user_id IN (%s) + AND """ for chunk in batch_iter(to_check, 100): - txn.execute(sql % (",".join("?" for _ in chunk),), (from_key,) + chunk) + clause, args = make_in_list_sql_clause( + txn.database_engine, "user_id", chunk + ) + txn.execute(sql + clause, (from_key,) + tuple(args)) changes.update(user_id for user_id, in txn) return changes @@ -573,17 +581,9 @@ class DeviceWorkerStore(SQLBaseStore): return results -class DeviceStore(DeviceWorkerStore, BackgroundUpdateStore): +class DeviceBackgroundUpdateStore(BackgroundUpdateStore): def __init__(self, db_conn, hs): - super(DeviceStore, self).__init__(db_conn, hs) - - # Map of (user_id, device_id) -> bool. If there is an entry that implies - # the device exists. - self.device_id_exists_cache = Cache( - name="device_id_exists", keylen=2, max_entries=10000 - ) - - self._clock.looping_call(self._prune_old_outbound_device_pokes, 60 * 60 * 1000) + super(DeviceBackgroundUpdateStore, self).__init__(db_conn, hs) self.register_background_index_update( "device_lists_stream_idx", @@ -616,6 +616,31 @@ class DeviceStore(DeviceWorkerStore, BackgroundUpdateStore): self._drop_device_list_streams_non_unique_indexes, ) + @defer.inlineCallbacks + def _drop_device_list_streams_non_unique_indexes(self, progress, batch_size): + def f(conn): + txn = conn.cursor() + txn.execute("DROP INDEX IF EXISTS device_lists_remote_cache_id") + txn.execute("DROP INDEX IF EXISTS device_lists_remote_extremeties_id") + txn.close() + + yield self.runWithConnection(f) + yield self._end_background_update(DROP_DEVICE_LIST_STREAMS_NON_UNIQUE_INDEXES) + return 1 + + +class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): + def __init__(self, db_conn, hs): + super(DeviceStore, self).__init__(db_conn, hs) + + # Map of (user_id, device_id) -> bool. If there is an entry that implies + # the device exists. + self.device_id_exists_cache = Cache( + name="device_id_exists", keylen=2, max_entries=10000 + ) + + self._clock.looping_call(self._prune_old_outbound_device_pokes, 60 * 60 * 1000) + @defer.inlineCallbacks def store_device(self, user_id, device_id, initial_device_display_name): """Ensure the given device is known; add it to the store if not @@ -987,15 +1012,3 @@ class DeviceStore(DeviceWorkerStore, BackgroundUpdateStore): "_prune_old_outbound_device_pokes", _prune_txn, ) - - @defer.inlineCallbacks - def _drop_device_list_streams_non_unique_indexes(self, progress, batch_size): - def f(conn): - txn = conn.cursor() - txn.execute("DROP INDEX IF EXISTS device_lists_remote_cache_id") - txn.execute("DROP INDEX IF EXISTS device_lists_remote_extremeties_id") - txn.close() - - yield self.runWithConnection(f) - yield self._end_background_update(DROP_DEVICE_LIST_STREAMS_NON_UNIQUE_INDEXES) - return 1 diff --git a/synapse/storage/end_to_end_keys.py b/synapse/storage/end_to_end_keys.py index 3c82f789f..ebcd1c9ea 100644 --- a/synapse/storage/end_to_end_keys.py +++ b/synapse/storage/end_to_end_keys.py @@ -42,7 +42,8 @@ class EndToEndKeyWorkerStore(SQLBaseStore): This option only takes effect if include_all_devices is true. Returns: Dict mapping from user-id to dict mapping from device_id to - dict containing "key_json", "device_display_name". + key data. The key data will be a dict in the same format as the + DeviceKeys type returned by POST /_matrix/client/r0/keys/query. """ set_tag("query_list", query_list) if not query_list: @@ -56,11 +57,20 @@ class EndToEndKeyWorkerStore(SQLBaseStore): include_deleted_devices, ) + # Build the result structure, un-jsonify the results, and add the + # "unsigned" section + rv = {} for user_id, device_keys in iteritems(results): + rv[user_id] = {} for device_id, device_info in iteritems(device_keys): - device_info["keys"] = db_to_json(device_info.pop("key_json")) + r = db_to_json(device_info.pop("key_json")) + r["unsigned"] = {} + display_name = device_info["device_display_name"] + if display_name is not None: + r["unsigned"]["device_display_name"] = display_name + rv[user_id][device_id] = r - return results + return rv @trace def _get_e2e_device_keys_txn( diff --git a/synapse/storage/engines/postgres.py b/synapse/storage/engines/postgres.py index 289b6bc28..b7c4eda33 100644 --- a/synapse/storage/engines/postgres.py +++ b/synapse/storage/engines/postgres.py @@ -22,6 +22,13 @@ class PostgresEngine(object): def __init__(self, database_module, database_config): self.module = database_module self.module.extensions.register_type(self.module.extensions.UNICODE) + + # Disables passing `bytes` to txn.execute, c.f. #6186. If you do + # actually want to use bytes than wrap it in `bytearray`. + def _disable_bytes_adapter(_): + raise Exception("Passing bytes to DB is disabled.") + + self.module.extensions.register_adapter(bytes, _disable_bytes_adapter) self.synchronous_commit = database_config.get("synchronous_commit", True) self._version = None # unknown as yet @@ -72,6 +79,19 @@ class PostgresEngine(object): """ return True + @property + def supports_tuple_comparison(self): + """ + Do we support comparing tuples, i.e. `(a, b) > (c, d)`? + """ + return True + + @property + def supports_using_any_list(self): + """Do we support using `a = ANY(?)` and passing a list + """ + return True + def is_deadlock(self, error): if isinstance(error, self.module.DatabaseError): # https://www.postgresql.org/docs/current/static/errcodes-appendix.html diff --git a/synapse/storage/engines/sqlite.py b/synapse/storage/engines/sqlite.py index e9b9caa49..ddad17dc5 100644 --- a/synapse/storage/engines/sqlite.py +++ b/synapse/storage/engines/sqlite.py @@ -38,6 +38,20 @@ class Sqlite3Engine(object): """ return self.module.sqlite_version_info >= (3, 24, 0) + @property + def supports_tuple_comparison(self): + """ + Do we support comparing tuples, i.e. `(a, b) > (c, d)`? This requires + SQLite 3.15+. + """ + return self.module.sqlite_version_info >= (3, 15, 0) + + @property + def supports_using_any_list(self): + """Do we support using `a = ANY(?)` and passing a list + """ + return False + def check_database(self, txn): pass diff --git a/synapse/storage/event_federation.py b/synapse/storage/event_federation.py index 4f500d893..47cc10d32 100644 --- a/synapse/storage/event_federation.py +++ b/synapse/storage/event_federation.py @@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import itertools import logging import random @@ -24,7 +25,7 @@ from twisted.internet import defer from synapse.api.errors import StoreError from synapse.metrics.background_process_metrics import run_as_background_process -from synapse.storage._base import SQLBaseStore +from synapse.storage._base import SQLBaseStore, make_in_list_sql_clause from synapse.storage.events_worker import EventsWorkerStore from synapse.storage.signatures import SignatureWorkerStore from synapse.util.caches.descriptors import cached @@ -67,7 +68,7 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas else: results = set() - base_sql = "SELECT auth_id FROM event_auth WHERE event_id IN (%s)" + base_sql = "SELECT auth_id FROM event_auth WHERE " front = set(event_ids) while front: @@ -75,7 +76,10 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas front_list = list(front) chunks = [front_list[x : x + 100] for x in range(0, len(front), 100)] for chunk in chunks: - txn.execute(base_sql % (",".join(["?"] * len(chunk)),), chunk) + clause, args = make_in_list_sql_clause( + txn.database_engine, "event_id", chunk + ) + txn.execute(base_sql + clause, list(args)) new_front.update([r[0] for r in txn]) new_front -= results @@ -190,12 +194,13 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas room_id, ) - def get_rooms_with_many_extremities(self, min_count, limit): + def get_rooms_with_many_extremities(self, min_count, limit, room_id_filter): """Get the top rooms with at least N extremities. Args: min_count (int): The minimum number of extremities limit (int): The maximum number of rooms to return. + room_id_filter (iterable[str]): room_ids to exclude from the results Returns: Deferred[list]: At most `limit` room IDs that have at least @@ -203,15 +208,25 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas """ def _get_rooms_with_many_extremities_txn(txn): + where_clause = "1=1" + if room_id_filter: + where_clause = "room_id NOT IN (%s)" % ( + ",".join("?" for _ in room_id_filter), + ) + sql = """ SELECT room_id FROM event_forward_extremities + WHERE %s GROUP BY room_id HAVING count(*) > ? ORDER BY count(*) DESC LIMIT ? - """ + """ % ( + where_clause, + ) - txn.execute(sql, (min_count, limit)) + query_args = list(itertools.chain(room_id_filter, [min_count, limit])) + txn.execute(sql, query_args) return [room_id for room_id, in txn] return self.runInteraction( diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 1958afe1d..ee49ef235 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -33,11 +33,13 @@ from synapse.api.constants import EventTypes from synapse.api.errors import SynapseError from synapse.events import EventBase # noqa: F401 from synapse.events.snapshot import EventContext # noqa: F401 +from synapse.events.utils import prune_event_dict from synapse.logging.context import PreserveLoggingContext, make_deferred_yieldable from synapse.logging.utils import log_function from synapse.metrics import BucketCollector from synapse.metrics.background_process_metrics import run_as_background_process from synapse.state import StateResolutionStore +from synapse.storage._base import make_in_list_sql_clause from synapse.storage.background_updates import BackgroundUpdateStore from synapse.storage.event_federation import EventFederationStore from synapse.storage.events_worker import EventsWorkerStore @@ -262,6 +264,14 @@ class EventsStore( hs.get_clock().looping_call(read_forward_extremities, 60 * 60 * 1000) + def _censor_redactions(): + return run_as_background_process( + "_censor_redactions", self._censor_redactions + ) + + if self.hs.config.redaction_retention_period is not None: + hs.get_clock().looping_call(_censor_redactions, 5 * 60 * 1000) + @defer.inlineCallbacks def _read_forward_extremities(self): def fetch(txn): @@ -632,14 +642,16 @@ class EventsStore( LEFT JOIN rejections USING (event_id) LEFT JOIN event_json USING (event_id) WHERE - prev_event_id IN (%s) - AND NOT events.outlier + NOT events.outlier AND rejections.event_id IS NULL - """ % ( - ",".join("?" for _ in batch), + AND + """ + + clause, args = make_in_list_sql_clause( + self.database_engine, "prev_event_id", batch ) - txn.execute(sql, batch) + txn.execute(sql + clause, args) results.extend(r[0] for r in txn if not json.loads(r[1]).get("soft_failed")) for chunk in batch_iter(event_ids, 100): @@ -686,13 +698,15 @@ class EventsStore( LEFT JOIN rejections USING (event_id) LEFT JOIN event_json USING (event_id) WHERE - event_id IN (%s) - AND NOT events.outlier - """ % ( - ",".join("?" for _ in to_recursively_check), + NOT events.outlier + AND + """ + + clause, args = make_in_list_sql_clause( + self.database_engine, "event_id", to_recursively_check ) - txn.execute(sql, to_recursively_check) + txn.execute(sql + clause, args) to_recursively_check = [] for event_id, prev_event_id, metadata, rejected in txn: @@ -1380,6 +1394,18 @@ class EventsStore( ], ) + for event, _ in events_and_contexts: + if not event.internal_metadata.is_redacted(): + # If we're persisting an unredacted event we go and ensure + # that we mark any redactions that reference this event as + # requiring censoring. + self._simple_update_txn( + txn, + table="redactions", + keyvalues={"redacts": event.event_id}, + updatevalues={"have_censored": False}, + ) + def _store_rejected_events_txn(self, txn, events_and_contexts): """Add rows to the 'rejections' table for received events which were rejected @@ -1522,10 +1548,14 @@ class EventsStore( " FROM events as e" " LEFT JOIN rejections as rej USING (event_id)" " LEFT JOIN redactions as r ON e.event_id = r.redacts" - " WHERE e.event_id IN (%s)" - ) % (",".join(["?"] * len(ev_map)),) + " WHERE " + ) - txn.execute(sql, list(ev_map)) + clause, args = make_in_list_sql_clause( + self.database_engine, "e.event_id", list(ev_map) + ) + + txn.execute(sql + clause, args) rows = self.cursor_to_dict(txn) for row in rows: event = ev_map[row["event_id"]] @@ -1543,11 +1573,100 @@ class EventsStore( def _store_redaction(self, txn, event): # invalidate the cache for the redacted event txn.call_after(self._invalidate_get_event_cache, event.redacts) - txn.execute( - "INSERT INTO redactions (event_id, redacts) VALUES (?,?)", - (event.event_id, event.redacts), + + self._simple_insert_txn( + txn, + table="redactions", + values={ + "event_id": event.event_id, + "redacts": event.redacts, + "received_ts": self._clock.time_msec(), + }, ) + @defer.inlineCallbacks + def _censor_redactions(self): + """Censors all redactions older than the configured period that haven't + been censored yet. + + By censor we mean update the event_json table with the redacted event. + + Returns: + Deferred + """ + + if self.hs.config.redaction_retention_period is None: + return + + before_ts = self._clock.time_msec() - self.hs.config.redaction_retention_period + + # We fetch all redactions that: + # 1. point to an event we have, + # 2. has a received_ts from before the cut off, and + # 3. we haven't yet censored. + # + # This is limited to 100 events to ensure that we don't try and do too + # much at once. We'll get called again so this should eventually catch + # up. + sql = """ + SELECT redactions.event_id, redacts FROM redactions + LEFT JOIN events AS original_event ON ( + redacts = original_event.event_id + ) + WHERE NOT have_censored + AND redactions.received_ts <= ? + ORDER BY redactions.received_ts ASC + LIMIT ? + """ + + rows = yield self._execute( + "_censor_redactions_fetch", None, sql, before_ts, 100 + ) + + updates = [] + + for redaction_id, event_id in rows: + redaction_event = yield self.get_event(redaction_id, allow_none=True) + original_event = yield self.get_event( + event_id, allow_rejected=True, allow_none=True + ) + + # The SQL above ensures that we have both the redaction and + # original event, so if the `get_event` calls return None it + # means that the redaction wasn't allowed. Either way we know that + # the result won't change so we mark the fact that we've checked. + if ( + redaction_event + and original_event + and original_event.internal_metadata.is_redacted() + ): + # Redaction was allowed + pruned_json = encode_json(prune_event_dict(original_event.get_dict())) + else: + # Redaction wasn't allowed + pruned_json = None + + updates.append((redaction_id, event_id, pruned_json)) + + def _update_censor_txn(txn): + for redaction_id, event_id, pruned_json in updates: + if pruned_json: + self._simple_update_one_txn( + txn, + table="event_json", + keyvalues={"event_id": event_id}, + updatevalues={"json": pruned_json}, + ) + + self._simple_update_one_txn( + txn, + table="redactions", + keyvalues={"event_id": redaction_id}, + updatevalues={"have_censored": True}, + ) + + yield self.runInteraction("_update_censor_txn", _update_censor_txn) + @defer.inlineCallbacks def count_daily_messages(self): """ @@ -2139,11 +2258,12 @@ class EventsStore( sql = """ SELECT DISTINCT state_group FROM event_to_state_groups LEFT JOIN events_to_purge AS ep USING (event_id) - WHERE state_group IN (%s) AND ep.event_id IS NULL - """ % ( - ",".join("?" for _ in current_search), + WHERE ep.event_id IS NULL AND + """ + clause, args = make_in_list_sql_clause( + txn.database_engine, "state_group", current_search ) - txn.execute(sql, list(current_search)) + txn.execute(sql + clause, list(args)) referenced = set(sg for sg, in txn) referenced_groups |= referenced diff --git a/synapse/storage/events_bg_updates.py b/synapse/storage/events_bg_updates.py index 6587f31e2..31ea6f917 100644 --- a/synapse/storage/events_bg_updates.py +++ b/synapse/storage/events_bg_updates.py @@ -21,6 +21,7 @@ from canonicaljson import json from twisted.internet import defer +from synapse.storage._base import make_in_list_sql_clause from synapse.storage.background_updates import BackgroundUpdateStore logger = logging.getLogger(__name__) @@ -67,6 +68,23 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): self.DELETE_SOFT_FAILED_EXTREMITIES, self._cleanup_extremities_bg_update ) + self.register_background_update_handler( + "redactions_received_ts", self._redactions_received_ts + ) + + # This index gets deleted in `event_fix_redactions_bytes` update + self.register_background_index_update( + "event_fix_redactions_bytes_create_index", + index_name="redactions_censored_redacts", + table="redactions", + columns=["redacts"], + where_clause="have_censored", + ) + + self.register_background_update_handler( + "event_fix_redactions_bytes", self._event_fix_redactions_bytes + ) + @defer.inlineCallbacks def _background_reindex_fields_sender(self, progress, batch_size): target_min_stream_id = progress["target_min_stream_id_inclusive"] @@ -308,12 +326,13 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): INNER JOIN event_json USING (event_id) LEFT JOIN rejections USING (event_id) WHERE - prev_event_id IN (%s) - AND NOT events.outlier - """ % ( - ",".join("?" for _ in to_check), + NOT events.outlier + AND + """ + clause, args = make_in_list_sql_clause( + self.database_engine, "prev_event_id", to_check ) - txn.execute(sql, to_check) + txn.execute(sql + clause, list(args)) for prev_event_id, event_id, metadata, rejected in txn: if event_id in graph: @@ -397,3 +416,90 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): ) return num_handled + + @defer.inlineCallbacks + def _redactions_received_ts(self, progress, batch_size): + """Handles filling out the `received_ts` column in redactions. + """ + last_event_id = progress.get("last_event_id", "") + + def _redactions_received_ts_txn(txn): + # Fetch the set of event IDs that we want to update + sql = """ + SELECT event_id FROM redactions + WHERE event_id > ? + ORDER BY event_id ASC + LIMIT ? + """ + + txn.execute(sql, (last_event_id, batch_size)) + + rows = txn.fetchall() + if not rows: + return 0 + + upper_event_id, = rows[-1] + + # Update the redactions with the received_ts. + # + # Note: Not all events have an associated received_ts, so we + # fallback to using origin_server_ts. If we for some reason don't + # have an origin_server_ts, lets just use the current timestamp. + # + # We don't want to leave it null, as then we'll never try and + # censor those redactions. + sql = """ + UPDATE redactions + SET received_ts = ( + SELECT COALESCE(received_ts, origin_server_ts, ?) FROM events + WHERE events.event_id = redactions.event_id + ) + WHERE ? <= event_id AND event_id <= ? + """ + + txn.execute(sql, (self._clock.time_msec(), last_event_id, upper_event_id)) + + self._background_update_progress_txn( + txn, "redactions_received_ts", {"last_event_id": upper_event_id} + ) + + return len(rows) + + count = yield self.runInteraction( + "_redactions_received_ts", _redactions_received_ts_txn + ) + + if not count: + yield self._end_background_update("redactions_received_ts") + + return count + + @defer.inlineCallbacks + def _event_fix_redactions_bytes(self, progress, batch_size): + """Undoes hex encoded censored redacted event JSON. + """ + + def _event_fix_redactions_bytes_txn(txn): + # This update is quite fast due to new index. + txn.execute( + """ + UPDATE event_json + SET + json = convert_from(json::bytea, 'utf8') + FROM redactions + WHERE + redactions.have_censored + AND event_json.event_id = redactions.redacts + AND json NOT LIKE '{%'; + """ + ) + + txn.execute("DROP INDEX redactions_censored_redacts") + + yield self.runInteraction( + "_event_fix_redactions_bytes", _event_fix_redactions_bytes_txn + ) + + yield self._end_background_update("event_fix_redactions_bytes") + + return 1 diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py index c6fa7f82f..4c4b76bd9 100644 --- a/synapse/storage/events_worker.py +++ b/synapse/storage/events_worker.py @@ -31,12 +31,11 @@ from synapse.events.snapshot import EventContext # noqa: F401 from synapse.events.utils import prune_event from synapse.logging.context import LoggingContext, PreserveLoggingContext from synapse.metrics.background_process_metrics import run_as_background_process +from synapse.storage._base import SQLBaseStore, make_in_list_sql_clause from synapse.types import get_domain_from_id from synapse.util import batch_iter from synapse.util.metrics import Measure -from ._base import SQLBaseStore - logger = logging.getLogger(__name__) @@ -238,6 +237,20 @@ class EventsWorkerStore(SQLBaseStore): # we have to recheck auth now. if not allow_rejected and entry.event.type == EventTypes.Redaction: + if not hasattr(entry.event, "redacts"): + # A redacted redaction doesn't have a `redacts` key, in + # which case lets just withhold the event. + # + # Note: Most of the time if the redactions has been + # redacted we still have the un-redacted event in the DB + # and so we'll still see the `redacts` key. However, this + # isn't always true e.g. if we have censored the event. + logger.debug( + "Withholding redaction event %s as we don't have redacts key", + event_id, + ) + continue + redacted_event_id = entry.event.redacts event_map = yield self._get_events_from_cache_or_db([redacted_event_id]) original_event_entry = event_map.get(redacted_event_id) @@ -609,10 +622,14 @@ class EventsWorkerStore(SQLBaseStore): " rej.reason " " FROM event_json as e" " LEFT JOIN rejections as rej USING (event_id)" - " WHERE e.event_id IN (%s)" - ) % (",".join(["?"] * len(evs)),) + " WHERE " + ) - txn.execute(sql, evs) + clause, args = make_in_list_sql_clause( + txn.database_engine, "e.event_id", evs + ) + + txn.execute(sql + clause, args) for row in txn: event_id = row[0] @@ -626,11 +643,11 @@ class EventsWorkerStore(SQLBaseStore): } # check for redactions - redactions_sql = ( - "SELECT event_id, redacts FROM redactions WHERE redacts IN (%s)" - ) % (",".join(["?"] * len(evs)),) + redactions_sql = "SELECT event_id, redacts FROM redactions WHERE " - txn.execute(redactions_sql, evs) + clause, args = make_in_list_sql_clause(txn.database_engine, "redacts", evs) + + txn.execute(redactions_sql + clause, args) for (redacter, redacted) in txn: d = event_dict.get(redacted) @@ -739,10 +756,11 @@ class EventsWorkerStore(SQLBaseStore): results = set() def have_seen_events_txn(txn, chunk): - sql = "SELECT event_id FROM events as e WHERE e.event_id IN (%s)" % ( - ",".join("?" * len(chunk)), + sql = "SELECT event_id FROM events as e WHERE " + clause, args = make_in_list_sql_clause( + txn.database_engine, "e.event_id", chunk ) - txn.execute(sql, chunk) + txn.execute(sql + clause, args) for (event_id,) in txn: results.add(event_id) diff --git a/synapse/storage/filtering.py b/synapse/storage/filtering.py index 23b48f6ce..7c2a7da83 100644 --- a/synapse/storage/filtering.py +++ b/synapse/storage/filtering.py @@ -51,7 +51,7 @@ class FilteringStore(SQLBaseStore): "SELECT filter_id FROM user_filters " "WHERE user_id = ? AND filter_json = ?" ) - txn.execute(sql, (user_localpart, def_json)) + txn.execute(sql, (user_localpart, bytearray(def_json))) filter_id_response = txn.fetchone() if filter_id_response is not None: return filter_id_response[0] @@ -68,7 +68,7 @@ class FilteringStore(SQLBaseStore): "INSERT INTO user_filters (user_id, filter_id, filter_json)" "VALUES(?, ?, ?)" ) - txn.execute(sql, (user_localpart, filter_id, def_json)) + txn.execute(sql, (user_localpart, filter_id, bytearray(def_json))) return filter_id diff --git a/synapse/storage/media_repository.py b/synapse/storage/media_repository.py index 6b1238ce4..84b5f3ad5 100644 --- a/synapse/storage/media_repository.py +++ b/synapse/storage/media_repository.py @@ -15,11 +15,9 @@ from synapse.storage.background_updates import BackgroundUpdateStore -class MediaRepositoryStore(BackgroundUpdateStore): - """Persistence for attachments and avatars""" - +class MediaRepositoryBackgroundUpdateStore(BackgroundUpdateStore): def __init__(self, db_conn, hs): - super(MediaRepositoryStore, self).__init__(db_conn, hs) + super(MediaRepositoryBackgroundUpdateStore, self).__init__(db_conn, hs) self.register_background_index_update( update_name="local_media_repository_url_idx", @@ -29,6 +27,13 @@ class MediaRepositoryStore(BackgroundUpdateStore): where_clause="url_cache IS NOT NULL", ) + +class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): + """Persistence for attachments and avatars""" + + def __init__(self, db_conn, hs): + super(MediaRepositoryStore, self).__init__(db_conn, hs) + def get_local_media(self, media_id): """Get the metadata for a local piece of media Returns: diff --git a/synapse/storage/monthly_active_users.py b/synapse/storage/monthly_active_users.py index 752e9788a..3803604be 100644 --- a/synapse/storage/monthly_active_users.py +++ b/synapse/storage/monthly_active_users.py @@ -32,7 +32,6 @@ class MonthlyActiveUsersStore(SQLBaseStore): super(MonthlyActiveUsersStore, self).__init__(None, hs) self._clock = hs.get_clock() self.hs = hs - self.reserved_users = () # Do not add more reserved users than the total allowable number self._new_transaction( dbconn, @@ -51,7 +50,6 @@ class MonthlyActiveUsersStore(SQLBaseStore): txn (cursor): threepids (list[dict]): List of threepid dicts to reserve """ - reserved_user_list = [] for tp in threepids: user_id = self.get_user_id_by_threepid_txn(txn, tp["medium"], tp["address"]) @@ -60,10 +58,8 @@ class MonthlyActiveUsersStore(SQLBaseStore): is_support = self.is_support_user_txn(txn, user_id) if not is_support: self.upsert_monthly_active_user_txn(txn, user_id) - reserved_user_list.append(user_id) else: logger.warning("mau limit reserved threepid %s not found in db" % tp) - self.reserved_users = tuple(reserved_user_list) @defer.inlineCallbacks def reap_monthly_active_users(self): @@ -74,8 +70,11 @@ class MonthlyActiveUsersStore(SQLBaseStore): Deferred[] """ - def _reap_users(txn): - # Purge stale users + def _reap_users(txn, reserved_users): + """ + Args: + reserved_users (tuple): reserved users to preserve + """ thirty_days_ago = int(self._clock.time_msec()) - (1000 * 60 * 60 * 24 * 30) query_args = [thirty_days_ago] @@ -83,20 +82,19 @@ class MonthlyActiveUsersStore(SQLBaseStore): # Need if/else since 'AND user_id NOT IN ({})' fails on Postgres # when len(reserved_users) == 0. Works fine on sqlite. - if len(self.reserved_users) > 0: + if len(reserved_users) > 0: # questionmarks is a hack to overcome sqlite not supporting # tuples in 'WHERE IN %s' - questionmarks = "?" * len(self.reserved_users) + question_marks = ",".join("?" * len(reserved_users)) - query_args.extend(self.reserved_users) - sql = base_sql + """ AND user_id NOT IN ({})""".format( - ",".join(questionmarks) - ) + query_args.extend(reserved_users) + sql = base_sql + " AND user_id NOT IN ({})".format(question_marks) else: sql = base_sql txn.execute(sql, query_args) + max_mau_value = self.hs.config.max_mau_value if self.hs.config.limit_usage_by_mau: # If MAU user count still exceeds the MAU threshold, then delete on # a least recently active basis. @@ -106,31 +104,52 @@ class MonthlyActiveUsersStore(SQLBaseStore): # While Postgres does not require 'LIMIT', but also does not support # negative LIMIT values. So there is no way to write it that both can # support - safe_guard = self.hs.config.max_mau_value - len(self.reserved_users) - # Must be greater than zero for postgres - safe_guard = safe_guard if safe_guard > 0 else 0 - query_args = [safe_guard] - - base_sql = """ - DELETE FROM monthly_active_users - WHERE user_id NOT IN ( - SELECT user_id FROM monthly_active_users - ORDER BY timestamp DESC - LIMIT ? + if len(reserved_users) == 0: + sql = """ + DELETE FROM monthly_active_users + WHERE user_id NOT IN ( + SELECT user_id FROM monthly_active_users + ORDER BY timestamp DESC + LIMIT ? ) - """ + """ + txn.execute(sql, (max_mau_value,)) # Need if/else since 'AND user_id NOT IN ({})' fails on Postgres # when len(reserved_users) == 0. Works fine on sqlite. - if len(self.reserved_users) > 0: - query_args.extend(self.reserved_users) - sql = base_sql + """ AND user_id NOT IN ({})""".format( - ",".join(questionmarks) - ) else: - sql = base_sql - txn.execute(sql, query_args) + # Must be >= 0 for postgres + num_of_non_reserved_users_to_remove = max( + max_mau_value - len(reserved_users), 0 + ) - yield self.runInteraction("reap_monthly_active_users", _reap_users) + # It is important to filter reserved users twice to guard + # against the case where the reserved user is present in the + # SELECT, meaning that a legitmate mau is deleted. + sql = """ + DELETE FROM monthly_active_users + WHERE user_id NOT IN ( + SELECT user_id FROM monthly_active_users + WHERE user_id NOT IN ({}) + ORDER BY timestamp DESC + LIMIT ? + ) + AND user_id NOT IN ({}) + """.format( + question_marks, question_marks + ) + + query_args = [ + *reserved_users, + num_of_non_reserved_users_to_remove, + *reserved_users, + ] + + txn.execute(sql, query_args) + + reserved_users = yield self.get_registered_reserved_users() + yield self.runInteraction( + "reap_monthly_active_users", _reap_users, reserved_users + ) # It seems poor to invalidate the whole cache, Postgres supports # 'Returning' which would allow me to invalidate only the # specific users, but sqlite has no way to do this and instead @@ -159,21 +178,25 @@ class MonthlyActiveUsersStore(SQLBaseStore): return self.runInteraction("count_users", _count_users) @defer.inlineCallbacks - def get_registered_reserved_users_count(self): - """Of the reserved threepids defined in config, how many are associated + def get_registered_reserved_users(self): + """Of the reserved threepids defined in config, which are associated with registered users? Returns: - Defered[int]: Number of real reserved users + Defered[list]: Real reserved users """ - count = 0 - for tp in self.hs.config.mau_limits_reserved_threepids: + users = [] + + for tp in self.hs.config.mau_limits_reserved_threepids[ + : self.hs.config.max_mau_value + ]: user_id = yield self.hs.get_datastore().get_user_id_by_threepid( tp["medium"], tp["address"] ) if user_id: - count = count + 1 - return count + users.append(user_id) + + return users @defer.inlineCallbacks def upsert_monthly_active_user(self, user_id): diff --git a/synapse/storage/presence.py b/synapse/storage/presence.py index 5db6f2d84..3a641f538 100644 --- a/synapse/storage/presence.py +++ b/synapse/storage/presence.py @@ -18,11 +18,10 @@ from collections import namedtuple from twisted.internet import defer from synapse.api.constants import PresenceState +from synapse.storage._base import SQLBaseStore, make_in_list_sql_clause from synapse.util import batch_iter from synapse.util.caches.descriptors import cached, cachedList -from ._base import SQLBaseStore - class UserPresenceState( namedtuple( @@ -119,14 +118,13 @@ class PresenceStore(SQLBaseStore): ) # Delete old rows to stop database from getting really big - sql = ( - "DELETE FROM presence_stream WHERE" " stream_id < ?" " AND user_id IN (%s)" - ) + sql = "DELETE FROM presence_stream WHERE stream_id < ? AND " for states in batch_iter(presence_states, 50): - args = [stream_id] - args.extend(s.user_id for s in states) - txn.execute(sql % (",".join("?" for _ in states),), args) + clause, args = make_in_list_sql_clause( + self.database_engine, "user_id", [s.user_id for s in states] + ) + txn.execute(sql + clause, [stream_id] + list(args)) def get_all_presence_updates(self, last_id, current_id): if last_id == current_id: diff --git a/synapse/storage/push_rule.py b/synapse/storage/push_rule.py index a6517c4cf..c4e24edff 100644 --- a/synapse/storage/push_rule.py +++ b/synapse/storage/push_rule.py @@ -183,8 +183,8 @@ class PushRulesWorkerStore( return results @defer.inlineCallbacks - def move_push_rule_from_room_to_room(self, new_room_id, user_id, rule): - """Move a single push rule from one room to another for a specific user. + def copy_push_rule_from_room_to_room(self, new_room_id, user_id, rule): + """Copy a single push rule from one room to another for a specific user. Args: new_room_id (str): ID of the new room. @@ -209,14 +209,11 @@ class PushRulesWorkerStore( actions=rule["actions"], ) - # Delete push rule for the old room - yield self.delete_push_rule(user_id, rule["rule_id"]) - @defer.inlineCallbacks - def move_push_rules_from_room_to_room_for_user( + def copy_push_rules_from_room_to_room_for_user( self, old_room_id, new_room_id, user_id ): - """Move all of the push rules from one room to another for a specific + """Copy all of the push rules from one room to another for a specific user. Args: @@ -227,15 +224,14 @@ class PushRulesWorkerStore( # Retrieve push rules for this user user_push_rules = yield self.get_push_rules_for_user(user_id) - # Get rules relating to the old room, move them to the new room, then - # delete them from the old room + # Get rules relating to the old room and copy them to the new room for rule in user_push_rules: conditions = rule.get("conditions", []) if any( (c.get("key") == "room_id" and c.get("pattern") == old_room_id) for c in conditions ): - self.move_push_rule_from_room_to_room(new_room_id, user_id, rule) + yield self.copy_push_rule_from_room_to_room(new_room_id, user_id, rule) @defer.inlineCallbacks def bulk_get_push_rules_for_room(self, event, context): diff --git a/synapse/storage/pusher.py b/synapse/storage/pusher.py index 3e0e834a6..b12e80440 100644 --- a/synapse/storage/pusher.py +++ b/synapse/storage/pusher.py @@ -241,7 +241,7 @@ class PusherStore(PusherWorkerStore): "device_display_name": device_display_name, "ts": pushkey_ts, "lang": lang, - "data": encode_canonical_json(data), + "data": bytearray(encode_canonical_json(data)), "last_stream_ordering": last_stream_ordering, "profile_tag": profile_tag, "id": stream_id, diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py index 290ddb30e..0c24430f2 100644 --- a/synapse/storage/receipts.py +++ b/synapse/storage/receipts.py @@ -21,12 +21,11 @@ from canonicaljson import json from twisted.internet import defer +from synapse.storage._base import SQLBaseStore, make_in_list_sql_clause +from synapse.storage.util.id_generators import StreamIdGenerator from synapse.util.caches.descriptors import cached, cachedInlineCallbacks, cachedList from synapse.util.caches.stream_change_cache import StreamChangeCache -from ._base import SQLBaseStore -from .util.id_generators import StreamIdGenerator - logger = logging.getLogger(__name__) @@ -217,24 +216,26 @@ class ReceiptsWorkerStore(SQLBaseStore): def f(txn): if from_key: - sql = ( - "SELECT * FROM receipts_linearized WHERE" - " room_id IN (%s) AND stream_id > ? AND stream_id <= ?" - ) % (",".join(["?"] * len(room_ids))) - args = list(room_ids) - args.extend([from_key, to_key]) + sql = """ + SELECT * FROM receipts_linearized WHERE + stream_id > ? AND stream_id <= ? AND + """ + clause, args = make_in_list_sql_clause( + self.database_engine, "room_id", room_ids + ) - txn.execute(sql, args) + txn.execute(sql + clause, [from_key, to_key] + list(args)) else: - sql = ( - "SELECT * FROM receipts_linearized WHERE" - " room_id IN (%s) AND stream_id <= ?" - ) % (",".join(["?"] * len(room_ids))) + sql = """ + SELECT * FROM receipts_linearized WHERE + stream_id <= ? AND + """ - args = list(room_ids) - args.append(to_key) + clause, args = make_in_list_sql_clause( + self.database_engine, "room_id", room_ids + ) - txn.execute(sql, args) + txn.execute(sql + clause, [to_key] + list(args)) return self.cursor_to_dict(txn) @@ -433,13 +434,19 @@ class ReceiptsStore(ReceiptsWorkerStore): # we need to points in graph -> linearized form. # TODO: Make this better. def graph_to_linear(txn): - query = ( - "SELECT event_id WHERE room_id = ? AND stream_ordering IN (" - " SELECT max(stream_ordering) WHERE event_id IN (%s)" - ")" - ) % (",".join(["?"] * len(event_ids))) + clause, args = make_in_list_sql_clause( + self.database_engine, "event_id", event_ids + ) - txn.execute(query, [room_id] + event_ids) + sql = """ + SELECT event_id WHERE room_id = ? AND stream_ordering IN ( + SELECT max(stream_ordering) WHERE %s + ) + """ % ( + clause, + ) + + txn.execute(sql, [room_id] + list(args)) rows = txn.fetchall() if rows: return rows[0][0] diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 5138792a5..6c5b29288 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -22,9 +22,10 @@ from six import iterkeys from six.moves import range from twisted.internet import defer +from twisted.internet.defer import Deferred from synapse.api.constants import UserTypes -from synapse.api.errors import Codes, StoreError, ThreepidValidationError +from synapse.api.errors import Codes, StoreError, SynapseError, ThreepidValidationError from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage import background_updates from synapse.storage._base import SQLBaseStore @@ -322,6 +323,19 @@ class RegistrationWorkerStore(SQLBaseStore): return None + @cachedInlineCallbacks() + def is_real_user(self, user_id): + """Determines if the user is a real user, ie does not have a 'user_type'. + + Args: + user_id (str): user id to test + + Returns: + Deferred[bool]: True if user 'user_type' is null or empty string + """ + res = yield self.runInteraction("is_real_user", self.is_real_user_txn, user_id) + return res + @cachedInlineCallbacks() def is_support_user(self, user_id): """Determines if the user is of type UserTypes.SUPPORT @@ -337,6 +351,16 @@ class RegistrationWorkerStore(SQLBaseStore): ) return res + def is_real_user_txn(self, txn, user_id): + res = self._simple_select_one_onecol_txn( + txn=txn, + table="users", + keyvalues={"name": user_id}, + retcol="user_type", + allow_none=True, + ) + return res is None + def is_support_user_txn(self, txn, user_id): res = self._simple_select_one_onecol_txn( txn=txn, @@ -361,6 +385,26 @@ class RegistrationWorkerStore(SQLBaseStore): return self.runInteraction("get_users_by_id_case_insensitive", f) + async def get_user_by_external_id( + self, auth_provider: str, external_id: str + ) -> str: + """Look up a user by their external auth id + + Args: + auth_provider: identifier for the remote auth provider + external_id: id on that system + + Returns: + str|None: the mxid of the user, or None if they are not known + """ + return await self._simple_select_one_onecol( + table="user_external_ids", + keyvalues={"auth_provider": auth_provider, "external_id": external_id}, + retcol="user_id", + allow_none=True, + desc="get_user_by_external_id", + ) + @defer.inlineCallbacks def count_all_users(self): """Counts all users registered on the homeserver.""" @@ -421,6 +465,20 @@ class RegistrationWorkerStore(SQLBaseStore): ret = yield self.runInteraction("count_users", _count_users) return ret + @defer.inlineCallbacks + def count_real_users(self): + """Counts all users without a special user_type registered on the homeserver.""" + + def _count_users(txn): + txn.execute("SELECT COUNT(*) AS users FROM users where user_type is null") + rows = self.cursor_to_dict(txn) + if rows: + return rows[0]["users"] + return 0 + + ret = yield self.runInteraction("count_real_users", _count_users) + return ret + @defer.inlineCallbacks def find_next_generated_user_id_localpart(self): """ @@ -435,7 +493,9 @@ class RegistrationWorkerStore(SQLBaseStore): """ def _find_next_generated_user_id(txn): - txn.execute("SELECT name FROM users") + # We bound between '@1' and '@a' to avoid pulling the entire table + # out. + txn.execute("SELECT name FROM users WHERE '@1' <= name AND name < '@a'") regex = re.compile(r"^@(\d+):") @@ -458,7 +518,7 @@ class RegistrationWorkerStore(SQLBaseStore): ) @defer.inlineCallbacks - def get_user_id_by_threepid(self, medium, address, require_verified=False): + def get_user_id_by_threepid(self, medium, address): """Returns user id from threepid Args: @@ -549,6 +609,26 @@ class RegistrationWorkerStore(SQLBaseStore): desc="add_user_bound_threepid", ) + def user_get_bound_threepids(self, user_id): + """Get the threepids that a user has bound to an identity server through the homeserver + The homeserver remembers where binds to an identity server occurred. Using this + method can retrieve those threepids. + + Args: + user_id (str): The ID of the user to retrieve threepids for + + Returns: + Deferred[list[dict]]: List of dictionaries containing the following: + medium (str): The medium of the threepid (e.g "email") + address (str): The address of the threepid (e.g "bob@example.com") + """ + return self._simple_select_list( + table="user_threepid_id_server", + keyvalues={"user_id": user_id}, + retcols=["medium", "address"], + desc="user_get_bound_threepids", + ) + def remove_user_bound_threepid(self, user_id, medium, address, id_server): """The server proxied an unbind request to the given identity server on behalf of the given user, so we remove the mapping of threepid to @@ -618,24 +698,37 @@ class RegistrationWorkerStore(SQLBaseStore): self, medium, client_secret, address=None, sid=None, validated=True ): """Gets a session_id and last_send_attempt (if available) for a - client_secret/medium/(address|session_id) combo + combination of validation metadata Args: medium (str|None): The medium of the 3PID address (str|None): The address of the 3PID sid (str|None): The ID of the validation session - client_secret (str|None): A unique string provided by the client to - help identify this validation attempt + client_secret (str): A unique string provided by the client to help identify this + validation attempt validated (bool|None): Whether sessions should be filtered by whether they have been validated already or not. None to perform no filtering Returns: - deferred {str, int}|None: A dict containing the - latest session_id and send_attempt count for this 3PID. - Otherwise None if there hasn't been a previous attempt + Deferred[dict|None]: A dict containing the following: + * address - address of the 3pid + * medium - medium of the 3pid + * client_secret - a secret provided by the client for this validation session + * session_id - ID of the validation session + * send_attempt - a number serving to dedupe send attempts for this session + * validated_at - timestamp of when this session was validated if so + + Otherwise None if a validation session is not found """ - keyvalues = {"medium": medium, "client_secret": client_secret} + if not client_secret: + raise SynapseError( + 400, "Missing parameter: client_secret", errcode=Codes.MISSING_PARAM + ) + + keyvalues = {"client_secret": client_secret} + if medium: + keyvalues["medium"] = medium if address: keyvalues["address"] = address if sid: @@ -694,13 +787,14 @@ class RegistrationWorkerStore(SQLBaseStore): ) -class RegistrationStore( +class RegistrationBackgroundUpdateStore( RegistrationWorkerStore, background_updates.BackgroundUpdateStore ): def __init__(self, db_conn, hs): - super(RegistrationStore, self).__init__(db_conn, hs) + super(RegistrationBackgroundUpdateStore, self).__init__(db_conn, hs) self.clock = hs.get_clock() + self.config = hs.config self.register_background_index_update( "access_tokens_device_index", @@ -716,8 +810,6 @@ class RegistrationStore( columns=["creation_ts"], ) - self._account_validity = hs.config.account_validity - # we no longer use refresh tokens, but it's possible that some people # might have a background update queued to build this index. Just # clear the background update. @@ -731,17 +823,6 @@ class RegistrationStore( "users_set_deactivated_flag", self._background_update_set_deactivated_flag ) - # Create a background job for culling expired 3PID validity tokens - def start_cull(): - # run as a background process to make sure that the database transactions - # have a logcontext to report to - return run_as_background_process( - "cull_expired_threepid_validation_tokens", - self.cull_expired_threepid_validation_tokens, - ) - - hs.get_clock().looping_call(start_cull, THIRTY_MINUTES_IN_MS) - @defer.inlineCallbacks def _background_update_set_deactivated_flag(self, progress, batch_size): """Retrieves a list of all deactivated users and sets the 'deactivated' flag to 1 @@ -774,7 +855,7 @@ class RegistrationStore( rows = self.cursor_to_dict(txn) if not rows: - return True + return True, 0 rows_processed_nb = 0 @@ -790,18 +871,66 @@ class RegistrationStore( ) if batch_size > len(rows): - return True + return True, len(rows) else: - return False + return False, len(rows) - end = yield self.runInteraction( + end, nb_processed = yield self.runInteraction( "users_set_deactivated_flag", _background_update_set_deactivated_flag_txn ) if end: yield self._end_background_update("users_set_deactivated_flag") - return batch_size + return nb_processed + + @defer.inlineCallbacks + def _bg_user_threepids_grandfather(self, progress, batch_size): + """We now track which identity servers a user binds their 3PID to, so + we need to handle the case of existing bindings where we didn't track + this. + + We do this by grandfathering in existing user threepids assuming that + they used one of the server configured trusted identity servers. + """ + id_servers = set(self.config.trusted_third_party_id_servers) + + def _bg_user_threepids_grandfather_txn(txn): + sql = """ + INSERT INTO user_threepid_id_server + (user_id, medium, address, id_server) + SELECT user_id, medium, address, ? + FROM user_threepids + """ + + txn.executemany(sql, [(id_server,) for id_server in id_servers]) + + if id_servers: + yield self.runInteraction( + "_bg_user_threepids_grandfather", _bg_user_threepids_grandfather_txn + ) + + yield self._end_background_update("user_threepids_grandfather") + + return 1 + + +class RegistrationStore(RegistrationBackgroundUpdateStore): + def __init__(self, db_conn, hs): + super(RegistrationStore, self).__init__(db_conn, hs) + + self._account_validity = hs.config.account_validity + + # Create a background job for culling expired 3PID validity tokens + def start_cull(): + # run as a background process to make sure that the database transactions + # have a logcontext to report to + return run_as_background_process( + "cull_expired_threepid_validation_tokens", + self.cull_expired_threepid_validation_tokens, + ) + + hs.get_clock().looping_call(start_cull, THIRTY_MINUTES_IN_MS) @defer.inlineCallbacks def add_access_token_to_user(self, user_id, token, device_id, valid_until_ms): @@ -962,6 +1091,26 @@ class RegistrationStore( self._invalidate_cache_and_stream(txn, self.get_user_by_id, (user_id,)) txn.call_after(self.is_guest.invalidate, (user_id,)) + def record_user_external_id( + self, auth_provider: str, external_id: str, user_id: str + ) -> Deferred: + """Record a mapping from an external user id to a mxid + + Args: + auth_provider: identifier for the remote auth provider + external_id: id on that system + user_id: complete mxid that it is mapped to + """ + return self._simple_insert( + table="user_external_ids", + values={ + "auth_provider": auth_provider, + "external_id": external_id, + "user_id": user_id, + }, + desc="record_user_external_id", + ) + def user_set_password_hash(self, user_id, password_hash): """ NB. This does *not* evict any cache because the one use for this @@ -1131,36 +1280,6 @@ class RegistrationStore( desc="get_users_pending_deactivation", ) - @defer.inlineCallbacks - def _bg_user_threepids_grandfather(self, progress, batch_size): - """We now track which identity servers a user binds their 3PID to, so - we need to handle the case of existing bindings where we didn't track - this. - - We do this by grandfathering in existing user threepids assuming that - they used one of the server configured trusted identity servers. - """ - id_servers = set(self.config.trusted_third_party_id_servers) - - def _bg_user_threepids_grandfather_txn(txn): - sql = """ - INSERT INTO user_threepid_id_server - (user_id, medium, address, id_server) - SELECT user_id, medium, address, ? - FROM user_threepids - """ - - txn.executemany(sql, [(id_server,) for id_server in id_servers]) - - if id_servers: - yield self.runInteraction( - "_bg_user_threepids_grandfather", _bg_user_threepids_grandfather_txn - ) - - yield self._end_background_update("user_threepids_grandfather") - - return 1 - def validate_threepid_session(self, session_id, client_secret, token, current_ts): """Attempt to validate a threepid session using a token @@ -1172,6 +1291,10 @@ class RegistrationStore( current_ts (int): The current unix time in milliseconds. Used for checking token expiry status + Raises: + ThreepidValidationError: if a matching validation token was not found or has + expired + Returns: deferred str|None: A str representing a link to redirect the user to if there is one. @@ -1348,17 +1471,6 @@ class RegistrationStore( self.clock.time_msec(), ) - def set_user_deactivated_status_txn(self, txn, user_id, deactivated): - self._simple_update_one_txn( - txn=txn, - table="users", - keyvalues={"name": user_id}, - updatevalues={"deactivated": 1 if deactivated else 0}, - ) - self._invalidate_cache_and_stream( - txn, self.get_user_deactivated_status, (user_id,) - ) - @defer.inlineCallbacks def set_user_deactivated_status(self, user_id, deactivated): """Set the `deactivated` property for the provided user to the provided value. @@ -1374,3 +1486,14 @@ class RegistrationStore( user_id, deactivated, ) + + def set_user_deactivated_status_txn(self, txn, user_id, deactivated): + self._simple_update_one_txn( + txn=txn, + table="users", + keyvalues={"name": user_id}, + updatevalues={"deactivated": 1 if deactivated else 0}, + ) + self._invalidate_cache_and_stream( + txn, self.get_user_deactivated_status, (user_id,) + ) diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 08e13f3a3..43cc56fa6 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2019 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -16,6 +17,7 @@ import collections import logging import re +from typing import Optional, Tuple from canonicaljson import json @@ -24,6 +26,7 @@ from twisted.internet import defer from synapse.api.errors import StoreError from synapse.storage._base import SQLBaseStore from synapse.storage.search import SearchStore +from synapse.types import ThirdPartyInstanceID from synapse.util.caches.descriptors import cached, cachedInlineCallbacks logger = logging.getLogger(__name__) @@ -63,103 +66,196 @@ class RoomWorkerStore(SQLBaseStore): desc="get_public_room_ids", ) - @cached(num_args=2, max_entries=100) - def get_public_room_ids_at_stream_id(self, stream_id, network_tuple): - """Get pulbic rooms for a particular list, or across all lists. + def count_public_rooms(self, network_tuple, ignore_non_federatable): + """Counts the number of public rooms as tracked in the room_stats_current + and room_stats_state table. Args: - stream_id (int) - network_tuple (ThirdPartyInstanceID): The list to use (None, None) - means the main list, None means all lsits. + network_tuple (ThirdPartyInstanceID|None) + ignore_non_federatable (bool): If true filters out non-federatable rooms """ - return self.runInteraction( - "get_public_room_ids_at_stream_id", - self.get_public_room_ids_at_stream_id_txn, - stream_id, - network_tuple=network_tuple, - ) - def get_public_room_ids_at_stream_id_txn(self, txn, stream_id, network_tuple): - return { - rm - for rm, vis in self.get_published_at_stream_id_txn( - txn, stream_id, network_tuple=network_tuple - ).items() - if vis - } + def _count_public_rooms_txn(txn): + query_args = [] - def get_published_at_stream_id_txn(self, txn, stream_id, network_tuple): - if network_tuple: - # We want to get from a particular list. No aggregation required. - - sql = """ - SELECT room_id, visibility FROM public_room_list_stream - INNER JOIN ( - SELECT room_id, max(stream_id) AS stream_id - FROM public_room_list_stream - WHERE stream_id <= ? %s - GROUP BY room_id - ) grouped USING (room_id, stream_id) + if network_tuple: + if network_tuple.appservice_id: + published_sql = """ + SELECT room_id from appservice_room_list + WHERE appservice_id = ? AND network_id = ? + """ + query_args.append(network_tuple.appservice_id) + query_args.append(network_tuple.network_id) + else: + published_sql = """ + SELECT room_id FROM rooms WHERE is_public + """ + else: + published_sql = """ + SELECT room_id FROM rooms WHERE is_public + UNION SELECT room_id from appservice_room_list """ - if network_tuple.appservice_id is not None: - txn.execute( - sql % ("AND appservice_id = ? AND network_id = ?",), - (stream_id, network_tuple.appservice_id, network_tuple.network_id), + sql = """ + SELECT + COALESCE(COUNT(*), 0) + FROM ( + %(published_sql)s + ) published + INNER JOIN room_stats_state USING (room_id) + INNER JOIN room_stats_current USING (room_id) + WHERE + ( + join_rules = 'public' OR history_visibility = 'world_readable' + ) + AND joined_members > 0 + """ % { + "published_sql": published_sql + } + + txn.execute(sql, query_args) + return txn.fetchone()[0] + + return self.runInteraction("count_public_rooms", _count_public_rooms_txn) + + @defer.inlineCallbacks + def get_largest_public_rooms( + self, + network_tuple: Optional[ThirdPartyInstanceID], + search_filter: Optional[dict], + limit: Optional[int], + bounds: Optional[Tuple[int, str]], + forwards: bool, + ignore_non_federatable: bool = False, + ): + """Gets the largest public rooms (where largest is in terms of joined + members, as tracked in the statistics table). + + Args: + network_tuple + search_filter + limit: Maxmimum number of rows to return, unlimited otherwise. + bounds: An uppoer or lower bound to apply to result set if given, + consists of a joined member count and room_id (these are + excluded from result set). + forwards: true iff going forwards, going backwards otherwise + ignore_non_federatable: If true filters out non-federatable rooms. + + Returns: + Rooms in order: biggest number of joined users first. + We then arbitrarily use the room_id as a tie breaker. + + """ + + where_clauses = [] + query_args = [] + + if network_tuple: + if network_tuple.appservice_id: + published_sql = """ + SELECT room_id from appservice_room_list + WHERE appservice_id = ? AND network_id = ? + """ + query_args.append(network_tuple.appservice_id) + query_args.append(network_tuple.network_id) + else: + published_sql = """ + SELECT room_id FROM rooms WHERE is_public + """ + else: + published_sql = """ + SELECT room_id FROM rooms WHERE is_public + UNION SELECT room_id from appservice_room_list + """ + + # Work out the bounds if we're given them, these bounds look slightly + # odd, but are designed to help query planner use indices by pulling + # out a common bound. + if bounds: + last_joined_members, last_room_id = bounds + if forwards: + where_clauses.append( + """ + joined_members <= ? AND ( + joined_members < ? OR room_id < ? + ) + """ ) else: - txn.execute(sql % ("AND appservice_id IS NULL",), (stream_id,)) - return dict(txn) - else: - # We want to get from all lists, so we need to aggregate the results + where_clauses.append( + """ + joined_members >= ? AND ( + joined_members > ? OR room_id > ? + ) + """ + ) - logger.info("Executing full list") + query_args += [last_joined_members, last_joined_members, last_room_id] - sql = """ - SELECT room_id, visibility - FROM public_room_list_stream - INNER JOIN ( - SELECT - room_id, max(stream_id) AS stream_id, appservice_id, - network_id - FROM public_room_list_stream - WHERE stream_id <= ? - GROUP BY room_id, appservice_id, network_id - ) grouped USING (room_id, stream_id) + if ignore_non_federatable: + where_clauses.append("is_federatable") + + if search_filter and search_filter.get("generic_search_term", None): + search_term = "%" + search_filter["generic_search_term"] + "%" + + where_clauses.append( + """ + ( + name LIKE ? + OR topic LIKE ? + OR canonical_alias LIKE ? + ) + """ + ) + query_args += [search_term, search_term, search_term] + + where_clause = "" + if where_clauses: + where_clause = " AND " + " AND ".join(where_clauses) + + sql = """ + SELECT + room_id, name, topic, canonical_alias, joined_members, + avatar, history_visibility, joined_members, guest_access + FROM ( + %(published_sql)s + ) published + INNER JOIN room_stats_state USING (room_id) + INNER JOIN room_stats_current USING (room_id) + WHERE + ( + join_rules = 'public' OR history_visibility = 'world_readable' + ) + AND joined_members > 0 + %(where_clause)s + ORDER BY joined_members %(dir)s, room_id %(dir)s + """ % { + "published_sql": published_sql, + "where_clause": where_clause, + "dir": "DESC" if forwards else "ASC", + } + + if limit is not None: + query_args.append(limit) + + sql += """ + LIMIT ? """ - txn.execute(sql, (stream_id,)) + def _get_largest_public_rooms_txn(txn): + txn.execute(sql, query_args) - results = {} - # A room is visible if its visible on any list. - for room_id, visibility in txn: - results[room_id] = bool(visibility) or results.get(room_id, False) + results = self.cursor_to_dict(txn) + + if not forwards: + results.reverse() return results - def get_public_room_changes(self, prev_stream_id, new_stream_id, network_tuple): - def get_public_room_changes_txn(txn): - then_rooms = self.get_public_room_ids_at_stream_id_txn( - txn, prev_stream_id, network_tuple - ) - - now_rooms_dict = self.get_published_at_stream_id_txn( - txn, new_stream_id, network_tuple - ) - - now_rooms_visible = set(rm for rm, vis in now_rooms_dict.items() if vis) - now_rooms_not_visible = set( - rm for rm, vis in now_rooms_dict.items() if not vis - ) - - newly_visible = now_rooms_visible - then_rooms - newly_unpublished = now_rooms_not_visible & then_rooms - - return newly_visible, newly_unpublished - - return self.runInteraction( - "get_public_room_changes", get_public_room_changes_txn + ret_val = yield self.runInteraction( + "get_largest_public_rooms", _get_largest_public_rooms_txn ) + defer.returnValue(ret_val) @cached(max_entries=10000) def is_room_blocked(self, room_id): diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 4df8ebdac..ff6348782 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -26,13 +26,15 @@ from twisted.internet import defer from synapse.api.constants import EventTypes, Membership from synapse.metrics import LaterGauge from synapse.metrics.background_process_metrics import run_as_background_process -from synapse.storage._base import LoggingTransaction +from synapse.storage._base import LoggingTransaction, make_in_list_sql_clause +from synapse.storage.background_updates import BackgroundUpdateStore from synapse.storage.engines import Sqlite3Engine from synapse.storage.events_worker import EventsWorkerStore from synapse.types import get_domain_from_id from synapse.util.async_helpers import Linearizer from synapse.util.caches import intern_string -from synapse.util.caches.descriptors import cached, cachedInlineCallbacks +from synapse.util.caches.descriptors import cached, cachedInlineCallbacks, cachedList +from synapse.util.metrics import Measure from synapse.util.stringutils import to_ascii logger = logging.getLogger(__name__) @@ -370,6 +372,9 @@ class RoomMemberWorkerStore(EventsWorkerStore): results = [] if membership_list: if self._current_state_events_membership_up_to_date: + clause, args = make_in_list_sql_clause( + self.database_engine, "c.membership", membership_list + ) sql = """ SELECT room_id, e.sender, c.membership, event_id, e.stream_ordering FROM current_state_events AS c @@ -377,11 +382,14 @@ class RoomMemberWorkerStore(EventsWorkerStore): WHERE c.type = 'm.room.member' AND state_key = ? - AND c.membership IN (%s) + AND %s """ % ( - ",".join("?" * len(membership_list)) + clause, ) else: + clause, args = make_in_list_sql_clause( + self.database_engine, "m.membership", membership_list + ) sql = """ SELECT room_id, e.sender, m.membership, event_id, e.stream_ordering FROM current_state_events AS c @@ -390,12 +398,12 @@ class RoomMemberWorkerStore(EventsWorkerStore): WHERE c.type = 'm.room.member' AND state_key = ? - AND m.membership IN (%s) + AND %s """ % ( - ",".join("?" * len(membership_list)) + clause, ) - txn.execute(sql, (user_id, *membership_list)) + txn.execute(sql, (user_id, *args)) results = [RoomsForUser(**r) for r in self.cursor_to_dict(txn)] if do_invite: @@ -483,6 +491,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): ) return result + @defer.inlineCallbacks def get_joined_users_from_state(self, room_id, state_entry): state_group = state_entry.state_group if not state_group: @@ -492,9 +501,12 @@ class RoomMemberWorkerStore(EventsWorkerStore): # To do this we set the state_group to a new object as object() != object() state_group = object() - return self._get_joined_users_from_context( - room_id, state_group, state_entry.state, context=state_entry - ) + with Measure(self._clock, "get_joined_users_from_state"): + return ( + yield self._get_joined_users_from_context( + room_id, state_group, state_entry.state, context=state_entry + ) + ) @cachedInlineCallbacks( num_args=2, cache_context=True, iterable=True, max_entries=100000 @@ -567,25 +579,10 @@ class RoomMemberWorkerStore(EventsWorkerStore): missing_member_event_ids.append(event_id) if missing_member_event_ids: - rows = yield self._simple_select_many_batch( - table="room_memberships", - column="event_id", - iterable=missing_member_event_ids, - retcols=("user_id", "display_name", "avatar_url"), - keyvalues={"membership": Membership.JOIN}, - batch_size=500, - desc="_get_joined_users_from_context", - ) - - users_in_room.update( - { - to_ascii(row["user_id"]): ProfileInfo( - avatar_url=to_ascii(row["avatar_url"]), - display_name=to_ascii(row["display_name"]), - ) - for row in rows - } + event_to_memberships = yield self._get_joined_profiles_from_event_ids( + missing_member_event_ids ) + users_in_room.update((row for row in event_to_memberships.values() if row)) if event is not None and event.type == EventTypes.Member: if event.membership == Membership.JOIN: @@ -597,6 +594,47 @@ class RoomMemberWorkerStore(EventsWorkerStore): return users_in_room + @cached(max_entries=10000) + def _get_joined_profile_from_event_id(self, event_id): + raise NotImplementedError() + + @cachedList( + cached_method_name="_get_joined_profile_from_event_id", + list_name="event_ids", + inlineCallbacks=True, + ) + def _get_joined_profiles_from_event_ids(self, event_ids): + """For given set of member event_ids check if they point to a join + event and if so return the associated user and profile info. + + Args: + event_ids (Iterable[str]): The member event IDs to lookup + + Returns: + Deferred[dict[str, Tuple[str, ProfileInfo]|None]]: Map from event ID + to `user_id` and ProfileInfo (or None if not join event). + """ + + rows = yield self._simple_select_many_batch( + table="room_memberships", + column="event_id", + iterable=event_ids, + retcols=("user_id", "display_name", "avatar_url", "event_id"), + keyvalues={"membership": Membership.JOIN}, + batch_size=500, + desc="_get_membership_from_event_ids", + ) + + return { + row["event_id"]: ( + row["user_id"], + ProfileInfo( + avatar_url=row["avatar_url"], display_name=row["display_name"] + ), + ) + for row in rows + } + @cachedInlineCallbacks(max_entries=10000) def is_host_joined(self, room_id, host): if "%" in host or "_" in host: @@ -669,6 +707,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): return True + @defer.inlineCallbacks def get_joined_hosts(self, room_id, state_entry): state_group = state_entry.state_group if not state_group: @@ -678,9 +717,12 @@ class RoomMemberWorkerStore(EventsWorkerStore): # To do this we set the state_group to a new object as object() != object() state_group = object() - return self._get_joined_hosts( - room_id, state_group, state_entry.state, state_entry=state_entry - ) + with Measure(self._clock, "get_joined_hosts"): + return ( + yield self._get_joined_hosts( + room_id, state_group, state_entry.state, state_entry=state_entry + ) + ) @cachedInlineCallbacks(num_args=2, max_entries=10000, iterable=True) # @defer.inlineCallbacks @@ -785,9 +827,9 @@ class RoomMemberWorkerStore(EventsWorkerStore): return set(room_ids) -class RoomMemberStore(RoomMemberWorkerStore): +class RoomMemberBackgroundUpdateStore(BackgroundUpdateStore): def __init__(self, db_conn, hs): - super(RoomMemberStore, self).__init__(db_conn, hs) + super(RoomMemberBackgroundUpdateStore, self).__init__(db_conn, hs) self.register_background_update_handler( _MEMBERSHIP_PROFILE_UPDATE_NAME, self._background_add_membership_profile ) @@ -803,112 +845,6 @@ class RoomMemberStore(RoomMemberWorkerStore): where_clause="forgotten = 1", ) - def _store_room_members_txn(self, txn, events, backfilled): - """Store a room member in the database. - """ - self._simple_insert_many_txn( - txn, - table="room_memberships", - values=[ - { - "event_id": event.event_id, - "user_id": event.state_key, - "sender": event.user_id, - "room_id": event.room_id, - "membership": event.membership, - "display_name": event.content.get("displayname", None), - "avatar_url": event.content.get("avatar_url", None), - } - for event in events - ], - ) - - for event in events: - txn.call_after( - self._membership_stream_cache.entity_has_changed, - event.state_key, - event.internal_metadata.stream_ordering, - ) - txn.call_after( - self.get_invited_rooms_for_user.invalidate, (event.state_key,) - ) - - # We update the local_invites table only if the event is "current", - # i.e., its something that has just happened. If the event is an - # outlier it is only current if its an "out of band membership", - # like a remote invite or a rejection of a remote invite. - is_new_state = not backfilled and ( - not event.internal_metadata.is_outlier() - or event.internal_metadata.is_out_of_band_membership() - ) - is_mine = self.hs.is_mine_id(event.state_key) - if is_new_state and is_mine: - if event.membership == Membership.INVITE: - self._simple_insert_txn( - txn, - table="local_invites", - values={ - "event_id": event.event_id, - "invitee": event.state_key, - "inviter": event.sender, - "room_id": event.room_id, - "stream_id": event.internal_metadata.stream_ordering, - }, - ) - else: - sql = ( - "UPDATE local_invites SET stream_id = ?, replaced_by = ? WHERE" - " room_id = ? AND invitee = ? AND locally_rejected is NULL" - " AND replaced_by is NULL" - ) - - txn.execute( - sql, - ( - event.internal_metadata.stream_ordering, - event.event_id, - event.room_id, - event.state_key, - ), - ) - - @defer.inlineCallbacks - def locally_reject_invite(self, user_id, room_id): - sql = ( - "UPDATE local_invites SET stream_id = ?, locally_rejected = ? WHERE" - " room_id = ? AND invitee = ? AND locally_rejected is NULL" - " AND replaced_by is NULL" - ) - - def f(txn, stream_ordering): - txn.execute(sql, (stream_ordering, True, room_id, user_id)) - - with self._stream_id_gen.get_next() as stream_ordering: - yield self.runInteraction("locally_reject_invite", f, stream_ordering) - - def forget(self, user_id, room_id): - """Indicate that user_id wishes to discard history for room_id.""" - - def f(txn): - sql = ( - "UPDATE" - " room_memberships" - " SET" - " forgotten = 1" - " WHERE" - " user_id = ?" - " AND" - " room_id = ?" - ) - txn.execute(sql, (user_id, room_id)) - - self._invalidate_cache_and_stream(txn, self.did_forget, (user_id, room_id)) - self._invalidate_cache_and_stream( - txn, self.get_forgotten_rooms_for_user, (user_id,) - ) - - return self.runInteraction("forget_membership", f) - @defer.inlineCallbacks def _background_add_membership_profile(self, progress, batch_size): target_min_stream_id = progress.get( @@ -1043,6 +979,117 @@ class RoomMemberStore(RoomMemberWorkerStore): return row_count +class RoomMemberStore(RoomMemberWorkerStore, RoomMemberBackgroundUpdateStore): + def __init__(self, db_conn, hs): + super(RoomMemberStore, self).__init__(db_conn, hs) + + def _store_room_members_txn(self, txn, events, backfilled): + """Store a room member in the database. + """ + self._simple_insert_many_txn( + txn, + table="room_memberships", + values=[ + { + "event_id": event.event_id, + "user_id": event.state_key, + "sender": event.user_id, + "room_id": event.room_id, + "membership": event.membership, + "display_name": event.content.get("displayname", None), + "avatar_url": event.content.get("avatar_url", None), + } + for event in events + ], + ) + + for event in events: + txn.call_after( + self._membership_stream_cache.entity_has_changed, + event.state_key, + event.internal_metadata.stream_ordering, + ) + txn.call_after( + self.get_invited_rooms_for_user.invalidate, (event.state_key,) + ) + + # We update the local_invites table only if the event is "current", + # i.e., its something that has just happened. If the event is an + # outlier it is only current if its an "out of band membership", + # like a remote invite or a rejection of a remote invite. + is_new_state = not backfilled and ( + not event.internal_metadata.is_outlier() + or event.internal_metadata.is_out_of_band_membership() + ) + is_mine = self.hs.is_mine_id(event.state_key) + if is_new_state and is_mine: + if event.membership == Membership.INVITE: + self._simple_insert_txn( + txn, + table="local_invites", + values={ + "event_id": event.event_id, + "invitee": event.state_key, + "inviter": event.sender, + "room_id": event.room_id, + "stream_id": event.internal_metadata.stream_ordering, + }, + ) + else: + sql = ( + "UPDATE local_invites SET stream_id = ?, replaced_by = ? WHERE" + " room_id = ? AND invitee = ? AND locally_rejected is NULL" + " AND replaced_by is NULL" + ) + + txn.execute( + sql, + ( + event.internal_metadata.stream_ordering, + event.event_id, + event.room_id, + event.state_key, + ), + ) + + @defer.inlineCallbacks + def locally_reject_invite(self, user_id, room_id): + sql = ( + "UPDATE local_invites SET stream_id = ?, locally_rejected = ? WHERE" + " room_id = ? AND invitee = ? AND locally_rejected is NULL" + " AND replaced_by is NULL" + ) + + def f(txn, stream_ordering): + txn.execute(sql, (stream_ordering, True, room_id, user_id)) + + with self._stream_id_gen.get_next() as stream_ordering: + yield self.runInteraction("locally_reject_invite", f, stream_ordering) + + def forget(self, user_id, room_id): + """Indicate that user_id wishes to discard history for room_id.""" + + def f(txn): + sql = ( + "UPDATE" + " room_memberships" + " SET" + " forgotten = 1" + " WHERE" + " user_id = ?" + " AND" + " room_id = ?" + ) + txn.execute(sql, (user_id, room_id)) + + self._invalidate_cache_and_stream(txn, self.did_forget, (user_id, room_id)) + self._invalidate_cache_and_stream( + txn, self.get_forgotten_rooms_for_user, (user_id,) + ) + + return self.runInteraction("forget_membership", f) + + class _JoinedHostsCache(object): """Cache for joined hosts in a room that is optimised to handle updates via state deltas. diff --git a/synapse/storage/schema/delta/56/destinations_failure_ts.sql b/synapse/storage/schema/delta/56/destinations_failure_ts.sql new file mode 100644 index 000000000..f00889290 --- /dev/null +++ b/synapse/storage/schema/delta/56/destinations_failure_ts.sql @@ -0,0 +1,25 @@ +/* Copyright 2019 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Record the timestamp when a given server started failing + */ +ALTER TABLE destinations ADD failure_ts BIGINT; + +/* as a rough approximation, we assume that the server started failing at + * retry_interval before the last retry + */ +UPDATE destinations SET failure_ts = retry_last_ts - retry_interval + WHERE retry_last_ts > 0; diff --git a/synapse/storage/schema/delta/56/destinations_retry_interval_type.sql.postgres b/synapse/storage/schema/delta/56/destinations_retry_interval_type.sql.postgres new file mode 100644 index 000000000..b9bbb18a9 --- /dev/null +++ b/synapse/storage/schema/delta/56/destinations_retry_interval_type.sql.postgres @@ -0,0 +1,18 @@ +/* Copyright 2019 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- We want to store large retry intervals so we upgrade the column from INT +-- to BIGINT. We don't need to do this on SQLite. +ALTER TABLE destinations ALTER retry_interval SET DATA TYPE BIGINT; diff --git a/synapse/storage/schema/delta/56/devices_last_seen.sql b/synapse/storage/schema/delta/56/devices_last_seen.sql new file mode 100644 index 000000000..dfa902d0b --- /dev/null +++ b/synapse/storage/schema/delta/56/devices_last_seen.sql @@ -0,0 +1,24 @@ +/* Copyright 2019 Matrix.org Foundation CIC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Track last seen information for a device in the devices table, rather +-- than relying on it being in the user_ips table (which we want to be able +-- to purge old entries from) +ALTER TABLE devices ADD COLUMN last_seen BIGINT; +ALTER TABLE devices ADD COLUMN ip TEXT; +ALTER TABLE devices ADD COLUMN user_agent TEXT; + +INSERT INTO background_updates (update_name, progress_json) VALUES + ('devices_last_seen', '{}'); diff --git a/synapse/storage/schema/delta/56/public_room_list_idx.sql b/synapse/storage/schema/delta/56/public_room_list_idx.sql new file mode 100644 index 000000000..7be31ffeb --- /dev/null +++ b/synapse/storage/schema/delta/56/public_room_list_idx.sql @@ -0,0 +1,16 @@ +/* Copyright 2019 The Matrix.org Foundation C.I.C. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +CREATE INDEX public_room_list_stream_network ON public_room_list_stream (appservice_id, network_id, room_id); diff --git a/synapse/storage/schema/delta/56/redaction_censor.sql b/synapse/storage/schema/delta/56/redaction_censor.sql new file mode 100644 index 000000000..fe51b0230 --- /dev/null +++ b/synapse/storage/schema/delta/56/redaction_censor.sql @@ -0,0 +1,17 @@ +/* Copyright 2019 The Matrix.org Foundation C.I.C. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +ALTER TABLE redactions ADD COLUMN have_censored BOOL NOT NULL DEFAULT false; +CREATE INDEX redactions_have_censored ON redactions(event_id) WHERE not have_censored; diff --git a/synapse/storage/schema/delta/56/redaction_censor2.sql b/synapse/storage/schema/delta/56/redaction_censor2.sql new file mode 100644 index 000000000..77a5eca49 --- /dev/null +++ b/synapse/storage/schema/delta/56/redaction_censor2.sql @@ -0,0 +1,20 @@ +/* Copyright 2019 The Matrix.org Foundation C.I.C. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +ALTER TABLE redactions ADD COLUMN received_ts BIGINT; +CREATE INDEX redactions_have_censored_ts ON redactions(received_ts) WHERE not have_censored; + +INSERT INTO background_updates (update_name, progress_json) VALUES + ('redactions_received_ts', '{}'); diff --git a/synapse/storage/schema/delta/56/redaction_censor3_fix_update.sql.postgres b/synapse/storage/schema/delta/56/redaction_censor3_fix_update.sql.postgres new file mode 100644 index 000000000..67471f3ef --- /dev/null +++ b/synapse/storage/schema/delta/56/redaction_censor3_fix_update.sql.postgres @@ -0,0 +1,25 @@ +/* Copyright 2019 The Matrix.org Foundation C.I.C. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +-- There was a bug where we may have updated censored redactions as bytes, +-- which can (somehow) cause json to be inserted hex encoded. These updates go +-- and undoes any such hex encoded JSON. + +INSERT into background_updates (update_name, progress_json) + VALUES ('event_fix_redactions_bytes_create_index', '{}'); + +INSERT into background_updates (update_name, progress_json, depends_on) + VALUES ('event_fix_redactions_bytes', '{}', 'event_fix_redactions_bytes_create_index'); diff --git a/synapse/storage/schema/delta/56/unique_user_filter_index.py b/synapse/storage/schema/delta/56/unique_user_filter_index.py new file mode 100644 index 000000000..1de8b5496 --- /dev/null +++ b/synapse/storage/schema/delta/56/unique_user_filter_index.py @@ -0,0 +1,52 @@ +import logging + +from synapse.storage.engines import PostgresEngine + +logger = logging.getLogger(__name__) + + +""" +This migration updates the user_filters table as follows: + + - drops any (user_id, filter_id) duplicates + - makes the columns NON-NULLable + - turns the index into a UNIQUE index +""" + + +def run_upgrade(cur, database_engine, *args, **kwargs): + pass + + +def run_create(cur, database_engine, *args, **kwargs): + if isinstance(database_engine, PostgresEngine): + select_clause = """ + SELECT DISTINCT ON (user_id, filter_id) user_id, filter_id, filter_json + FROM user_filters + """ + else: + select_clause = """ + SELECT * FROM user_filters GROUP BY user_id, filter_id + """ + sql = """ + DROP TABLE IF EXISTS user_filters_migration; + DROP INDEX IF EXISTS user_filters_unique; + CREATE TABLE user_filters_migration ( + user_id TEXT NOT NULL, + filter_id BIGINT NOT NULL, + filter_json BYTEA NOT NULL + ); + INSERT INTO user_filters_migration (user_id, filter_id, filter_json) + %s; + CREATE UNIQUE INDEX user_filters_unique ON user_filters_migration + (user_id, filter_id); + DROP TABLE user_filters; + ALTER TABLE user_filters_migration RENAME TO user_filters; + """ % ( + select_clause, + ) + + if isinstance(database_engine, PostgresEngine): + cur.execute(sql) + else: + cur.executescript(sql) diff --git a/synapse/storage/schema/delta/56/user_external_ids.sql b/synapse/storage/schema/delta/56/user_external_ids.sql new file mode 100644 index 000000000..91390c452 --- /dev/null +++ b/synapse/storage/schema/delta/56/user_external_ids.sql @@ -0,0 +1,24 @@ +/* Copyright 2019 The Matrix.org Foundation C.I.C. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * a table which records mappings from external auth providers to mxids + */ +CREATE TABLE IF NOT EXISTS user_external_ids ( + auth_provider TEXT NOT NULL, + external_id TEXT NOT NULL, + user_id TEXT NOT NULL, + UNIQUE (auth_provider, external_id) +); diff --git a/synapse/storage/search.py b/synapse/storage/search.py index df87ab6a6..7695bf09f 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -24,6 +24,7 @@ from canonicaljson import json from twisted.internet import defer from synapse.api.errors import SynapseError +from synapse.storage._base import make_in_list_sql_clause from synapse.storage.engines import PostgresEngine, Sqlite3Engine from .background_updates import BackgroundUpdateStore @@ -36,7 +37,7 @@ SearchEntry = namedtuple( ) -class SearchStore(BackgroundUpdateStore): +class SearchBackgroundUpdateStore(BackgroundUpdateStore): EVENT_SEARCH_UPDATE_NAME = "event_search" EVENT_SEARCH_ORDER_UPDATE_NAME = "event_search_order" @@ -44,7 +45,7 @@ class SearchStore(BackgroundUpdateStore): EVENT_SEARCH_USE_GIN_POSTGRES_NAME = "event_search_postgres_gin" def __init__(self, db_conn, hs): - super(SearchStore, self).__init__(db_conn, hs) + super(SearchBackgroundUpdateStore, self).__init__(db_conn, hs) if not hs.config.enable_search: return @@ -289,29 +290,6 @@ class SearchStore(BackgroundUpdateStore): return num_rows - def store_event_search_txn(self, txn, event, key, value): - """Add event to the search table - - Args: - txn (cursor): - event (EventBase): - key (str): - value (str): - """ - self.store_search_entries_txn( - txn, - ( - SearchEntry( - key=key, - value=value, - event_id=event.event_id, - room_id=event.room_id, - stream_ordering=event.internal_metadata.stream_ordering, - origin_server_ts=event.origin_server_ts, - ), - ), - ) - def store_search_entries_txn(self, txn, entries): """Add entries to the search table @@ -358,6 +336,34 @@ class SearchStore(BackgroundUpdateStore): # This should be unreachable. raise Exception("Unrecognized database engine") + +class SearchStore(SearchBackgroundUpdateStore): + def __init__(self, db_conn, hs): + super(SearchStore, self).__init__(db_conn, hs) + + def store_event_search_txn(self, txn, event, key, value): + """Add event to the search table + + Args: + txn (cursor): + event (EventBase): + key (str): + value (str): + """ + self.store_search_entries_txn( + txn, + ( + SearchEntry( + key=key, + value=value, + event_id=event.event_id, + room_id=event.room_id, + stream_ordering=event.internal_metadata.stream_ordering, + origin_server_ts=event.origin_server_ts, + ), + ), + ) + @defer.inlineCallbacks def search_msgs(self, room_ids, search_term, keys): """Performs a full text search over events with given keys. @@ -380,8 +386,10 @@ class SearchStore(BackgroundUpdateStore): # Make sure we don't explode because the person is in too many rooms. # We filter the results below regardless. if len(room_ids) < 500: - clauses.append("room_id IN (%s)" % (",".join(["?"] * len(room_ids)),)) - args.extend(room_ids) + clause, args = make_in_list_sql_clause( + self.database_engine, "room_id", room_ids + ) + clauses = [clause] local_clauses = [] for key in keys: @@ -487,8 +495,10 @@ class SearchStore(BackgroundUpdateStore): # Make sure we don't explode because the person is in too many rooms. # We filter the results below regardless. if len(room_ids) < 500: - clauses.append("room_id IN (%s)" % (",".join(["?"] * len(room_ids)),)) - args.extend(room_ids) + clause, args = make_in_list_sql_clause( + self.database_engine, "room_id", room_ids + ) + clauses = [clause] local_clauses = [] for key in keys: diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 1980a8710..a941a5ae3 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -353,8 +353,158 @@ class StateFilter(object): return member_filter, non_member_filter +class StateGroupBackgroundUpdateStore(SQLBaseStore): + """Defines functions related to state groups needed to run the state backgroud + updates. + """ + + def _count_state_group_hops_txn(self, txn, state_group): + """Given a state group, count how many hops there are in the tree. + + This is used to ensure the delta chains don't get too long. + """ + if isinstance(self.database_engine, PostgresEngine): + sql = """ + WITH RECURSIVE state(state_group) AS ( + VALUES(?::bigint) + UNION ALL + SELECT prev_state_group FROM state_group_edges e, state s + WHERE s.state_group = e.state_group + ) + SELECT count(*) FROM state; + """ + + txn.execute(sql, (state_group,)) + row = txn.fetchone() + if row and row[0]: + return row[0] + else: + return 0 + else: + # We don't use WITH RECURSIVE on sqlite3 as there are distributions + # that ship with an sqlite3 version that doesn't support it (e.g. wheezy) + next_group = state_group + count = 0 + + while next_group: + next_group = self._simple_select_one_onecol_txn( + txn, + table="state_group_edges", + keyvalues={"state_group": next_group}, + retcol="prev_state_group", + allow_none=True, + ) + if next_group: + count += 1 + + return count + + def _get_state_groups_from_groups_txn( + self, txn, groups, state_filter=StateFilter.all() + ): + results = {group: {} for group in groups} + + where_clause, where_args = state_filter.make_sql_filter_clause() + + # Unless the filter clause is empty, we're going to append it after an + # existing where clause + if where_clause: + where_clause = " AND (%s)" % (where_clause,) + + if isinstance(self.database_engine, PostgresEngine): + # Temporarily disable sequential scans in this transaction. This is + # a temporary hack until we can add the right indices in + txn.execute("SET LOCAL enable_seqscan=off") + + # The below query walks the state_group tree so that the "state" + # table includes all state_groups in the tree. It then joins + # against `state_groups_state` to fetch the latest state. + # It assumes that previous state groups are always numerically + # lesser. + # The PARTITION is used to get the event_id in the greatest state + # group for the given type, state_key. + # This may return multiple rows per (type, state_key), but last_value + # should be the same. + sql = """ + WITH RECURSIVE state(state_group) AS ( + VALUES(?::bigint) + UNION ALL + SELECT prev_state_group FROM state_group_edges e, state s + WHERE s.state_group = e.state_group + ) + SELECT DISTINCT type, state_key, last_value(event_id) OVER ( + PARTITION BY type, state_key ORDER BY state_group ASC + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) AS event_id FROM state_groups_state + WHERE state_group IN ( + SELECT state_group FROM state + ) + """ + + for group in groups: + args = [group] + args.extend(where_args) + + txn.execute(sql + where_clause, args) + for row in txn: + typ, state_key, event_id = row + key = (typ, state_key) + results[group][key] = event_id + else: + max_entries_returned = state_filter.max_entries_returned() + + # We don't use WITH RECURSIVE on sqlite3 as there are distributions + # that ship with an sqlite3 version that doesn't support it (e.g. wheezy) + for group in groups: + next_group = group + + while next_group: + # We did this before by getting the list of group ids, and + # then passing that list to sqlite to get latest event for + # each (type, state_key). However, that was terribly slow + # without the right indices (which we can't add until + # after we finish deduping state, which requires this func) + args = [next_group] + args.extend(where_args) + + txn.execute( + "SELECT type, state_key, event_id FROM state_groups_state" + " WHERE state_group = ? " + where_clause, + args, + ) + results[group].update( + ((typ, state_key), event_id) + for typ, state_key, event_id in txn + if (typ, state_key) not in results[group] + ) + + # If the number of entries in the (type,state_key)->event_id dict + # matches the number of (type,state_keys) types we were searching + # for, then we must have found them all, so no need to go walk + # further down the tree... UNLESS our types filter contained + # wildcards (i.e. Nones) in which case we have to do an exhaustive + # search + if ( + max_entries_returned is not None + and len(results[group]) == max_entries_returned + ): + break + + next_group = self._simple_select_one_onecol_txn( + txn, + table="state_group_edges", + keyvalues={"state_group": next_group}, + retcol="prev_state_group", + allow_none=True, + ) + + return results + + # this inherits from EventsWorkerStore because it calls self.get_events -class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): +class StateGroupWorkerStore( + EventsWorkerStore, StateGroupBackgroundUpdateStore, SQLBaseStore +): """The parts of StateGroupStore that can be called from workers. """ @@ -694,107 +844,6 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): return results - def _get_state_groups_from_groups_txn( - self, txn, groups, state_filter=StateFilter.all() - ): - results = {group: {} for group in groups} - - where_clause, where_args = state_filter.make_sql_filter_clause() - - # Unless the filter clause is empty, we're going to append it after an - # existing where clause - if where_clause: - where_clause = " AND (%s)" % (where_clause,) - - if isinstance(self.database_engine, PostgresEngine): - # Temporarily disable sequential scans in this transaction. This is - # a temporary hack until we can add the right indices in - txn.execute("SET LOCAL enable_seqscan=off") - - # The below query walks the state_group tree so that the "state" - # table includes all state_groups in the tree. It then joins - # against `state_groups_state` to fetch the latest state. - # It assumes that previous state groups are always numerically - # lesser. - # The PARTITION is used to get the event_id in the greatest state - # group for the given type, state_key. - # This may return multiple rows per (type, state_key), but last_value - # should be the same. - sql = """ - WITH RECURSIVE state(state_group) AS ( - VALUES(?::bigint) - UNION ALL - SELECT prev_state_group FROM state_group_edges e, state s - WHERE s.state_group = e.state_group - ) - SELECT DISTINCT type, state_key, last_value(event_id) OVER ( - PARTITION BY type, state_key ORDER BY state_group ASC - ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING - ) AS event_id FROM state_groups_state - WHERE state_group IN ( - SELECT state_group FROM state - ) - """ - - for group in groups: - args = [group] - args.extend(where_args) - - txn.execute(sql + where_clause, args) - for row in txn: - typ, state_key, event_id = row - key = (typ, state_key) - results[group][key] = event_id - else: - max_entries_returned = state_filter.max_entries_returned() - - # We don't use WITH RECURSIVE on sqlite3 as there are distributions - # that ship with an sqlite3 version that doesn't support it (e.g. wheezy) - for group in groups: - next_group = group - - while next_group: - # We did this before by getting the list of group ids, and - # then passing that list to sqlite to get latest event for - # each (type, state_key). However, that was terribly slow - # without the right indices (which we can't add until - # after we finish deduping state, which requires this func) - args = [next_group] - args.extend(where_args) - - txn.execute( - "SELECT type, state_key, event_id FROM state_groups_state" - " WHERE state_group = ? " + where_clause, - args, - ) - results[group].update( - ((typ, state_key), event_id) - for typ, state_key, event_id in txn - if (typ, state_key) not in results[group] - ) - - # If the number of entries in the (type,state_key)->event_id dict - # matches the number of (type,state_keys) types we were searching - # for, then we must have found them all, so no need to go walk - # further down the tree... UNLESS our types filter contained - # wildcards (i.e. Nones) in which case we have to do an exhaustive - # search - if ( - max_entries_returned is not None - and len(results[group]) == max_entries_returned - ): - break - - next_group = self._simple_select_one_onecol_txn( - txn, - table="state_group_edges", - keyvalues={"state_group": next_group}, - retcol="prev_state_group", - allow_none=True, - ) - - return results - @defer.inlineCallbacks def get_state_for_events(self, event_ids, state_filter=StateFilter.all()): """Given a list of event_ids and type tuples, return a list of state @@ -1238,66 +1287,10 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): return self.runInteraction("store_state_group", _store_state_group_txn) - def _count_state_group_hops_txn(self, txn, state_group): - """Given a state group, count how many hops there are in the tree. - This is used to ensure the delta chains don't get too long. - """ - if isinstance(self.database_engine, PostgresEngine): - sql = """ - WITH RECURSIVE state(state_group) AS ( - VALUES(?::bigint) - UNION ALL - SELECT prev_state_group FROM state_group_edges e, state s - WHERE s.state_group = e.state_group - ) - SELECT count(*) FROM state; - """ - - txn.execute(sql, (state_group,)) - row = txn.fetchone() - if row and row[0]: - return row[0] - else: - return 0 - else: - # We don't use WITH RECURSIVE on sqlite3 as there are distributions - # that ship with an sqlite3 version that doesn't support it (e.g. wheezy) - next_group = state_group - count = 0 - - while next_group: - next_group = self._simple_select_one_onecol_txn( - txn, - table="state_group_edges", - keyvalues={"state_group": next_group}, - retcol="prev_state_group", - allow_none=True, - ) - if next_group: - count += 1 - - return count - - -class StateStore(StateGroupWorkerStore, BackgroundUpdateStore): - """ Keeps track of the state at a given event. - - This is done by the concept of `state groups`. Every event is a assigned - a state group (identified by an arbitrary string), which references a - collection of state events. The current state of an event is then the - collection of state events referenced by the event's state group. - - Hence, every change in the current state causes a new state group to be - generated. However, if no change happens (e.g., if we get a message event - with only one parent it inherits the state group from its parent.) - - There are three tables: - * `state_groups`: Stores group name, first event with in the group and - room id. - * `event_to_state_groups`: Maps events to state groups. - * `state_groups_state`: Maps state group to state events. - """ +class StateBackgroundUpdateStore( + StateGroupBackgroundUpdateStore, BackgroundUpdateStore +): STATE_GROUP_DEDUPLICATION_UPDATE_NAME = "state_group_state_deduplication" STATE_GROUP_INDEX_UPDATE_NAME = "state_group_state_type_index" @@ -1305,7 +1298,7 @@ class StateStore(StateGroupWorkerStore, BackgroundUpdateStore): EVENT_STATE_GROUP_INDEX_UPDATE_NAME = "event_to_state_groups_sg_index" def __init__(self, db_conn, hs): - super(StateStore, self).__init__(db_conn, hs) + super(StateBackgroundUpdateStore, self).__init__(db_conn, hs) self.register_background_update_handler( self.STATE_GROUP_DEDUPLICATION_UPDATE_NAME, self._background_deduplicate_state, @@ -1327,34 +1320,6 @@ class StateStore(StateGroupWorkerStore, BackgroundUpdateStore): columns=["state_group"], ) - def _store_event_state_mappings_txn(self, txn, events_and_contexts): - state_groups = {} - for event, context in events_and_contexts: - if event.internal_metadata.is_outlier(): - continue - - # if the event was rejected, just give it the same state as its - # predecessor. - if context.rejected: - state_groups[event.event_id] = context.prev_group - continue - - state_groups[event.event_id] = context.state_group - - self._simple_insert_many_txn( - txn, - table="event_to_state_groups", - values=[ - {"state_group": state_group_id, "event_id": event_id} - for event_id, state_group_id in iteritems(state_groups) - ], - ) - - for event_id, state_group_id in iteritems(state_groups): - txn.call_after( - self._get_state_group_for_event.prefill, (event_id,), state_group_id - ) - @defer.inlineCallbacks def _background_deduplicate_state(self, progress, batch_size): """This background update will slowly deduplicate state by reencoding @@ -1527,3 +1492,54 @@ class StateStore(StateGroupWorkerStore, BackgroundUpdateStore): yield self._end_background_update(self.STATE_GROUP_INDEX_UPDATE_NAME) return 1 + + +class StateStore(StateGroupWorkerStore, StateBackgroundUpdateStore): + """ Keeps track of the state at a given event. + + This is done by the concept of `state groups`. Every event is a assigned + a state group (identified by an arbitrary string), which references a + collection of state events. The current state of an event is then the + collection of state events referenced by the event's state group. + + Hence, every change in the current state causes a new state group to be + generated. However, if no change happens (e.g., if we get a message event + with only one parent it inherits the state group from its parent.) + + There are three tables: + * `state_groups`: Stores group name, first event with in the group and + room id. + * `event_to_state_groups`: Maps events to state groups. + * `state_groups_state`: Maps state group to state events. + """ + + def __init__(self, db_conn, hs): + super(StateStore, self).__init__(db_conn, hs) + + def _store_event_state_mappings_txn(self, txn, events_and_contexts): + state_groups = {} + for event, context in events_and_contexts: + if event.internal_metadata.is_outlier(): + continue + + # if the event was rejected, just give it the same state as its + # predecessor. + if context.rejected: + state_groups[event.event_id] = context.prev_group + continue + + state_groups[event.event_id] = context.state_group + + self._simple_insert_many_txn( + txn, + table="event_to_state_groups", + values=[ + {"state_group": state_group_id, "event_id": event_id} + for event_id, state_group_id in iteritems(state_groups) + ], + ) + + for event_id, state_group_id in iteritems(state_groups): + txn.call_after( + self._get_state_group_for_event.prefill, (event_id,), state_group_id + ) diff --git a/synapse/storage/state_deltas.py b/synapse/storage/state_deltas.py index 5fdb44210..28f33ec18 100644 --- a/synapse/storage/state_deltas.py +++ b/synapse/storage/state_deltas.py @@ -21,7 +21,7 @@ logger = logging.getLogger(__name__) class StateDeltasStore(SQLBaseStore): - def get_current_state_deltas(self, prev_stream_id): + def get_current_state_deltas(self, prev_stream_id: int, max_stream_id: int): """Fetch a list of room state changes since the given stream id Each entry in the result contains the following fields: @@ -36,15 +36,27 @@ class StateDeltasStore(SQLBaseStore): Args: prev_stream_id (int): point to get changes since (exclusive) + max_stream_id (int): the point that we know has been correctly persisted + - ie, an upper limit to return changes from. Returns: - Deferred[list[dict]]: results + Deferred[tuple[int, list[dict]]: A tuple consisting of: + - the stream id which these results go up to + - list of current_state_delta_stream rows. If it is empty, we are + up to date. """ prev_stream_id = int(prev_stream_id) + + # check we're not going backwards + assert prev_stream_id <= max_stream_id + if not self._curr_state_delta_stream_cache.has_any_entity_changed( prev_stream_id ): - return [] + # if the CSDs haven't changed between prev_stream_id and now, we + # know for certain that they haven't changed between prev_stream_id and + # max_stream_id. + return max_stream_id, [] def get_current_state_deltas_txn(txn): # First we calculate the max stream id that will give us less than @@ -54,21 +66,29 @@ class StateDeltasStore(SQLBaseStore): sql = """ SELECT stream_id, count(*) FROM current_state_delta_stream - WHERE stream_id > ? + WHERE stream_id > ? AND stream_id <= ? GROUP BY stream_id ORDER BY stream_id ASC LIMIT 100 """ - txn.execute(sql, (prev_stream_id,)) + txn.execute(sql, (prev_stream_id, max_stream_id)) total = 0 - max_stream_id = prev_stream_id - for max_stream_id, count in txn: + + for stream_id, count in txn: total += count if total > 100: # We arbitarily limit to 100 entries to ensure we don't # select toooo many. + logger.debug( + "Clipping current_state_delta_stream rows to stream_id %i", + stream_id, + ) + clipped_stream_id = stream_id break + else: + # if there's no problem, we may as well go right up to the max_stream_id + clipped_stream_id = max_stream_id # Now actually get the deltas sql = """ @@ -77,8 +97,8 @@ class StateDeltasStore(SQLBaseStore): WHERE ? < stream_id AND stream_id <= ? ORDER BY stream_id ASC """ - txn.execute(sql, (prev_stream_id, max_stream_id)) - return self.cursor_to_dict(txn) + txn.execute(sql, (prev_stream_id, clipped_stream_id)) + return clipped_stream_id, self.cursor_to_dict(txn) return self.runInteraction( "get_current_state_deltas", get_current_state_deltas_txn diff --git a/synapse/storage/stats.py b/synapse/storage/stats.py index 09190d684..7c224cd3d 100644 --- a/synapse/storage/stats.py +++ b/synapse/storage/stats.py @@ -332,6 +332,9 @@ class StatsStore(StateDeltasStore): def _bulk_update_stats_delta_txn(txn): for stats_type, stats_updates in updates.items(): for stats_id, fields in stats_updates.items(): + logger.info( + "Updating %s stats for %s: %s", stats_type, stats_id, fields + ) self._update_stats_delta_txn( txn, ts=ts, diff --git a/synapse/storage/transactions.py b/synapse/storage/transactions.py index b3c3bf55b..289c11739 100644 --- a/synapse/storage/transactions.py +++ b/synapse/storage/transactions.py @@ -165,7 +165,7 @@ class TransactionStore(SQLBaseStore): txn, table="destinations", keyvalues={"destination": destination}, - retcols=("destination", "retry_last_ts", "retry_interval"), + retcols=("destination", "failure_ts", "retry_last_ts", "retry_interval"), allow_none=True, ) @@ -174,12 +174,15 @@ class TransactionStore(SQLBaseStore): else: return None - def set_destination_retry_timings(self, destination, retry_last_ts, retry_interval): + def set_destination_retry_timings( + self, destination, failure_ts, retry_last_ts, retry_interval + ): """Sets the current retry timings for a given destination. Both timings should be zero if retrying is no longer occuring. Args: destination (str) + failure_ts (int|None) - when the server started failing (ms since epoch) retry_last_ts (int) - time of last retry attempt in unix epoch ms retry_interval (int) - how long until next retry in ms """ @@ -189,12 +192,13 @@ class TransactionStore(SQLBaseStore): "set_destination_retry_timings", self._set_destination_retry_timings, destination, + failure_ts, retry_last_ts, retry_interval, ) def _set_destination_retry_timings( - self, txn, destination, retry_last_ts, retry_interval + self, txn, destination, failure_ts, retry_last_ts, retry_interval ): if self.database_engine.can_native_upsert: @@ -202,9 +206,12 @@ class TransactionStore(SQLBaseStore): # resetting it) or greater than the existing retry interval. sql = """ - INSERT INTO destinations (destination, retry_last_ts, retry_interval) - VALUES (?, ?, ?) + INSERT INTO destinations ( + destination, failure_ts, retry_last_ts, retry_interval + ) + VALUES (?, ?, ?, ?) ON CONFLICT (destination) DO UPDATE SET + failure_ts = EXCLUDED.failure_ts, retry_last_ts = EXCLUDED.retry_last_ts, retry_interval = EXCLUDED.retry_interval WHERE @@ -212,7 +219,7 @@ class TransactionStore(SQLBaseStore): OR destinations.retry_interval < EXCLUDED.retry_interval """ - txn.execute(sql, (destination, retry_last_ts, retry_interval)) + txn.execute(sql, (destination, failure_ts, retry_last_ts, retry_interval)) return @@ -225,7 +232,7 @@ class TransactionStore(SQLBaseStore): txn, table="destinations", keyvalues={"destination": destination}, - retcols=("retry_last_ts", "retry_interval"), + retcols=("failure_ts", "retry_last_ts", "retry_interval"), allow_none=True, ) @@ -235,6 +242,7 @@ class TransactionStore(SQLBaseStore): table="destinations", values={ "destination": destination, + "failure_ts": failure_ts, "retry_last_ts": retry_last_ts, "retry_interval": retry_interval, }, @@ -245,31 +253,12 @@ class TransactionStore(SQLBaseStore): "destinations", keyvalues={"destination": destination}, updatevalues={ + "failure_ts": failure_ts, "retry_last_ts": retry_last_ts, "retry_interval": retry_interval, }, ) - def get_destinations_needing_retry(self): - """Get all destinations which are due a retry for sending a transaction. - - Returns: - list: A list of dicts - """ - - return self.runInteraction( - "get_destinations_needing_retry", self._get_destinations_needing_retry - ) - - def _get_destinations_needing_retry(self, txn): - query = ( - "SELECT * FROM destinations" - " WHERE retry_last_ts > 0 and retry_next_ts < ?" - ) - - txn.execute(query, (self._clock.time_msec(),)) - return self.cursor_to_dict(txn) - def _start_cleanup_transactions(self): return run_as_background_process( "cleanup_transactions", self._cleanup_transactions diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index b5188d9be..1b1e4751b 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -32,14 +32,14 @@ logger = logging.getLogger(__name__) TEMP_TABLE = "_temp_populate_user_directory" -class UserDirectoryStore(StateDeltasStore, BackgroundUpdateStore): +class UserDirectoryBackgroundUpdateStore(StateDeltasStore, BackgroundUpdateStore): # How many records do we calculate before sending it to # add_users_who_share_private_rooms? SHARE_PRIVATE_WORKING_SET = 500 def __init__(self, db_conn, hs): - super(UserDirectoryStore, self).__init__(db_conn, hs) + super(UserDirectoryBackgroundUpdateStore, self).__init__(db_conn, hs) self.server_name = hs.hostname @@ -452,55 +452,6 @@ class UserDirectoryStore(StateDeltasStore, BackgroundUpdateStore): "update_profile_in_user_dir", _update_profile_in_user_dir_txn ) - def remove_from_user_dir(self, user_id): - def _remove_from_user_dir_txn(txn): - self._simple_delete_txn( - txn, table="user_directory", keyvalues={"user_id": user_id} - ) - self._simple_delete_txn( - txn, table="user_directory_search", keyvalues={"user_id": user_id} - ) - self._simple_delete_txn( - txn, table="users_in_public_rooms", keyvalues={"user_id": user_id} - ) - self._simple_delete_txn( - txn, - table="users_who_share_private_rooms", - keyvalues={"user_id": user_id}, - ) - self._simple_delete_txn( - txn, - table="users_who_share_private_rooms", - keyvalues={"other_user_id": user_id}, - ) - txn.call_after(self.get_user_in_directory.invalidate, (user_id,)) - - return self.runInteraction("remove_from_user_dir", _remove_from_user_dir_txn) - - @defer.inlineCallbacks - def get_users_in_dir_due_to_room(self, room_id): - """Get all user_ids that are in the room directory because they're - in the given room_id - """ - user_ids_share_pub = yield self._simple_select_onecol( - table="users_in_public_rooms", - keyvalues={"room_id": room_id}, - retcol="user_id", - desc="get_users_in_dir_due_to_room", - ) - - user_ids_share_priv = yield self._simple_select_onecol( - table="users_who_share_private_rooms", - keyvalues={"room_id": room_id}, - retcol="other_user_id", - desc="get_users_in_dir_due_to_room", - ) - - user_ids = set(user_ids_share_pub) - user_ids.update(user_ids_share_priv) - - return user_ids - def add_users_who_share_private_room(self, room_id, user_id_tuples): """Insert entries into the users_who_share_private_rooms table. The first user should be a local user. @@ -551,6 +502,98 @@ class UserDirectoryStore(StateDeltasStore, BackgroundUpdateStore): "add_users_in_public_rooms", _add_users_in_public_rooms_txn ) + def delete_all_from_user_dir(self): + """Delete the entire user directory + """ + + def _delete_all_from_user_dir_txn(txn): + txn.execute("DELETE FROM user_directory") + txn.execute("DELETE FROM user_directory_search") + txn.execute("DELETE FROM users_in_public_rooms") + txn.execute("DELETE FROM users_who_share_private_rooms") + txn.call_after(self.get_user_in_directory.invalidate_all) + + return self.runInteraction( + "delete_all_from_user_dir", _delete_all_from_user_dir_txn + ) + + @cached() + def get_user_in_directory(self, user_id): + return self._simple_select_one( + table="user_directory", + keyvalues={"user_id": user_id}, + retcols=("display_name", "avatar_url"), + allow_none=True, + desc="get_user_in_directory", + ) + + def update_user_directory_stream_pos(self, stream_id): + return self._simple_update_one( + table="user_directory_stream_pos", + keyvalues={}, + updatevalues={"stream_id": stream_id}, + desc="update_user_directory_stream_pos", + ) + + +class UserDirectoryStore(UserDirectoryBackgroundUpdateStore): + + # How many records do we calculate before sending it to + # add_users_who_share_private_rooms? + SHARE_PRIVATE_WORKING_SET = 500 + + def __init__(self, db_conn, hs): + super(UserDirectoryStore, self).__init__(db_conn, hs) + + def remove_from_user_dir(self, user_id): + def _remove_from_user_dir_txn(txn): + self._simple_delete_txn( + txn, table="user_directory", keyvalues={"user_id": user_id} + ) + self._simple_delete_txn( + txn, table="user_directory_search", keyvalues={"user_id": user_id} + ) + self._simple_delete_txn( + txn, table="users_in_public_rooms", keyvalues={"user_id": user_id} + ) + self._simple_delete_txn( + txn, + table="users_who_share_private_rooms", + keyvalues={"user_id": user_id}, + ) + self._simple_delete_txn( + txn, + table="users_who_share_private_rooms", + keyvalues={"other_user_id": user_id}, + ) + txn.call_after(self.get_user_in_directory.invalidate, (user_id,)) + + return self.runInteraction("remove_from_user_dir", _remove_from_user_dir_txn) + + @defer.inlineCallbacks + def get_users_in_dir_due_to_room(self, room_id): + """Get all user_ids that are in the room directory because they're + in the given room_id + """ + user_ids_share_pub = yield self._simple_select_onecol( + table="users_in_public_rooms", + keyvalues={"room_id": room_id}, + retcol="user_id", + desc="get_users_in_dir_due_to_room", + ) + + user_ids_share_priv = yield self._simple_select_onecol( + table="users_who_share_private_rooms", + keyvalues={"room_id": room_id}, + retcol="other_user_id", + desc="get_users_in_dir_due_to_room", + ) + + user_ids = set(user_ids_share_pub) + user_ids.update(user_ids_share_priv) + + return user_ids + def remove_user_who_share_room(self, user_id, room_id): """ Deletes entries in the users_who_share_*_rooms table. The first @@ -637,31 +680,6 @@ class UserDirectoryStore(StateDeltasStore, BackgroundUpdateStore): return [room_id for room_id, in rows] - def delete_all_from_user_dir(self): - """Delete the entire user directory - """ - - def _delete_all_from_user_dir_txn(txn): - txn.execute("DELETE FROM user_directory") - txn.execute("DELETE FROM user_directory_search") - txn.execute("DELETE FROM users_in_public_rooms") - txn.execute("DELETE FROM users_who_share_private_rooms") - txn.call_after(self.get_user_in_directory.invalidate_all) - - return self.runInteraction( - "delete_all_from_user_dir", _delete_all_from_user_dir_txn - ) - - @cached() - def get_user_in_directory(self, user_id): - return self._simple_select_one( - table="user_directory", - keyvalues={"user_id": user_id}, - retcols=("display_name", "avatar_url"), - allow_none=True, - desc="get_user_in_directory", - ) - def get_user_directory_stream_pos(self): return self._simple_select_one_onecol( table="user_directory_stream_pos", @@ -670,14 +688,6 @@ class UserDirectoryStore(StateDeltasStore, BackgroundUpdateStore): desc="get_user_directory_stream_pos", ) - def update_user_directory_stream_pos(self, stream_id): - return self._simple_update_one( - table="user_directory_stream_pos", - keyvalues={}, - updatevalues={"stream_id": stream_id}, - desc="update_user_directory_stream_pos", - ) - @defer.inlineCallbacks def search_user_dir(self, user_id, search_term, limit): """Searches for users in directory diff --git a/synapse/storage/user_erasure_store.py b/synapse/storage/user_erasure_store.py index 05cabc228..aa4f0da5f 100644 --- a/synapse/storage/user_erasure_store.py +++ b/synapse/storage/user_erasure_store.py @@ -56,15 +56,15 @@ class UserErasureWorkerStore(SQLBaseStore): # iterate it multiple times, and (b) avoiding duplicates. user_ids = tuple(set(user_ids)) - def _get_erased_users(txn): - txn.execute( - "SELECT user_id FROM erased_users WHERE user_id IN (%s)" - % (",".join("?" * len(user_ids))), - user_ids, - ) - return set(r[0] for r in txn) + rows = yield self._simple_select_many_batch( + table="erased_users", + column="user_id", + iterable=user_ids, + retcols=("user_id",), + desc="are_users_erased", + ) + erased_users = set(row["user_id"] for row in rows) - erased_users = yield self.runInteraction("are_users_erased", _get_erased_users) res = dict((u, u in erased_users) for u in user_ids) return res diff --git a/synapse/types.py b/synapse/types.py index 00bb0743f..aafc3ffe7 100644 --- a/synapse/types.py +++ b/synapse/types.py @@ -321,6 +321,7 @@ class StreamToken( ) ): _SEPARATOR = "_" + START = None # type: StreamToken @classmethod def from_string(cls, string): @@ -405,7 +406,7 @@ class RoomStreamToken(namedtuple("_StreamToken", "topological stream")): followed by the "stream_ordering" id of the event it comes after. """ - __slots__ = [] + __slots__ = [] # type: list @classmethod def parse(cls, string): diff --git a/synapse/util/async_helpers.py b/synapse/util/async_helpers.py index f1c46836b..804dbca44 100644 --- a/synapse/util/async_helpers.py +++ b/synapse/util/async_helpers.py @@ -13,12 +13,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + import collections import logging from contextlib import contextmanager +from typing import Dict, Sequence, Set, Union from six.moves import range +import attr + from twisted.internet import defer from twisted.internet.defer import CancelledError from twisted.python import failure @@ -213,7 +217,9 @@ class Linearizer(object): # the first element is the number of things executing, and # the second element is an OrderedDict, where the keys are deferreds for the # things blocked from executing. - self.key_to_defer = {} + self.key_to_defer = ( + {} + ) # type: Dict[str, Sequence[Union[int, Dict[defer.Deferred, int]]]] def queue(self, key): # we avoid doing defer.inlineCallbacks here, so that cancellation works correctly. @@ -340,10 +346,10 @@ class ReadWriteLock(object): def __init__(self): # Latest readers queued - self.key_to_current_readers = {} + self.key_to_current_readers = {} # type: Dict[str, Set[defer.Deferred]] # Latest writer queued - self.key_to_current_writer = {} + self.key_to_current_writer = {} # type: Dict[str, defer.Deferred] @defer.inlineCallbacks def read(self, key): @@ -479,3 +485,30 @@ def timeout_deferred(deferred, timeout, reactor, on_timeout_cancel=None): deferred.addCallbacks(success_cb, failure_cb) return new_d + + +@attr.s(slots=True, frozen=True) +class DoneAwaitable(object): + """Simple awaitable that returns the provided value. + """ + + value = attr.ib() + + def __await__(self): + return self + + def __iter__(self): + return self + + def __next__(self): + raise StopIteration(self.value) + + +def maybe_awaitable(value): + """Convert a value to an awaitable if not already an awaitable. + """ + + if hasattr(value, "__await__"): + return value + + return DoneAwaitable(value) diff --git a/synapse/util/caches/__init__.py b/synapse/util/caches/__init__.py index b50e3503f..43fd65d69 100644 --- a/synapse/util/caches/__init__.py +++ b/synapse/util/caches/__init__.py @@ -16,6 +16,7 @@ import logging import os +from typing import Dict import six from six.moves import intern @@ -37,7 +38,7 @@ def get_cache_factor_for(cache_name): caches_by_name = {} -collectors_by_name = {} +collectors_by_name = {} # type: Dict cache_size = Gauge("synapse_util_caches_cache:size", "", ["name"]) cache_hits = Gauge("synapse_util_caches_cache:hits", "", ["name"]) diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py index 43f66ec4b..5ac2530a6 100644 --- a/synapse/util/caches/descriptors.py +++ b/synapse/util/caches/descriptors.py @@ -18,10 +18,12 @@ import inspect import logging import threading from collections import namedtuple +from typing import Any, cast from six import itervalues from prometheus_client import Gauge +from typing_extensions import Protocol from twisted.internet import defer @@ -37,6 +39,18 @@ from . import register_cache logger = logging.getLogger(__name__) +class _CachedFunction(Protocol): + invalidate = None # type: Any + invalidate_all = None # type: Any + invalidate_many = None # type: Any + prefill = None # type: Any + cache = None # type: Any + num_args = None # type: Any + + def __name__(self): + ... + + cache_pending_metric = Gauge( "synapse_util_caches_cache_pending", "Number of lookups currently pending for this cache", @@ -245,7 +259,9 @@ class Cache(object): class _CacheDescriptorBase(object): - def __init__(self, orig, num_args, inlineCallbacks, cache_context=False): + def __init__( + self, orig: _CachedFunction, num_args, inlineCallbacks, cache_context=False + ): self.orig = orig if inlineCallbacks: @@ -404,7 +420,7 @@ class CacheDescriptor(_CacheDescriptorBase): return tuple(get_cache_key_gen(args, kwargs)) @functools.wraps(self.orig) - def wrapped(*args, **kwargs): + def _wrapped(*args, **kwargs): # If we're passed a cache_context then we'll want to call its invalidate() # whenever we are invalidated invalidate_callback = kwargs.pop("on_invalidate", None) @@ -440,6 +456,8 @@ class CacheDescriptor(_CacheDescriptorBase): return make_deferred_yieldable(observer) + wrapped = cast(_CachedFunction, _wrapped) + if self.num_args == 1: wrapped.invalidate = lambda key: cache.invalidate(key[0]) wrapped.prefill = lambda key, val: cache.prefill(key[0], val) diff --git a/synapse/util/caches/treecache.py b/synapse/util/caches/treecache.py index 9a72218d8..2ea4e4e91 100644 --- a/synapse/util/caches/treecache.py +++ b/synapse/util/caches/treecache.py @@ -1,3 +1,5 @@ +from typing import Dict + from six import itervalues SENTINEL = object() @@ -12,7 +14,7 @@ class TreeCache(object): def __init__(self): self.size = 0 - self.root = {} + self.root = {} # type: Dict def __setitem__(self, key, value): return self.set(key, value) diff --git a/synapse/util/hash.py b/synapse/util/hash.py new file mode 100644 index 000000000..359168704 --- /dev/null +++ b/synapse/util/hash.py @@ -0,0 +1,33 @@ +# -*- coding: utf-8 -*- + +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import hashlib + +import unpaddedbase64 + + +def sha256_and_url_safe_base64(input_text): + """SHA256 hash an input string, encode the digest as url-safe base64, and + return + + :param input_text: string to hash + :type input_text: str + + :returns a sha256 hashed and url-safe base64 encoded digest + :rtype: str + """ + digest = hashlib.sha256(input_text.encode()).digest() + return unpaddedbase64.encode_base64(digest, urlsafe=True) diff --git a/synapse/util/metrics.py b/synapse/util/metrics.py index 0910930c2..4b1bcdf23 100644 --- a/synapse/util/metrics.py +++ b/synapse/util/metrics.py @@ -60,12 +60,14 @@ in_flight = InFlightGauge( ) -def measure_func(name): +def measure_func(name=None): def wrapper(func): + block_name = func.__name__ if name is None else name + @wraps(func) @defer.inlineCallbacks def measured_func(self, *args, **kwargs): - with Measure(self.clock, name): + with Measure(self.clock, block_name): r = yield func(self, *args, **kwargs) return r diff --git a/synapse/util/module_loader.py b/synapse/util/module_loader.py index 522acd5aa..2705cbe5f 100644 --- a/synapse/util/module_loader.py +++ b/synapse/util/module_loader.py @@ -14,12 +14,13 @@ # limitations under the License. import importlib +import importlib.util from synapse.config._base import ConfigError def load_module(provider): - """ Loads a module with its config + """ Loads a synapse module with its config Take a dict with keys 'module' (the module name) and 'config' (the config dict). @@ -38,3 +39,20 @@ def load_module(provider): raise ConfigError("Failed to parse config for %r: %r" % (provider["module"], e)) return provider_class, provider_config + + +def load_python_module(location: str): + """Load a python module, and return a reference to its global namespace + + Args: + location (str): path to the module + + Returns: + python module object + """ + spec = importlib.util.spec_from_file_location(location, location) + if spec is None: + raise Exception("Unable to load module at %s" % (location,)) + mod = importlib.util.module_from_spec(spec) + spec.loader.exec_module(mod) # type: ignore + return mod diff --git a/synapse/util/patch_inline_callbacks.py b/synapse/util/patch_inline_callbacks.py new file mode 100644 index 000000000..3925927f9 --- /dev/null +++ b/synapse/util/patch_inline_callbacks.py @@ -0,0 +1,219 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import functools +import sys +from typing import Any, Callable, List + +from twisted.internet import defer +from twisted.internet.defer import Deferred +from twisted.python.failure import Failure + +# Tracks if we've already patched inlineCallbacks +_already_patched = False + + +def do_patch(): + """ + Patch defer.inlineCallbacks so that it checks the state of the logcontext on exit + """ + + from synapse.logging.context import LoggingContext + + global _already_patched + + orig_inline_callbacks = defer.inlineCallbacks + if _already_patched: + return + + def new_inline_callbacks(f): + @functools.wraps(f) + def wrapped(*args, **kwargs): + start_context = LoggingContext.current_context() + changes = [] # type: List[str] + orig = orig_inline_callbacks(_check_yield_points(f, changes)) + + try: + res = orig(*args, **kwargs) + except Exception: + if LoggingContext.current_context() != start_context: + for err in changes: + print(err, file=sys.stderr) + + err = "%s changed context from %s to %s on exception" % ( + f, + start_context, + LoggingContext.current_context(), + ) + print(err, file=sys.stderr) + raise Exception(err) + raise + + if not isinstance(res, Deferred) or res.called: + if LoggingContext.current_context() != start_context: + for err in changes: + print(err, file=sys.stderr) + + err = "Completed %s changed context from %s to %s" % ( + f, + start_context, + LoggingContext.current_context(), + ) + # print the error to stderr because otherwise all we + # see in travis-ci is the 500 error + print(err, file=sys.stderr) + raise Exception(err) + return res + + if LoggingContext.current_context() != LoggingContext.sentinel: + err = ( + "%s returned incomplete deferred in non-sentinel context " + "%s (start was %s)" + ) % (f, LoggingContext.current_context(), start_context) + print(err, file=sys.stderr) + raise Exception(err) + + def check_ctx(r): + if LoggingContext.current_context() != start_context: + for err in changes: + print(err, file=sys.stderr) + err = "%s completion of %s changed context from %s to %s" % ( + "Failure" if isinstance(r, Failure) else "Success", + f, + start_context, + LoggingContext.current_context(), + ) + print(err, file=sys.stderr) + raise Exception(err) + return r + + res.addBoth(check_ctx) + return res + + return wrapped + + defer.inlineCallbacks = new_inline_callbacks + _already_patched = True + + +def _check_yield_points(f: Callable, changes: List[str]): + """Wraps a generator that is about to be passed to defer.inlineCallbacks + checking that after every yield the log contexts are correct. + + It's perfectly valid for log contexts to change within a function, e.g. due + to new Measure blocks, so such changes are added to the given `changes` + list instead of triggering an exception. + + Args: + f: generator function to wrap + changes: A list of strings detailing how the contexts + changed within a function. + + Returns: + function + """ + + from synapse.logging.context import LoggingContext + + @functools.wraps(f) + def check_yield_points_inner(*args, **kwargs): + gen = f(*args, **kwargs) + + last_yield_line_no = gen.gi_frame.f_lineno + result = None # type: Any + while True: + expected_context = LoggingContext.current_context() + + try: + isFailure = isinstance(result, Failure) + if isFailure: + d = result.throwExceptionIntoGenerator(gen) + else: + d = gen.send(result) + except (StopIteration, defer._DefGen_Return) as e: + if LoggingContext.current_context() != expected_context: + # This happens when the context is lost sometime *after* the + # final yield and returning. E.g. we forgot to yield on a + # function that returns a deferred. + # + # We don't raise here as it's perfectly valid for contexts to + # change in a function, as long as it sets the correct context + # on resolving (which is checked separately). + err = ( + "Function %r returned and changed context from %s to %s," + " in %s between %d and end of func" + % ( + f.__qualname__, + expected_context, + LoggingContext.current_context(), + f.__code__.co_filename, + last_yield_line_no, + ) + ) + changes.append(err) + return getattr(e, "value", None) + + frame = gen.gi_frame + + if isinstance(d, defer.Deferred) and not d.called: + # This happens if we yield on a deferred that doesn't follow + # the log context rules without wrapping in a `make_deferred_yieldable`. + # We raise here as this should never happen. + if LoggingContext.current_context() is not LoggingContext.sentinel: + err = ( + "%s yielded with context %s rather than sentinel," + " yielded on line %d in %s" + % ( + frame.f_code.co_name, + LoggingContext.current_context(), + frame.f_lineno, + frame.f_code.co_filename, + ) + ) + raise Exception(err) + + try: + result = yield d + except Exception as e: + result = Failure(e) + + if LoggingContext.current_context() != expected_context: + + # This happens because the context is lost sometime *after* the + # previous yield and *after* the current yield. E.g. the + # deferred we waited on didn't follow the rules, or we forgot to + # yield on a function between the two yield points. + # + # We don't raise here as its perfectly valid for contexts to + # change in a function, as long as it sets the correct context + # on resolving (which is checked separately). + err = ( + "%s changed context from %s to %s, happened between lines %d and %d in %s" + % ( + frame.f_code.co_name, + expected_context, + LoggingContext.current_context(), + last_yield_line_no, + frame.f_lineno, + frame.f_code.co_filename, + ) + ) + changes.append(err) + + last_yield_line_no = frame.f_lineno + + return check_yield_points_inner diff --git a/synapse/util/retryutils.py b/synapse/util/retryutils.py index 0862b5ca5..af6958719 100644 --- a/synapse/util/retryutils.py +++ b/synapse/util/retryutils.py @@ -22,6 +22,15 @@ from synapse.api.errors import CodeMessageException logger = logging.getLogger(__name__) +# the intial backoff, after the first transaction fails +MIN_RETRY_INTERVAL = 10 * 60 * 1000 + +# how much we multiply the backoff by after each subsequent fail +RETRY_MULTIPLIER = 5 + +# a cap on the backoff. (Essentially none) +MAX_RETRY_INTERVAL = 2 ** 62 + class NotRetryingDestination(Exception): def __init__(self, retry_last_ts, retry_interval, destination): @@ -71,11 +80,13 @@ def get_retry_limiter(destination, clock, store, ignore_backoff=False, **kwargs) # We aren't ready to retry that destination. raise """ + failure_ts = None retry_last_ts, retry_interval = (0, 0) retry_timings = yield store.get_destination_retry_timings(destination) if retry_timings: + failure_ts = retry_timings["failure_ts"] retry_last_ts, retry_interval = ( retry_timings["retry_last_ts"], retry_timings["retry_interval"], @@ -99,6 +110,7 @@ def get_retry_limiter(destination, clock, store, ignore_backoff=False, **kwargs) destination, clock, store, + failure_ts, retry_interval, backoff_on_failure=backoff_on_failure, **kwargs @@ -111,10 +123,8 @@ class RetryDestinationLimiter(object): destination, clock, store, + failure_ts, retry_interval, - min_retry_interval=10 * 60 * 1000, - max_retry_interval=24 * 60 * 60 * 1000, - multiplier_retry_interval=5, backoff_on_404=False, backoff_on_failure=True, ): @@ -127,15 +137,11 @@ class RetryDestinationLimiter(object): destination (str) clock (Clock) store (DataStore) + failure_ts (int|None): when this destination started failing (in ms since + the epoch), or zero if the last request was successful retry_interval (int): The next retry interval taken from the database in milliseconds, or zero if the last request was successful. - min_retry_interval (int): The minimum retry interval to use after - a failed request, in milliseconds. - max_retry_interval (int): The maximum retry interval to use after - a failed request, in milliseconds. - multiplier_retry_interval (int): The multiplier to use to increase - the retry interval after a failed request. backoff_on_404 (bool): Back off if we get a 404 backoff_on_failure (bool): set to False if we should not increase the @@ -145,10 +151,8 @@ class RetryDestinationLimiter(object): self.store = store self.destination = destination + self.failure_ts = failure_ts self.retry_interval = retry_interval - self.min_retry_interval = min_retry_interval - self.max_retry_interval = max_retry_interval - self.multiplier_retry_interval = multiplier_retry_interval self.backoff_on_404 = backoff_on_404 self.backoff_on_failure = backoff_on_failure @@ -189,6 +193,7 @@ class RetryDestinationLimiter(object): logger.debug( "Connection to %s was successful; clearing backoff", self.destination ) + self.failure_ts = None retry_last_ts = 0 self.retry_interval = 0 elif not self.backoff_on_failure: @@ -196,13 +201,14 @@ class RetryDestinationLimiter(object): else: # We couldn't connect. if self.retry_interval: - self.retry_interval *= self.multiplier_retry_interval - self.retry_interval *= int(random.uniform(0.8, 1.4)) + self.retry_interval = int( + self.retry_interval * RETRY_MULTIPLIER * random.uniform(0.8, 1.4) + ) - if self.retry_interval >= self.max_retry_interval: - self.retry_interval = self.max_retry_interval + if self.retry_interval >= MAX_RETRY_INTERVAL: + self.retry_interval = MAX_RETRY_INTERVAL else: - self.retry_interval = self.min_retry_interval + self.retry_interval = MIN_RETRY_INTERVAL logger.info( "Connection to %s was unsuccessful (%s(%s)); backoff now %i", @@ -213,11 +219,17 @@ class RetryDestinationLimiter(object): ) retry_last_ts = int(self.clock.time_msec()) + if self.failure_ts is None: + self.failure_ts = retry_last_ts + @defer.inlineCallbacks def store_retry_timings(): try: yield self.store.set_destination_retry_timings( - self.destination, retry_last_ts, self.retry_interval + self.destination, + self.failure_ts, + retry_last_ts, + self.retry_interval, ) except Exception: logger.exception("Failed to store destination_retry_timings") diff --git a/synctl b/synctl index a9629cf0e..45acece30 100755 --- a/synctl +++ b/synctl @@ -71,7 +71,20 @@ def abort(message, colour=RED, stream=sys.stderr): sys.exit(1) -def start(configfile, daemonize=True): +def start(configfile: str, daemonize: bool = True) -> bool: + """Attempts to start synapse. + Args: + configfile: path to a yaml synapse config file + daemonize: whether to daemonize synapse or keep it attached to the current + session + + Returns: + True if the process started successfully + False if there was an error starting the process + + If deamonize is False it will only return once synapse exits. + """ + write("Starting ...") args = SYNAPSE @@ -83,25 +96,40 @@ def start(configfile, daemonize=True): try: subprocess.check_call(args) write("started synapse.app.homeserver(%r)" % (configfile,), colour=GREEN) + return True except subprocess.CalledProcessError as e: write( "error starting (exit code: %d); see above for logs" % e.returncode, colour=RED, ) + return False -def start_worker(app, configfile, worker_configfile): +def start_worker(app: str, configfile: str, worker_configfile: str) -> bool: + """Attempts to start a synapse worker. + Args: + app: name of the worker's appservice + configfile: path to a yaml synapse config file + worker_configfile: path to worker specific yaml synapse file + + Returns: + True if the process started successfully + False if there was an error starting the process + """ + args = [sys.executable, "-B", "-m", app, "-c", configfile, "-c", worker_configfile] try: subprocess.check_call(args) write("started %s(%r)" % (app, worker_configfile), colour=GREEN) + return True except subprocess.CalledProcessError as e: write( "error starting %s(%r) (exit code: %d); see above for logs" % (app, worker_configfile, e.returncode), colour=RED, ) + return False def stop(pidfile, app): @@ -292,11 +320,14 @@ def main(): write("All processes exited; now restarting...") if action == "start" or action == "restart": + error = False if start_stop_synapse: # Check if synapse is already running if os.path.exists(pidfile) and pid_running(int(open(pidfile).read())): abort("synapse.app.homeserver already running") - start(configfile, bool(options.daemonize)) + + if not start(configfile, bool(options.daemonize)): + error = True for worker in workers: env = os.environ.copy() @@ -307,12 +338,16 @@ def main(): for cache_name, factor in iteritems(worker.cache_factors): os.environ["SYNAPSE_CACHE_FACTOR_" + cache_name.upper()] = str(factor) - start_worker(worker.app, configfile, worker.configfile) + if not start_worker(worker.app, configfile, worker.configfile): + error = True # Reset env back to the original os.environ.clear() os.environ.update(env) + if error: + exit(1) + if __name__ == "__main__": main() diff --git a/tests/__init__.py b/tests/__init__.py index f7fc502f0..ed805db1c 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -16,9 +16,9 @@ from twisted.trial import util -import tests.patch_inline_callbacks +from synapse.util.patch_inline_callbacks import do_patch # attempt to do the patch before we load any synapse code -tests.patch_inline_callbacks.do_patch() +do_patch() util.DEFAULT_TIMEOUT_DURATION = 20 diff --git a/tests/api/test_auth.py b/tests/api/test_auth.py index c0cb8ef29..6121efcfa 100644 --- a/tests/api/test_auth.py +++ b/tests/api/test_auth.py @@ -21,6 +21,7 @@ from twisted.internet import defer import synapse.handlers.auth from synapse.api.auth import Auth +from synapse.api.constants import UserTypes from synapse.api.errors import ( AuthError, Codes, @@ -335,6 +336,23 @@ class AuthTestCase(unittest.TestCase): ) yield self.auth.check_auth_blocking() + @defer.inlineCallbacks + def test_blocking_mau__depending_on_user_type(self): + self.hs.config.max_mau_value = 50 + self.hs.config.limit_usage_by_mau = True + + self.store.get_monthly_active_count = Mock(return_value=defer.succeed(100)) + # Support users allowed + yield self.auth.check_auth_blocking(user_type=UserTypes.SUPPORT) + self.store.get_monthly_active_count = Mock(return_value=defer.succeed(100)) + # Bots not allowed + with self.assertRaises(ResourceLimitError): + yield self.auth.check_auth_blocking(user_type=UserTypes.BOT) + self.store.get_monthly_active_count = Mock(return_value=defer.succeed(100)) + # Real users not allowed + with self.assertRaises(ResourceLimitError): + yield self.auth.check_auth_blocking() + @defer.inlineCallbacks def test_reserved_threepid(self): self.hs.config.limit_usage_by_mau = True diff --git a/tests/config/test_tls.py b/tests/config/test_tls.py index 8e0c4b953..1be6ff563 100644 --- a/tests/config/test_tls.py +++ b/tests/config/test_tls.py @@ -16,21 +16,29 @@ import os +import idna import yaml from OpenSSL import SSL +from synapse.config._base import Config, RootConfig from synapse.config.tls import ConfigError, TlsConfig from synapse.crypto.context_factory import ClientTLSOptionsFactory from tests.unittest import TestCase -class TestConfig(TlsConfig): +class FakeServer(Config): + section = "server" + def has_tls_listener(self): return False +class TestConfig(RootConfig): + config_classes = [FakeServer, TlsConfig] + + class TLSConfigTests(TestCase): def test_warn_self_signed(self): """ @@ -201,13 +209,13 @@ s4niecZKPBizL6aucT59CsunNmmb5Glq8rlAcU+1ZTZZzGYqVYhF6axB9Qg= conf = TestConfig() conf.read_config( yaml.safe_load( - TestConfig().generate_config_section( + TestConfig().generate_config( "/config_dir_path", "my_super_secure_server", "/data_dir_path", - "/tls_cert_path", - "tls_private_key", - None, # This is the acme_domain + tls_certificate_path="/tls_cert_path", + tls_private_key_path="tls_private_key", + acme_domain=None, # This is the acme_domain ) ), "/config_dir_path", @@ -222,16 +230,55 @@ s4niecZKPBizL6aucT59CsunNmmb5Glq8rlAcU+1ZTZZzGYqVYhF6axB9Qg= conf = TestConfig() conf.read_config( yaml.safe_load( - TestConfig().generate_config_section( + TestConfig().generate_config( "/config_dir_path", "my_super_secure_server", "/data_dir_path", - "/tls_cert_path", - "tls_private_key", - "my_supe_secure_server", # This is the acme_domain + tls_certificate_path="/tls_cert_path", + tls_private_key_path="tls_private_key", + acme_domain="my_supe_secure_server", # This is the acme_domain ) ), "/config_dir_path", ) self.assertTrue(conf.acme_enabled) + + def test_whitelist_idna_failure(self): + """ + The federation certificate whitelist will not allow IDNA domain names. + """ + config = { + "federation_certificate_verification_whitelist": [ + "example.com", + "*.ドメイン.テスト", + ] + } + t = TestConfig() + e = self.assertRaises( + ConfigError, t.read_config, config, config_dir_path="", data_dir_path="" + ) + self.assertIn("IDNA domain names", str(e)) + + def test_whitelist_idna_result(self): + """ + The federation certificate whitelist will match on IDNA encoded names. + """ + config = { + "federation_certificate_verification_whitelist": [ + "example.com", + "*.xn--eckwd4c7c.xn--zckzah", + ] + } + t = TestConfig() + t.read_config(config, config_dir_path="", data_dir_path="") + + cf = ClientTLSOptionsFactory(t) + + # Not in the whitelist + opts = cf.get_options(b"notexample.com") + self.assertTrue(opts._verifier._verify_certs) + + # Caught by the wildcard + opts = cf.get_options(idna.encode("テスト.ドメイン.テスト")) + self.assertFalse(opts._verifier._verify_certs) diff --git a/tests/handlers/test_e2e_room_keys.py b/tests/handlers/test_e2e_room_keys.py index c4503c161..0bb96674a 100644 --- a/tests/handlers/test_e2e_room_keys.py +++ b/tests/handlers/test_e2e_room_keys.py @@ -187,9 +187,8 @@ class E2eRoomKeysHandlerTestCase(unittest.TestCase): self.assertEqual(res, 404) @defer.inlineCallbacks - def test_update_bad_version(self): - """Check that we get a 400 if the version in the body is missing or - doesn't match + def test_update_omitted_version(self): + """Check that the update succeeds if the version is missing from the body """ version = yield self.handler.create_version( self.local_user, @@ -197,19 +196,35 @@ class E2eRoomKeysHandlerTestCase(unittest.TestCase): ) self.assertEqual(version, "1") - res = None - try: - yield self.handler.update_version( - self.local_user, - version, - { - "algorithm": "m.megolm_backup.v1", - "auth_data": "revised_first_version_auth_data", - }, - ) - except errors.SynapseError as e: - res = e.code - self.assertEqual(res, 400) + yield self.handler.update_version( + self.local_user, + version, + { + "algorithm": "m.megolm_backup.v1", + "auth_data": "revised_first_version_auth_data", + }, + ) + + # check we can retrieve it as the current version + res = yield self.handler.get_version_info(self.local_user) + self.assertDictEqual( + res, + { + "algorithm": "m.megolm_backup.v1", + "auth_data": "revised_first_version_auth_data", + "version": version, + }, + ) + + @defer.inlineCallbacks + def test_update_bad_version(self): + """Check that we get a 400 if the version in the body doesn't match + """ + version = yield self.handler.create_version( + self.local_user, + {"algorithm": "m.megolm_backup.v1", "auth_data": "first_version_auth_data"}, + ) + self.assertEqual(version, "1") res = None try: diff --git a/tests/handlers/test_federation.py b/tests/handlers/test_federation.py new file mode 100644 index 000000000..d56220f40 --- /dev/null +++ b/tests/handlers/test_federation.py @@ -0,0 +1,81 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from synapse.api.constants import EventTypes +from synapse.api.errors import AuthError, Codes +from synapse.rest import admin +from synapse.rest.client.v1 import login, room + +from tests import unittest + + +class FederationTestCase(unittest.HomeserverTestCase): + servlets = [ + admin.register_servlets, + login.register_servlets, + room.register_servlets, + ] + + def make_homeserver(self, reactor, clock): + hs = self.setup_test_homeserver(http_client=None) + self.handler = hs.get_handlers().federation_handler + self.store = hs.get_datastore() + return hs + + def test_exchange_revoked_invite(self): + user_id = self.register_user("kermit", "test") + tok = self.login("kermit", "test") + + room_id = self.helper.create_room_as(room_creator=user_id, tok=tok) + + # Send a 3PID invite event with an empty body so it's considered as a revoked one. + invite_token = "sometoken" + self.helper.send_state( + room_id=room_id, + event_type=EventTypes.ThirdPartyInvite, + state_key=invite_token, + body={}, + tok=tok, + ) + + d = self.handler.on_exchange_third_party_invite_request( + room_id=room_id, + event_dict={ + "type": EventTypes.Member, + "room_id": room_id, + "sender": user_id, + "state_key": "@someone:example.org", + "content": { + "membership": "invite", + "third_party_invite": { + "display_name": "alice", + "signed": { + "mxid": "@alice:localhost", + "token": invite_token, + "signatures": { + "magic.forest": { + "ed25519:3": "fQpGIW1Snz+pwLZu6sTy2aHy/DYWWTspTJRPyNp0PKkymfIsNffysMl6ObMMFdIJhk6g6pwlIqZ54rxo8SLmAg" + } + }, + }, + }, + }, + }, + ) + + failure = self.get_failure(d, AuthError).value + + self.assertEqual(failure.code, 403, failure) + self.assertEqual(failure.errcode, Codes.FORBIDDEN, failure) + self.assertEqual(failure.msg, "You are not invited to this room.") diff --git a/tests/handlers/test_presence.py b/tests/handlers/test_presence.py index f70c6e7d6..d4293b431 100644 --- a/tests/handlers/test_presence.py +++ b/tests/handlers/test_presence.py @@ -22,6 +22,7 @@ from synapse.api.constants import EventTypes, Membership, PresenceState from synapse.events import room_version_to_event_format from synapse.events.builder import EventBuilder from synapse.handlers.presence import ( + EXTERNAL_PROCESS_EXPIRY, FEDERATION_PING_INTERVAL, FEDERATION_TIMEOUT, IDLE_TIMER, @@ -413,6 +414,44 @@ class PresenceTimeoutTestCase(unittest.TestCase): self.assertEquals(state, new_state) +class PresenceHandlerTestCase(unittest.HomeserverTestCase): + def prepare(self, reactor, clock, hs): + self.presence_handler = hs.get_presence_handler() + self.clock = hs.get_clock() + + def test_external_process_timeout(self): + """Test that if an external process doesn't update the records for a while + we time out their syncing users presence. + """ + process_id = 1 + user_id = "@test:server" + + # Notify handler that a user is now syncing. + self.get_success( + self.presence_handler.update_external_syncs_row( + process_id, user_id, True, self.clock.time_msec() + ) + ) + + # Check that if we wait a while without telling the handler the user has + # stopped syncing that their presence state doesn't get timed out. + self.reactor.advance(EXTERNAL_PROCESS_EXPIRY / 2) + + state = self.get_success( + self.presence_handler.get_state(UserID.from_string(user_id)) + ) + self.assertEqual(state.state, PresenceState.ONLINE) + + # Check that if the external process timeout fires, then the syncing + # user gets timed out + self.reactor.advance(EXTERNAL_PROCESS_EXPIRY) + + state = self.get_success( + self.presence_handler.get_state(UserID.from_string(user_id)) + ) + self.assertEqual(state.state, PresenceState.OFFLINE) + + class PresenceJoinTestCase(unittest.HomeserverTestCase): """Tests remote servers get told about presence of users in the room when they join and when new local users join. diff --git a/tests/handlers/test_register.py b/tests/handlers/test_register.py index e10296a5e..1e9ba3a20 100644 --- a/tests/handlers/test_register.py +++ b/tests/handlers/test_register.py @@ -171,11 +171,11 @@ class RegistrationTestCase(unittest.HomeserverTestCase): rooms = self.get_success(self.store.get_rooms_for_user(user_id)) self.assertEqual(len(rooms), 0) - def test_auto_create_auto_join_rooms_when_support_user_exists(self): + def test_auto_create_auto_join_rooms_when_user_is_not_a_real_user(self): room_alias_str = "#room:test" self.hs.config.auto_join_rooms = [room_alias_str] - self.store.is_support_user = Mock(return_value=True) + self.store.is_real_user = Mock(return_value=False) user_id = self.get_success(self.handler.register_user(localpart="support")) rooms = self.get_success(self.store.get_rooms_for_user(user_id)) self.assertEqual(len(rooms), 0) @@ -183,6 +183,31 @@ class RegistrationTestCase(unittest.HomeserverTestCase): room_alias = RoomAlias.from_string(room_alias_str) self.get_failure(directory_handler.get_association(room_alias), SynapseError) + def test_auto_create_auto_join_rooms_when_user_is_the_first_real_user(self): + room_alias_str = "#room:test" + self.hs.config.auto_join_rooms = [room_alias_str] + + self.store.count_real_users = Mock(return_value=1) + self.store.is_real_user = Mock(return_value=True) + user_id = self.get_success(self.handler.register_user(localpart="real")) + rooms = self.get_success(self.store.get_rooms_for_user(user_id)) + directory_handler = self.hs.get_handlers().directory_handler + room_alias = RoomAlias.from_string(room_alias_str) + room_id = self.get_success(directory_handler.get_association(room_alias)) + + self.assertTrue(room_id["room_id"] in rooms) + self.assertEqual(len(rooms), 1) + + def test_auto_create_auto_join_rooms_when_user_is_not_the_first_real_user(self): + room_alias_str = "#room:test" + self.hs.config.auto_join_rooms = [room_alias_str] + + self.store.count_real_users = Mock(return_value=2) + self.store.is_real_user = Mock(return_value=True) + user_id = self.get_success(self.handler.register_user(localpart="real")) + rooms = self.get_success(self.store.get_rooms_for_user(user_id)) + self.assertEqual(len(rooms), 0) + def test_auto_create_auto_join_where_no_consent(self): """Test to ensure that the first user is not auto-joined to a room if they have not given general consent. diff --git a/tests/handlers/test_roomlist.py b/tests/handlers/test_roomlist.py deleted file mode 100644 index 61eebb698..000000000 --- a/tests/handlers/test_roomlist.py +++ /dev/null @@ -1,39 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2018 New Vector Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from synapse.handlers.room_list import RoomListNextBatch - -import tests.unittest -import tests.utils - - -class RoomListTestCase(tests.unittest.TestCase): - """ Tests RoomList's RoomListNextBatch. """ - - def setUp(self): - pass - - def test_check_read_batch_tokens(self): - batch_token = RoomListNextBatch( - stream_ordering="abcdef", - public_room_stream_id="123", - current_limit=20, - direction_is_forward=True, - ).to_token() - next_batch = RoomListNextBatch.from_token(batch_token) - self.assertEquals(next_batch.stream_ordering, "abcdef") - self.assertEquals(next_batch.public_room_stream_id, "123") - self.assertEquals(next_batch.current_limit, 20) - self.assertEquals(next_batch.direction_is_forward, True) diff --git a/tests/handlers/test_typing.py b/tests/handlers/test_typing.py index 5d5e324df..67f101305 100644 --- a/tests/handlers/test_typing.py +++ b/tests/handlers/test_typing.py @@ -99,7 +99,12 @@ class TypingNotificationsTestCase(unittest.HomeserverTestCase): self.event_source = hs.get_event_sources().sources["typing"] self.datastore = hs.get_datastore() - retry_timings_res = {"destination": "", "retry_last_ts": 0, "retry_interval": 0} + retry_timings_res = { + "destination": "", + "retry_last_ts": 0, + "retry_interval": 0, + "failure_ts": None, + } self.datastore.get_destination_retry_timings.return_value = defer.succeed( retry_timings_res ) @@ -134,7 +139,7 @@ class TypingNotificationsTestCase(unittest.HomeserverTestCase): defer.succeed(1) ) - self.datastore.get_current_state_deltas.return_value = None + self.datastore.get_current_state_deltas.return_value = (0, None) self.datastore.get_to_device_stream_token = lambda: 0 self.datastore.get_new_device_msgs_for_remote = lambda *args, **kargs: ([], 0) diff --git a/tests/logging/test_structured.py b/tests/logging/test_structured.py index a786de023..451d05c0f 100644 --- a/tests/logging/test_structured.py +++ b/tests/logging/test_structured.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import logging import os import os.path import shutil @@ -33,7 +34,20 @@ class FakeBeginner(object): self.observers = observers -class StructuredLoggingTestCase(HomeserverTestCase): +class StructuredLoggingTestBase(object): + """ + Test base that registers a cleanup handler to reset the stdlib log handler + to 'unset'. + """ + + def prepare(self, reactor, clock, hs): + def _cleanup(): + logging.getLogger("synapse").setLevel(logging.NOTSET) + + self.addCleanup(_cleanup) + + +class StructuredLoggingTestCase(StructuredLoggingTestBase, HomeserverTestCase): """ Tests for Synapse's structured logging support. """ @@ -139,7 +153,9 @@ class StructuredLoggingTestCase(HomeserverTestCase): self.assertEqual(logs[0]["request"], "somereq") -class StructuredLoggingConfigurationFileTestCase(HomeserverTestCase): +class StructuredLoggingConfigurationFileTestCase( + StructuredLoggingTestBase, HomeserverTestCase +): def make_homeserver(self, reactor, clock): tempdir = self.mktemp() @@ -179,10 +195,11 @@ class StructuredLoggingConfigurationFileTestCase(HomeserverTestCase): """ When a structured logging config is given, Synapse will use it. """ - setup_logging(self.hs, self.hs.config) + beginner = FakeBeginner() + publisher = setup_logging(self.hs, self.hs.config, logBeginner=beginner) # Make a logger and send an event - logger = Logger(namespace="tests.logging.test_structured") + logger = Logger(namespace="tests.logging.test_structured", observer=publisher) with LoggingContext("testcontext", request="somereq"): logger.info("Hello there, {name}!", name="steve") diff --git a/tests/logging/test_terse_json.py b/tests/logging/test_terse_json.py index 514282591..4cf81f712 100644 --- a/tests/logging/test_terse_json.py +++ b/tests/logging/test_terse_json.py @@ -23,10 +23,10 @@ from synapse.logging._structured import setup_structured_logging from tests.server import connect_client from tests.unittest import HomeserverTestCase -from .test_structured import FakeBeginner +from .test_structured import FakeBeginner, StructuredLoggingTestBase -class TerseJSONTCPTestCase(HomeserverTestCase): +class TerseJSONTCPTestCase(StructuredLoggingTestBase, HomeserverTestCase): def test_log_output(self): """ The Terse JSON outputter delivers simplified structured logs over TCP. diff --git a/tests/patch_inline_callbacks.py b/tests/patch_inline_callbacks.py deleted file mode 100644 index 220884311..000000000 --- a/tests/patch_inline_callbacks.py +++ /dev/null @@ -1,94 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2018 New Vector Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function - -import functools -import sys - -from twisted.internet import defer -from twisted.internet.defer import Deferred -from twisted.python.failure import Failure - - -def do_patch(): - """ - Patch defer.inlineCallbacks so that it checks the state of the logcontext on exit - """ - - from synapse.logging.context import LoggingContext - - orig_inline_callbacks = defer.inlineCallbacks - - def new_inline_callbacks(f): - - orig = orig_inline_callbacks(f) - - @functools.wraps(f) - def wrapped(*args, **kwargs): - start_context = LoggingContext.current_context() - - try: - res = orig(*args, **kwargs) - except Exception: - if LoggingContext.current_context() != start_context: - err = "%s changed context from %s to %s on exception" % ( - f, - start_context, - LoggingContext.current_context(), - ) - print(err, file=sys.stderr) - raise Exception(err) - raise - - if not isinstance(res, Deferred) or res.called: - if LoggingContext.current_context() != start_context: - err = "%s changed context from %s to %s" % ( - f, - start_context, - LoggingContext.current_context(), - ) - # print the error to stderr because otherwise all we - # see in travis-ci is the 500 error - print(err, file=sys.stderr) - raise Exception(err) - return res - - if LoggingContext.current_context() != LoggingContext.sentinel: - err = ( - "%s returned incomplete deferred in non-sentinel context " - "%s (start was %s)" - ) % (f, LoggingContext.current_context(), start_context) - print(err, file=sys.stderr) - raise Exception(err) - - def check_ctx(r): - if LoggingContext.current_context() != start_context: - err = "%s completion of %s changed context from %s to %s" % ( - "Failure" if isinstance(r, Failure) else "Success", - f, - start_context, - LoggingContext.current_context(), - ) - print(err, file=sys.stderr) - raise Exception(err) - return r - - res.addBoth(check_ctx) - return res - - return wrapped - - defer.inlineCallbacks = new_inline_callbacks diff --git a/tests/rest/admin/test_admin.py b/tests/rest/admin/test_admin.py index 5877bb213..d3a4f717f 100644 --- a/tests/rest/admin/test_admin.py +++ b/tests/rest/admin/test_admin.py @@ -62,7 +62,7 @@ class UserRegisterTestCase(unittest.HomeserverTestCase): self.device_handler.check_device_registered = Mock(return_value="FAKE") self.datastore = Mock(return_value=Mock()) - self.datastore.get_current_state_deltas = Mock(return_value=[]) + self.datastore.get_current_state_deltas = Mock(return_value=(0, [])) self.secrets = Mock() diff --git a/tests/rest/client/test_redactions.py b/tests/rest/client/test_redactions.py index fe66e397c..d2bcf256f 100644 --- a/tests/rest/client/test_redactions.py +++ b/tests/rest/client/test_redactions.py @@ -30,6 +30,14 @@ class RedactionsTestCase(HomeserverTestCase): sync.register_servlets, ] + def make_homeserver(self, reactor, clock): + config = self.default_config() + + config["rc_message"] = {"per_second": 0.2, "burst_count": 10} + config["rc_admin_redaction"] = {"per_second": 1, "burst_count": 100} + + return self.setup_test_homeserver(config=config) + def prepare(self, reactor, clock, hs): # register a couple of users self.mod_user_id = self.register_user("user1", "pass") @@ -177,3 +185,20 @@ class RedactionsTestCase(HomeserverTestCase): self._redact_event( self.other_access_token, self.room_id, create_event_id, expect_code=403 ) + + def test_redact_event_as_moderator_ratelimit(self): + """Tests that the correct ratelimiting is applied to redactions + """ + + message_ids = [] + # as a regular user, send messages to redact + for _ in range(20): + b = self.helper.send(room_id=self.room_id, tok=self.other_access_token) + message_ids.append(b["event_id"]) + self.reactor.advance(10) # To get around ratelimits + + # as the moderator, send a bunch of redactions + for msg_id in message_ids: + # These should all succeed, even though this would be denied by + # the standard message ratelimiter + self._redact_event(self.mod_access_token, self.room_id, msg_id) diff --git a/tests/rest/client/v1/test_rooms.py b/tests/rest/client/v1/test_rooms.py index fe741637f..2f2ca7461 100644 --- a/tests/rest/client/v1/test_rooms.py +++ b/tests/rest/client/v1/test_rooms.py @@ -484,6 +484,15 @@ class RoomsCreateTestCase(RoomBase): self.render(request) self.assertEquals(400, channel.code) + def test_post_room_invitees_invalid_mxid(self): + # POST with invalid invitee, see https://github.com/matrix-org/synapse/issues/4088 + # Note the trailing space in the MXID here! + request, channel = self.make_request( + "POST", "/createRoom", b'{"invite":["@alice:example.com "]}' + ) + self.render(request) + self.assertEquals(400, channel.code) + class RoomTopicTestCase(RoomBase): """ Tests /rooms/$room_id/topic REST events. """ diff --git a/tests/rest/client/v2_alpha/test_account.py b/tests/rest/client/v2_alpha/test_account.py index 920de41de..0f51895b8 100644 --- a/tests/rest/client/v2_alpha/test_account.py +++ b/tests/rest/client/v2_alpha/test_account.py @@ -23,8 +23,8 @@ from email.parser import Parser import pkg_resources import synapse.rest.admin -from synapse.api.constants import LoginType -from synapse.rest.client.v1 import login +from synapse.api.constants import LoginType, Membership +from synapse.rest.client.v1 import login, room from synapse.rest.client.v2_alpha import account, register from tests import unittest @@ -244,16 +244,66 @@ class DeactivateTestCase(unittest.HomeserverTestCase): synapse.rest.admin.register_servlets_for_client_rest_resource, login.register_servlets, account.register_servlets, + room.register_servlets, ] def make_homeserver(self, reactor, clock): - hs = self.setup_test_homeserver() - return hs + self.hs = self.setup_test_homeserver() + return self.hs def test_deactivate_account(self): user_id = self.register_user("kermit", "test") tok = self.login("kermit", "test") + self.deactivate(user_id, tok) + + store = self.hs.get_datastore() + + # Check that the user has been marked as deactivated. + self.assertTrue(self.get_success(store.get_user_deactivated_status(user_id))) + + # Check that this access token has been invalidated. + request, channel = self.make_request("GET", "account/whoami") + self.render(request) + self.assertEqual(request.code, 401) + + @unittest.INFO + def test_pending_invites(self): + """Tests that deactivating a user rejects every pending invite for them.""" + store = self.hs.get_datastore() + + inviter_id = self.register_user("inviter", "test") + inviter_tok = self.login("inviter", "test") + + invitee_id = self.register_user("invitee", "test") + invitee_tok = self.login("invitee", "test") + + # Make @inviter:test invite @invitee:test in a new room. + room_id = self.helper.create_room_as(inviter_id, tok=inviter_tok) + self.helper.invite( + room=room_id, src=inviter_id, targ=invitee_id, tok=inviter_tok + ) + + # Make sure the invite is here. + pending_invites = self.get_success(store.get_invited_rooms_for_user(invitee_id)) + self.assertEqual(len(pending_invites), 1, pending_invites) + self.assertEqual(pending_invites[0].room_id, room_id, pending_invites) + + # Deactivate @invitee:test. + self.deactivate(invitee_id, invitee_tok) + + # Check that the invite isn't there anymore. + pending_invites = self.get_success(store.get_invited_rooms_for_user(invitee_id)) + self.assertEqual(len(pending_invites), 0, pending_invites) + + # Check that the membership of @invitee:test in the room is now "leave". + memberships = self.get_success( + store.get_rooms_for_user_where_membership_is(invitee_id, [Membership.LEAVE]) + ) + self.assertEqual(len(memberships), 1, memberships) + self.assertEqual(memberships[0].room_id, room_id, memberships) + + def deactivate(self, user_id, tok): request_data = json.dumps( { "auth": { @@ -269,13 +319,3 @@ class DeactivateTestCase(unittest.HomeserverTestCase): ) self.render(request) self.assertEqual(request.code, 200) - - store = self.hs.get_datastore() - - # Check that the user has been marked as deactivated. - self.assertTrue(self.get_success(store.get_user_deactivated_status(user_id))) - - # Check that this access token has been invalidated. - request, channel = self.make_request("GET", "account/whoami") - self.render(request) - self.assertEqual(request.code, 401) diff --git a/tests/rest/client/v2_alpha/test_auth.py b/tests/rest/client/v2_alpha/test_auth.py index b9ef46e8f..b6df1396a 100644 --- a/tests/rest/client/v2_alpha/test_auth.py +++ b/tests/rest/client/v2_alpha/test_auth.py @@ -18,11 +18,22 @@ from twisted.internet.defer import succeed import synapse.rest.admin from synapse.api.constants import LoginType +from synapse.handlers.ui_auth.checkers import UserInteractiveAuthChecker from synapse.rest.client.v2_alpha import auth, register from tests import unittest +class DummyRecaptchaChecker(UserInteractiveAuthChecker): + def __init__(self, hs): + super().__init__(hs) + self.recaptcha_attempts = [] + + def check_auth(self, authdict, clientip): + self.recaptcha_attempts.append((authdict, clientip)) + return succeed(True) + + class FallbackAuthTests(unittest.HomeserverTestCase): servlets = [ @@ -44,15 +55,9 @@ class FallbackAuthTests(unittest.HomeserverTestCase): return hs def prepare(self, reactor, clock, hs): + self.recaptcha_checker = DummyRecaptchaChecker(hs) auth_handler = hs.get_auth_handler() - - self.recaptcha_attempts = [] - - def _recaptcha(authdict, clientip): - self.recaptcha_attempts.append((authdict, clientip)) - return succeed(True) - - auth_handler.checkers[LoginType.RECAPTCHA] = _recaptcha + auth_handler.checkers[LoginType.RECAPTCHA] = self.recaptcha_checker @unittest.INFO def test_fallback_captcha(self): @@ -89,8 +94,9 @@ class FallbackAuthTests(unittest.HomeserverTestCase): self.assertEqual(request.code, 200) # The recaptcha handler is called with the response given - self.assertEqual(len(self.recaptcha_attempts), 1) - self.assertEqual(self.recaptcha_attempts[0][0]["response"], "a") + attempts = self.recaptcha_checker.recaptcha_attempts + self.assertEqual(len(attempts), 1) + self.assertEqual(attempts[0][0]["response"], "a") # also complete the dummy auth request, channel = self.make_request( diff --git a/tests/rest/client/v2_alpha/test_filter.py b/tests/rest/client/v2_alpha/test_filter.py index f42a8efbf..e0e9e94fb 100644 --- a/tests/rest/client/v2_alpha/test_filter.py +++ b/tests/rest/client/v2_alpha/test_filter.py @@ -92,7 +92,7 @@ class FilterTestCase(unittest.HomeserverTestCase): ) self.render(request) - self.assertEqual(channel.result["code"], b"400") + self.assertEqual(channel.result["code"], b"404") self.assertEquals(channel.json_body["errcode"], Codes.NOT_FOUND) # Currently invalid params do not have an appropriate errcode diff --git a/tests/rest/client/v2_alpha/test_register.py b/tests/rest/client/v2_alpha/test_register.py index ab4d7d70d..dab87e5ed 100644 --- a/tests/rest/client/v2_alpha/test_register.py +++ b/tests/rest/client/v2_alpha/test_register.py @@ -34,19 +34,12 @@ from tests import unittest class RegisterRestServletTestCase(unittest.HomeserverTestCase): servlets = [register.register_servlets] + url = b"/_matrix/client/r0/register" - def make_homeserver(self, reactor, clock): - - self.url = b"/_matrix/client/r0/register" - - self.hs = self.setup_test_homeserver() - self.hs.config.enable_registration = True - self.hs.config.registrations_require_3pid = [] - self.hs.config.auto_join_rooms = [] - self.hs.config.enable_registration_captcha = False - self.hs.config.allow_guest_access = True - - return self.hs + def default_config(self, name="test"): + config = super().default_config(name) + config["allow_guest_access"] = True + return config def test_POST_appservice_registration_valid(self): user_id = "@as_user_kermit:test" @@ -199,6 +192,73 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): self.assertEquals(channel.result["code"], b"200", channel.result) + def test_advertised_flows(self): + request, channel = self.make_request(b"POST", self.url, b"{}") + self.render(request) + self.assertEquals(channel.result["code"], b"401", channel.result) + flows = channel.json_body["flows"] + + # with the stock config, we only expect the dummy flow + self.assertCountEqual([["m.login.dummy"]], (f["stages"] for f in flows)) + + @unittest.override_config( + { + "enable_registration_captcha": True, + "user_consent": { + "version": "1", + "template_dir": "/", + "require_at_registration": True, + }, + "account_threepid_delegates": { + "email": "https://id_server", + "msisdn": "https://id_server", + }, + } + ) + def test_advertised_flows_captcha_and_terms_and_3pids(self): + request, channel = self.make_request(b"POST", self.url, b"{}") + self.render(request) + self.assertEquals(channel.result["code"], b"401", channel.result) + flows = channel.json_body["flows"] + + self.assertCountEqual( + [ + ["m.login.recaptcha", "m.login.terms", "m.login.dummy"], + ["m.login.recaptcha", "m.login.terms", "m.login.email.identity"], + ["m.login.recaptcha", "m.login.terms", "m.login.msisdn"], + [ + "m.login.recaptcha", + "m.login.terms", + "m.login.msisdn", + "m.login.email.identity", + ], + ], + (f["stages"] for f in flows), + ) + + @unittest.override_config( + { + "public_baseurl": "https://test_server", + "registrations_require_3pid": ["email"], + "disable_msisdn_registration": True, + "email": { + "smtp_host": "mail_server", + "smtp_port": 2525, + "notif_from": "sender@host", + }, + } + ) + def test_advertised_flows_no_msisdn_email_required(self): + request, channel = self.make_request(b"POST", self.url, b"{}") + self.render(request) + self.assertEquals(channel.result["code"], b"401", channel.result) + flows = channel.json_body["flows"] + + # with the stock config, we expect all four combinations of 3pid + self.assertCountEqual( + [["m.login.email.identity"]], (f["stages"] for f in flows) + ) + class AccountValidityTestCase(unittest.HomeserverTestCase): diff --git a/tests/storage/test_cleanup_extrems.py b/tests/storage/test_cleanup_extrems.py index e9e2d5337..34f9c7270 100644 --- a/tests/storage/test_cleanup_extrems.py +++ b/tests/storage/test_cleanup_extrems.py @@ -14,7 +14,13 @@ # limitations under the License. import os.path +from unittest.mock import patch +from mock import Mock + +import synapse.rest.admin +from synapse.api.constants import EventTypes +from synapse.rest.client.v1 import login, room from synapse.storage import prepare_database from synapse.types import Requester, UserID @@ -225,6 +231,14 @@ class CleanupExtremBackgroundUpdateStoreTestCase(HomeserverTestCase): class CleanupExtremDummyEventsTestCase(HomeserverTestCase): + CONSENT_VERSION = "1" + EXTREMITIES_COUNT = 50 + servlets = [ + synapse.rest.admin.register_servlets_for_client_rest_resource, + login.register_servlets, + room.register_servlets, + ] + def make_homeserver(self, reactor, clock): config = self.default_config() config["cleanup_extremities_with_dummy_events"] = True @@ -233,27 +247,19 @@ class CleanupExtremDummyEventsTestCase(HomeserverTestCase): def prepare(self, reactor, clock, homeserver): self.store = homeserver.get_datastore() self.room_creator = homeserver.get_room_creation_handler() + self.event_creator_handler = homeserver.get_event_creation_handler() # Create a test user and room - self.user = UserID("alice", "test") + self.user = UserID.from_string(self.register_user("user1", "password")) + self.token1 = self.login("user1", "password") self.requester = Requester(self.user, None, False, None, None) info = self.get_success(self.room_creator.create_room(self.requester, {})) self.room_id = info["room_id"] + self.event_creator = homeserver.get_event_creation_handler() + homeserver.config.user_consent_version = self.CONSENT_VERSION def test_send_dummy_event(self): - # Create a bushy graph with 50 extremities. - - event_id_start = self.create_and_send_event(self.room_id, self.user) - - for _ in range(50): - self.create_and_send_event( - self.room_id, self.user, prev_event_ids=[event_id_start] - ) - - latest_event_ids = self.get_success( - self.store.get_latest_event_ids_in_room(self.room_id) - ) - self.assertEqual(len(latest_event_ids), 50) + self._create_extremity_rich_graph() # Pump the reactor repeatedly so that the background updates have a # chance to run. @@ -263,3 +269,126 @@ class CleanupExtremDummyEventsTestCase(HomeserverTestCase): self.store.get_latest_event_ids_in_room(self.room_id) ) self.assertTrue(len(latest_event_ids) < 10, len(latest_event_ids)) + + @patch("synapse.handlers.message._DUMMY_EVENT_ROOM_EXCLUSION_EXPIRY", new=0) + def test_send_dummy_events_when_insufficient_power(self): + self._create_extremity_rich_graph() + # Criple power levels + self.helper.send_state( + self.room_id, + EventTypes.PowerLevels, + body={"users": {str(self.user): -1}}, + tok=self.token1, + ) + # Pump the reactor repeatedly so that the background updates have a + # chance to run. + self.pump(10 * 60) + + latest_event_ids = self.get_success( + self.store.get_latest_event_ids_in_room(self.room_id) + ) + # Check that the room has not been pruned + self.assertTrue(len(latest_event_ids) > 10) + + # New user with regular levels + user2 = self.register_user("user2", "password") + token2 = self.login("user2", "password") + self.helper.join(self.room_id, user2, tok=token2) + self.pump(10 * 60) + + latest_event_ids = self.get_success( + self.store.get_latest_event_ids_in_room(self.room_id) + ) + self.assertTrue(len(latest_event_ids) < 10, len(latest_event_ids)) + + @patch("synapse.handlers.message._DUMMY_EVENT_ROOM_EXCLUSION_EXPIRY", new=0) + def test_send_dummy_event_without_consent(self): + self._create_extremity_rich_graph() + self._enable_consent_checking() + + # Pump the reactor repeatedly so that the background updates have a + # chance to run. Attempt to add dummy event with user that has not consented + # Check that dummy event send fails. + self.pump(10 * 60) + latest_event_ids = self.get_success( + self.store.get_latest_event_ids_in_room(self.room_id) + ) + self.assertTrue(len(latest_event_ids) == self.EXTREMITIES_COUNT) + + # Create new user, and add consent + user2 = self.register_user("user2", "password") + token2 = self.login("user2", "password") + self.get_success( + self.store.user_set_consent_version(user2, self.CONSENT_VERSION) + ) + self.helper.join(self.room_id, user2, tok=token2) + + # Background updates should now cause a dummy event to be added to the graph + self.pump(10 * 60) + + latest_event_ids = self.get_success( + self.store.get_latest_event_ids_in_room(self.room_id) + ) + self.assertTrue(len(latest_event_ids) < 10, len(latest_event_ids)) + + @patch("synapse.handlers.message._DUMMY_EVENT_ROOM_EXCLUSION_EXPIRY", new=250) + def test_expiry_logic(self): + """Simple test to ensure that _expire_rooms_to_exclude_from_dummy_event_insertion() + expires old entries correctly. + """ + self.event_creator_handler._rooms_to_exclude_from_dummy_event_insertion[ + "1" + ] = 100000 + self.event_creator_handler._rooms_to_exclude_from_dummy_event_insertion[ + "2" + ] = 200000 + self.event_creator_handler._rooms_to_exclude_from_dummy_event_insertion[ + "3" + ] = 300000 + self.event_creator_handler._expire_rooms_to_exclude_from_dummy_event_insertion() + # All entries within time frame + self.assertEqual( + len( + self.event_creator_handler._rooms_to_exclude_from_dummy_event_insertion + ), + 3, + ) + # Oldest room to expire + self.pump(1) + self.event_creator_handler._expire_rooms_to_exclude_from_dummy_event_insertion() + self.assertEqual( + len( + self.event_creator_handler._rooms_to_exclude_from_dummy_event_insertion + ), + 2, + ) + # All rooms to expire + self.pump(2) + self.assertEqual( + len( + self.event_creator_handler._rooms_to_exclude_from_dummy_event_insertion + ), + 0, + ) + + def _create_extremity_rich_graph(self): + """Helper method to create bushy graph on demand""" + + event_id_start = self.create_and_send_event(self.room_id, self.user) + + for _ in range(self.EXTREMITIES_COUNT): + self.create_and_send_event( + self.room_id, self.user, prev_event_ids=[event_id_start] + ) + + latest_event_ids = self.get_success( + self.store.get_latest_event_ids_in_room(self.room_id) + ) + self.assertEqual(len(latest_event_ids), 50) + + def _enable_consent_checking(self): + """Helper method to enable consent checking""" + self.event_creator._block_events_without_consent_error = "No consent from user" + consent_uri_builder = Mock() + consent_uri_builder.build_user_consent_uri.return_value = "http://example.com" + self.event_creator._consent_uri_builder = consent_uri_builder diff --git a/tests/storage/test_client_ips.py b/tests/storage/test_client_ips.py index 09305c3bf..afac5dec7 100644 --- a/tests/storage/test_client_ips.py +++ b/tests/storage/test_client_ips.py @@ -55,7 +55,6 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase): { "user_id": user_id, "device_id": "device_id", - "access_token": "access_token", "ip": "ip", "user_agent": "user_agent", "last_seen": 12345678000, @@ -201,6 +200,156 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase): active = self.get_success(self.store.user_last_seen_monthly_active(user_id)) self.assertTrue(active) + def test_devices_last_seen_bg_update(self): + # First make sure we have completed all updates. + while not self.get_success(self.store.has_completed_background_updates()): + self.get_success(self.store.do_next_background_update(100), by=0.1) + + # Insert a user IP + user_id = "@user:id" + self.get_success( + self.store.insert_client_ip( + user_id, "access_token", "ip", "user_agent", "device_id" + ) + ) + + # Force persisting to disk + self.reactor.advance(200) + + # But clear the associated entry in devices table + self.get_success( + self.store._simple_update( + table="devices", + keyvalues={"user_id": user_id, "device_id": "device_id"}, + updatevalues={"last_seen": None, "ip": None, "user_agent": None}, + desc="test_devices_last_seen_bg_update", + ) + ) + + # We should now get nulls when querying + result = self.get_success( + self.store.get_last_client_ip_by_device(user_id, "device_id") + ) + + r = result[(user_id, "device_id")] + self.assertDictContainsSubset( + { + "user_id": user_id, + "device_id": "device_id", + "ip": None, + "user_agent": None, + "last_seen": None, + }, + r, + ) + + # Register the background update to run again. + self.get_success( + self.store._simple_insert( + table="background_updates", + values={ + "update_name": "devices_last_seen", + "progress_json": "{}", + "depends_on": None, + }, + ) + ) + + # ... and tell the DataStore that it hasn't finished all updates yet + self.store._all_done = False + + # Now let's actually drive the updates to completion + while not self.get_success(self.store.has_completed_background_updates()): + self.get_success(self.store.do_next_background_update(100), by=0.1) + + # We should now get the correct result again + result = self.get_success( + self.store.get_last_client_ip_by_device(user_id, "device_id") + ) + + r = result[(user_id, "device_id")] + self.assertDictContainsSubset( + { + "user_id": user_id, + "device_id": "device_id", + "ip": "ip", + "user_agent": "user_agent", + "last_seen": 0, + }, + r, + ) + + def test_old_user_ips_pruned(self): + # First make sure we have completed all updates. + while not self.get_success(self.store.has_completed_background_updates()): + self.get_success(self.store.do_next_background_update(100), by=0.1) + + # Insert a user IP + user_id = "@user:id" + self.get_success( + self.store.insert_client_ip( + user_id, "access_token", "ip", "user_agent", "device_id" + ) + ) + + # Force persisting to disk + self.reactor.advance(200) + + # We should see that in the DB + result = self.get_success( + self.store._simple_select_list( + table="user_ips", + keyvalues={"user_id": user_id}, + retcols=["access_token", "ip", "user_agent", "device_id", "last_seen"], + desc="get_user_ip_and_agents", + ) + ) + + self.assertEqual( + result, + [ + { + "access_token": "access_token", + "ip": "ip", + "user_agent": "user_agent", + "device_id": "device_id", + "last_seen": 0, + } + ], + ) + + # Now advance by a couple of months + self.reactor.advance(60 * 24 * 60 * 60) + + # We should get no results. + result = self.get_success( + self.store._simple_select_list( + table="user_ips", + keyvalues={"user_id": user_id}, + retcols=["access_token", "ip", "user_agent", "device_id", "last_seen"], + desc="get_user_ip_and_agents", + ) + ) + + self.assertEqual(result, []) + + # But we should still get the correct values for the device + result = self.get_success( + self.store.get_last_client_ip_by_device(user_id, "device_id") + ) + + r = result[(user_id, "device_id")] + self.assertDictContainsSubset( + { + "user_id": user_id, + "device_id": "device_id", + "ip": "ip", + "user_agent": "user_agent", + "last_seen": 0, + }, + r, + ) + class ClientIpAuthTestCase(unittest.HomeserverTestCase): diff --git a/tests/storage/test_end_to_end_keys.py b/tests/storage/test_end_to_end_keys.py index c8ece1528..398d54628 100644 --- a/tests/storage/test_end_to_end_keys.py +++ b/tests/storage/test_end_to_end_keys.py @@ -38,7 +38,7 @@ class EndToEndKeyStoreTestCase(tests.unittest.TestCase): self.assertIn("user", res) self.assertIn("device", res["user"]) dev = res["user"]["device"] - self.assertDictContainsSubset({"keys": json, "device_display_name": None}, dev) + self.assertDictContainsSubset(json, dev) @defer.inlineCallbacks def test_reupload_key(self): @@ -68,7 +68,7 @@ class EndToEndKeyStoreTestCase(tests.unittest.TestCase): self.assertIn("device", res["user"]) dev = res["user"]["device"] self.assertDictContainsSubset( - {"keys": json, "device_display_name": "display_name"}, dev + {"key": "value", "unsigned": {"device_display_name": "display_name"}}, dev ) @defer.inlineCallbacks @@ -80,10 +80,10 @@ class EndToEndKeyStoreTestCase(tests.unittest.TestCase): yield self.store.store_device("user2", "device1", None) yield self.store.store_device("user2", "device2", None) - yield self.store.set_e2e_device_keys("user1", "device1", now, "json11") - yield self.store.set_e2e_device_keys("user1", "device2", now, "json12") - yield self.store.set_e2e_device_keys("user2", "device1", now, "json21") - yield self.store.set_e2e_device_keys("user2", "device2", now, "json22") + yield self.store.set_e2e_device_keys("user1", "device1", now, {"key": "json11"}) + yield self.store.set_e2e_device_keys("user1", "device2", now, {"key": "json12"}) + yield self.store.set_e2e_device_keys("user2", "device1", now, {"key": "json21"}) + yield self.store.set_e2e_device_keys("user2", "device2", now, {"key": "json22"}) res = yield self.store.get_e2e_device_keys( (("user1", "device1"), ("user2", "device2")) diff --git a/tests/storage/test_event_federation.py b/tests/storage/test_event_federation.py index 86c7ac350..2fe50377f 100644 --- a/tests/storage/test_event_federation.py +++ b/tests/storage/test_event_federation.py @@ -57,7 +57,7 @@ class EventFederationWorkerStoreTestCase(tests.unittest.TestCase): "(event_id, algorithm, hash) " "VALUES (?, 'sha256', ?)" ), - (event_id, b"ffff"), + (event_id, bytearray(b"ffff")), ) for i in range(0, 11): @@ -75,3 +75,43 @@ class EventFederationWorkerStoreTestCase(tests.unittest.TestCase): el = r[i] depth = el[2] self.assertLessEqual(5, depth) + + @defer.inlineCallbacks + def test_get_rooms_with_many_extremities(self): + room1 = "#room1" + room2 = "#room2" + room3 = "#room3" + + def insert_event(txn, i, room_id): + event_id = "$event_%i:local" % i + txn.execute( + ( + "INSERT INTO event_forward_extremities (room_id, event_id) " + "VALUES (?, ?)" + ), + (room_id, event_id), + ) + + for i in range(0, 20): + yield self.store.runInteraction("insert", insert_event, i, room1) + yield self.store.runInteraction("insert", insert_event, i, room2) + yield self.store.runInteraction("insert", insert_event, i, room3) + + # Test simple case + r = yield self.store.get_rooms_with_many_extremities(5, 5, []) + self.assertEqual(len(r), 3) + + # Does filter work? + + r = yield self.store.get_rooms_with_many_extremities(5, 5, [room1]) + self.assertTrue(room2 in r) + self.assertTrue(room3 in r) + self.assertEqual(len(r), 2) + + r = yield self.store.get_rooms_with_many_extremities(5, 5, [room1, room2]) + self.assertEqual(r, [room3]) + + # Does filter and limit work? + + r = yield self.store.get_rooms_with_many_extremities(5, 1, [room1]) + self.assertTrue(r == [room2] or r == [room3]) diff --git a/tests/storage/test_monthly_active_users.py b/tests/storage/test_monthly_active_users.py index 1494650d1..90a63dc47 100644 --- a/tests/storage/test_monthly_active_users.py +++ b/tests/storage/test_monthly_active_users.py @@ -50,6 +50,7 @@ class MonthlyActiveUsersTestCase(unittest.HomeserverTestCase): {"medium": "email", "address": user2_email}, {"medium": "email", "address": user3_email}, ] + self.hs.config.mau_limits_reserved_threepids = threepids # -1 because user3 is a support user and does not count user_num = len(threepids) - 1 @@ -84,6 +85,7 @@ class MonthlyActiveUsersTestCase(unittest.HomeserverTestCase): self.hs.config.max_mau_value = 0 self.reactor.advance(FORTY_DAYS) + self.hs.config.max_mau_value = 5 self.store.reap_monthly_active_users() self.pump() @@ -147,9 +149,7 @@ class MonthlyActiveUsersTestCase(unittest.HomeserverTestCase): self.store.reap_monthly_active_users() self.pump() count = self.store.get_monthly_active_count() - self.assertEquals( - self.get_success(count), initial_users - self.hs.config.max_mau_value - ) + self.assertEquals(self.get_success(count), self.hs.config.max_mau_value) self.reactor.advance(FORTY_DAYS) self.store.reap_monthly_active_users() @@ -158,6 +158,44 @@ class MonthlyActiveUsersTestCase(unittest.HomeserverTestCase): count = self.store.get_monthly_active_count() self.assertEquals(self.get_success(count), 0) + def test_reap_monthly_active_users_reserved_users(self): + """ Tests that reaping correctly handles reaping where reserved users are + present""" + + self.hs.config.max_mau_value = 5 + initial_users = 5 + reserved_user_number = initial_users - 1 + threepids = [] + for i in range(initial_users): + user = "@user%d:server" % i + email = "user%d@example.com" % i + self.get_success(self.store.upsert_monthly_active_user(user)) + threepids.append({"medium": "email", "address": email}) + # Need to ensure that the most recent entries in the + # monthly_active_users table are reserved + now = int(self.hs.get_clock().time_msec()) + if i != 0: + self.get_success( + self.store.register_user(user_id=user, password_hash=None) + ) + self.get_success( + self.store.user_add_threepid(user, "email", email, now, now) + ) + + self.hs.config.mau_limits_reserved_threepids = threepids + self.store.runInteraction( + "initialise", self.store._initialise_reserved_users, threepids + ) + count = self.store.get_monthly_active_count() + self.assertTrue(self.get_success(count), initial_users) + + users = self.store.get_registered_reserved_users() + self.assertEquals(len(self.get_success(users)), reserved_user_number) + + self.get_success(self.store.reap_monthly_active_users()) + count = self.store.get_monthly_active_count() + self.assertEquals(self.get_success(count), self.hs.config.max_mau_value) + def test_populate_monthly_users_is_guest(self): # Test that guest users are not added to mau list user_id = "@user_id:host" @@ -192,12 +230,13 @@ class MonthlyActiveUsersTestCase(unittest.HomeserverTestCase): def test_get_reserved_real_user_account(self): # Test no reserved users, or reserved threepids - count = self.store.get_registered_reserved_users_count() - self.assertEquals(self.get_success(count), 0) + users = self.get_success(self.store.get_registered_reserved_users()) + self.assertEquals(len(users), 0) # Test reserved users but no registered users user1 = "@user1:example.com" user2 = "@user2:example.com" + user1_email = "user1@example.com" user2_email = "user2@example.com" threepids = [ @@ -210,8 +249,8 @@ class MonthlyActiveUsersTestCase(unittest.HomeserverTestCase): ) self.pump() - count = self.store.get_registered_reserved_users_count() - self.assertEquals(self.get_success(count), 0) + users = self.get_success(self.store.get_registered_reserved_users()) + self.assertEquals(len(users), 0) # Test reserved registed users self.store.register_user(user_id=user1, password_hash=None) @@ -221,8 +260,9 @@ class MonthlyActiveUsersTestCase(unittest.HomeserverTestCase): now = int(self.hs.get_clock().time_msec()) self.store.user_add_threepid(user1, "email", user1_email, now, now) self.store.user_add_threepid(user2, "email", user2_email, now, now) - count = self.store.get_registered_reserved_users_count() - self.assertEquals(self.get_success(count), len(threepids)) + + users = self.get_success(self.store.get_registered_reserved_users()) + self.assertEquals(len(users), len(threepids)) def test_support_user_not_add_to_mau_limits(self): support_user_id = "@support:test" diff --git a/tests/storage/test_redaction.py b/tests/storage/test_redaction.py index d961b81d4..427d3c49c 100644 --- a/tests/storage/test_redaction.py +++ b/tests/storage/test_redaction.py @@ -17,6 +17,8 @@ from mock import Mock +from canonicaljson import json + from twisted.internet import defer from synapse.api.constants import EventTypes, Membership @@ -29,8 +31,10 @@ from tests.utils import create_room class RedactionTestCase(unittest.HomeserverTestCase): def make_homeserver(self, reactor, clock): + config = self.default_config() + config["redaction_retention_period"] = "30d" return self.setup_test_homeserver( - resource_for_federation=Mock(), http_client=None + resource_for_federation=Mock(), http_client=None, config=config ) def prepare(self, reactor, clock, hs): @@ -114,6 +118,8 @@ class RedactionTestCase(unittest.HomeserverTestCase): self.get_success(self.store.persist_event(event, context)) + return event + def test_redact(self): self.get_success( self.inject_room_member(self.room1, self.u_alice, Membership.JOIN) @@ -286,3 +292,108 @@ class RedactionTestCase(unittest.HomeserverTestCase): self.assertEqual( fetched.unsigned["redacted_because"].event_id, redaction_event_id2 ) + + def test_redact_censor(self): + """Test that a redacted event gets censored in the DB after a month + """ + + self.get_success( + self.inject_room_member(self.room1, self.u_alice, Membership.JOIN) + ) + + msg_event = self.get_success(self.inject_message(self.room1, self.u_alice, "t")) + + # Check event has not been redacted: + event = self.get_success(self.store.get_event(msg_event.event_id)) + + self.assertObjectHasAttributes( + { + "type": EventTypes.Message, + "user_id": self.u_alice.to_string(), + "content": {"body": "t", "msgtype": "message"}, + }, + event, + ) + + self.assertFalse("redacted_because" in event.unsigned) + + # Redact event + reason = "Because I said so" + self.get_success( + self.inject_redaction(self.room1, msg_event.event_id, self.u_alice, reason) + ) + + event = self.get_success(self.store.get_event(msg_event.event_id)) + + self.assertTrue("redacted_because" in event.unsigned) + + self.assertObjectHasAttributes( + { + "type": EventTypes.Message, + "user_id": self.u_alice.to_string(), + "content": {}, + }, + event, + ) + + event_json = self.get_success( + self.store._simple_select_one_onecol( + table="event_json", + keyvalues={"event_id": msg_event.event_id}, + retcol="json", + ) + ) + + self.assert_dict( + {"content": {"body": "t", "msgtype": "message"}}, json.loads(event_json) + ) + + # Advance by 30 days, then advance again to ensure that the looping call + # for updating the stream position gets called and then the looping call + # for the censoring gets called. + self.reactor.advance(60 * 60 * 24 * 31) + self.reactor.advance(60 * 60 * 2) + + event_json = self.get_success( + self.store._simple_select_one_onecol( + table="event_json", + keyvalues={"event_id": msg_event.event_id}, + retcol="json", + ) + ) + + self.assert_dict({"content": {}}, json.loads(event_json)) + + def test_redact_redaction(self): + """Tests that we can redact a redaction and can fetch it again. + """ + + self.get_success( + self.inject_room_member(self.room1, self.u_alice, Membership.JOIN) + ) + + msg_event = self.get_success(self.inject_message(self.room1, self.u_alice, "t")) + + first_redact_event = self.get_success( + self.inject_redaction( + self.room1, msg_event.event_id, self.u_alice, "Redacting message" + ) + ) + + self.get_success( + self.inject_redaction( + self.room1, + first_redact_event.event_id, + self.u_alice, + "Redacting redaction", + ) + ) + + # Now lets jump to the future where we have censored the redaction event + # in the DB. + self.reactor.advance(60 * 60 * 24 * 31) + + # We just want to check that fetching the event doesn't raise an exception. + self.get_success( + self.store.get_event(first_redact_event.event_id, allow_none=True) + ) diff --git a/tests/storage/test_transactions.py b/tests/storage/test_transactions.py index 14169afa9..8e817e2c7 100644 --- a/tests/storage/test_transactions.py +++ b/tests/storage/test_transactions.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from synapse.util.retryutils import MAX_RETRY_INTERVAL + from tests.unittest import HomeserverTestCase @@ -29,17 +31,28 @@ class TransactionStoreTestCase(HomeserverTestCase): r = self.get_success(d) self.assertIsNone(r) - d = self.store.set_destination_retry_timings("example.com", 50, 100) + d = self.store.set_destination_retry_timings("example.com", 1000, 50, 100) self.get_success(d) d = self.store.get_destination_retry_timings("example.com") r = self.get_success(d) - self.assert_dict({"retry_last_ts": 50, "retry_interval": 100}, r) + self.assert_dict( + {"retry_last_ts": 50, "retry_interval": 100, "failure_ts": 1000}, r + ) def test_initial_set_transactions(self): """Tests that we can successfully set the destination retries (there was a bug around invalidating the cache that broke this) """ - d = self.store.set_destination_retry_timings("example.com", 50, 100) + d = self.store.set_destination_retry_timings("example.com", 1000, 50, 100) + self.get_success(d) + + def test_large_destination_retry(self): + d = self.store.set_destination_retry_timings( + "example.com", MAX_RETRY_INTERVAL, MAX_RETRY_INTERVAL, MAX_RETRY_INTERVAL + ) + self.get_success(d) + + d = self.store.get_destination_retry_timings("example.com") self.get_success(d) diff --git a/tests/test_metrics.py b/tests/test_metrics.py index 2edbae5c6..270f853d6 100644 --- a/tests/test_metrics.py +++ b/tests/test_metrics.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2018 New Vector Ltd +# Copyright 2019 Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,8 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. - -from synapse.metrics import InFlightGauge +from synapse.metrics import REGISTRY, InFlightGauge, generate_latest from tests import unittest @@ -111,3 +111,21 @@ class TestMauLimit(unittest.TestCase): } return results + + +class BuildInfoTests(unittest.TestCase): + def test_get_build(self): + """ + The synapse_build_info metric reports the OS version, Python version, + and Synapse version. + """ + items = list( + filter( + lambda x: b"synapse_build_info{" in x, + generate_latest(REGISTRY).split(b"\n"), + ) + ) + self.assertEqual(len(items), 1) + self.assertTrue(b"osversion=" in items[0]) + self.assertTrue(b"pythonversion=" in items[0]) + self.assertTrue(b"version=" in items[0]) diff --git a/tests/test_terms_auth.py b/tests/test_terms_auth.py index 52739fbab..5ec5d2b35 100644 --- a/tests/test_terms_auth.py +++ b/tests/test_terms_auth.py @@ -28,6 +28,21 @@ from tests import unittest class TermsTestCase(unittest.HomeserverTestCase): servlets = [register_servlets] + def default_config(self, name="test"): + config = super().default_config(name) + config.update( + { + "public_baseurl": "https://example.org/", + "user_consent": { + "version": "1.0", + "policy_name": "My Cool Privacy Policy", + "template_dir": "/", + "require_at_registration": True, + }, + } + ) + return config + def prepare(self, reactor, clock, hs): self.clock = MemoryReactorClock() self.hs_clock = Clock(self.clock) @@ -35,17 +50,8 @@ class TermsTestCase(unittest.HomeserverTestCase): self.registration_handler = Mock() self.auth_handler = Mock() self.device_handler = Mock() - hs.config.enable_registration = True - hs.config.registrations_require_3pid = [] - hs.config.auto_join_rooms = [] - hs.config.enable_registration_captcha = False def test_ui_auth(self): - self.hs.config.user_consent_at_registration = True - self.hs.config.user_consent_policy_name = "My Cool Privacy Policy" - self.hs.config.public_baseurl = "https://example.org/" - self.hs.config.user_consent_version = "1.0" - # Do a UI auth request request, channel = self.make_request(b"POST", self.url, b"{}") self.render(request) diff --git a/tests/util/test_retryutils.py b/tests/util/test_retryutils.py new file mode 100644 index 000000000..9e348694a --- /dev/null +++ b/tests/util/test_retryutils.py @@ -0,0 +1,127 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from synapse.util.retryutils import ( + MIN_RETRY_INTERVAL, + RETRY_MULTIPLIER, + NotRetryingDestination, + get_retry_limiter, +) + +from tests.unittest import HomeserverTestCase + + +class RetryLimiterTestCase(HomeserverTestCase): + def test_new_destination(self): + """A happy-path case with a new destination and a successful operation""" + store = self.hs.get_datastore() + d = get_retry_limiter("test_dest", self.clock, store) + self.pump() + limiter = self.successResultOf(d) + + # advance the clock a bit before making the request + self.pump(1) + + with limiter: + pass + + d = store.get_destination_retry_timings("test_dest") + self.pump() + new_timings = self.successResultOf(d) + self.assertIsNone(new_timings) + + def test_limiter(self): + """General test case which walks through the process of a failing request""" + store = self.hs.get_datastore() + + d = get_retry_limiter("test_dest", self.clock, store) + self.pump() + limiter = self.successResultOf(d) + + self.pump(1) + try: + with limiter: + self.pump(1) + failure_ts = self.clock.time_msec() + raise AssertionError("argh") + except AssertionError: + pass + + # wait for the update to land + self.pump() + + d = store.get_destination_retry_timings("test_dest") + self.pump() + new_timings = self.successResultOf(d) + self.assertEqual(new_timings["failure_ts"], failure_ts) + self.assertEqual(new_timings["retry_last_ts"], failure_ts) + self.assertEqual(new_timings["retry_interval"], MIN_RETRY_INTERVAL) + + # now if we try again we should get a failure + d = get_retry_limiter("test_dest", self.clock, store) + self.pump() + self.failureResultOf(d, NotRetryingDestination) + + # + # advance the clock and try again + # + + self.pump(MIN_RETRY_INTERVAL) + d = get_retry_limiter("test_dest", self.clock, store) + self.pump() + limiter = self.successResultOf(d) + + self.pump(1) + try: + with limiter: + self.pump(1) + retry_ts = self.clock.time_msec() + raise AssertionError("argh") + except AssertionError: + pass + + # wait for the update to land + self.pump() + + d = store.get_destination_retry_timings("test_dest") + self.pump() + new_timings = self.successResultOf(d) + self.assertEqual(new_timings["failure_ts"], failure_ts) + self.assertEqual(new_timings["retry_last_ts"], retry_ts) + self.assertGreaterEqual( + new_timings["retry_interval"], MIN_RETRY_INTERVAL * RETRY_MULTIPLIER * 0.5 + ) + self.assertLessEqual( + new_timings["retry_interval"], MIN_RETRY_INTERVAL * RETRY_MULTIPLIER * 2.0 + ) + + # + # one more go, with success + # + self.pump(MIN_RETRY_INTERVAL * RETRY_MULTIPLIER * 2.0) + d = get_retry_limiter("test_dest", self.clock, store) + self.pump() + limiter = self.successResultOf(d) + + self.pump(1) + with limiter: + self.pump(1) + + # wait for the update to land + self.pump() + + d = store.get_destination_retry_timings("test_dest") + self.pump() + new_timings = self.successResultOf(d) + self.assertIsNone(new_timings) diff --git a/tox.ini b/tox.ini index 7cb40847b..7ba6f6339 100644 --- a/tox.ini +++ b/tox.ini @@ -2,6 +2,7 @@ envlist = packaging, py35, py36, py37, check_codestyle, check_isort [base] +basepython = python3.7 deps = mock python-subunit @@ -137,18 +138,34 @@ commands = {toxinidir}/scripts-dev/generate_sample_config --check skip_install = True deps = coverage -whitelist_externals = - bash commands= coverage combine coverage report +[testenv:cov-erase] +skip_install = True +deps = + coverage +commands= + coverage erase + +[testenv:cov-html] +skip_install = True +deps = + coverage +commands= + coverage html + [testenv:mypy] -basepython = python3.5 +basepython = python3.7 +skip_install = True deps = {[base]deps} - mypy + mypy==0.730 + mypy-zope +env = + MYPYPATH = stubs/ extras = all -commands = mypy --ignore-missing-imports \ - synapse/logging/_structured.py \ - synapse/logging/_terse_json.py +commands = mypy --show-traceback --check-untyped-defs --show-error-codes --follow-imports=normal \ + synapse/logging/ \ + synapse/config/