Merge pull request #6301 from matrix-org/babolivier/msc2326

Implement MSC2326 (label based filtering)
This commit is contained in:
Brendan Abolivier 2019-11-01 17:04:45 +00:00 committed by GitHub
commit f496d25877
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 396 additions and 9 deletions

1
changelog.d/6301.feature Normal file
View File

@ -0,0 +1 @@
Implement label-based filtering on `/sync` and `/messages` ([MSC2326](https://github.com/matrix-org/matrix-doc/pull/2326)).

View File

@ -138,3 +138,10 @@ class LimitBlockingTypes(object):
MONTHLY_ACTIVE_USER = "monthly_active_user"
HS_DISABLED = "hs_disabled"
class EventContentFields(object):
"""Fields found in events' content, regardless of type."""
# Labels for the event, cf https://github.com/matrix-org/matrix-doc/pull/2326
LABELS = "org.matrix.labels"

View File

@ -20,6 +20,7 @@ from jsonschema import FormatChecker
from twisted.internet import defer
from synapse.api.constants import EventContentFields
from synapse.api.errors import SynapseError
from synapse.storage.presence import UserPresenceState
from synapse.types import RoomID, UserID
@ -66,6 +67,10 @@ ROOM_EVENT_FILTER_SCHEMA = {
"contains_url": {"type": "boolean"},
"lazy_load_members": {"type": "boolean"},
"include_redundant_members": {"type": "boolean"},
# Include or exclude events with the provided labels.
# cf https://github.com/matrix-org/matrix-doc/pull/2326
"org.matrix.labels": {"type": "array", "items": {"type": "string"}},
"org.matrix.not_labels": {"type": "array", "items": {"type": "string"}},
},
}
@ -259,6 +264,9 @@ class Filter(object):
self.contains_url = self.filter_json.get("contains_url", None)
self.labels = self.filter_json.get("org.matrix.labels", None)
self.not_labels = self.filter_json.get("org.matrix.not_labels", [])
def filters_all_types(self):
return "*" in self.not_types
@ -282,6 +290,7 @@ class Filter(object):
room_id = None
ev_type = "m.presence"
contains_url = False
labels = []
else:
sender = event.get("sender", None)
if not sender:
@ -300,10 +309,11 @@ class Filter(object):
content = event.get("content", {})
# check if there is a string url field in the content for filtering purposes
contains_url = isinstance(content.get("url"), text_type)
labels = content.get(EventContentFields.LABELS, [])
return self.check_fields(room_id, sender, ev_type, contains_url)
return self.check_fields(room_id, sender, ev_type, labels, contains_url)
def check_fields(self, room_id, sender, event_type, contains_url):
def check_fields(self, room_id, sender, event_type, labels, contains_url):
"""Checks whether the filter matches the given event fields.
Returns:
@ -313,6 +323,7 @@ class Filter(object):
"rooms": lambda v: room_id == v,
"senders": lambda v: sender == v,
"types": lambda v: _matches_wildcard(event_type, v),
"labels": lambda v: v in labels,
}
for name, match_func in literal_keys.items():

View File

@ -65,6 +65,9 @@ class VersionsRestServlet(RestServlet):
"m.require_identity_server": False,
# as per MSC2290
"m.separate_add_and_bind": True,
# Implements support for label-based filtering as described in
# MSC2326.
"org.matrix.label_based_filtering": True,
},
},
)

View File

@ -29,7 +29,7 @@ from prometheus_client import Counter
from twisted.internet import defer
import synapse.metrics
from synapse.api.constants import EventTypes
from synapse.api.constants import EventContentFields, EventTypes
from synapse.api.errors import SynapseError
from synapse.events import EventBase # noqa: F401
from synapse.events.snapshot import EventContext # noqa: F401
@ -932,6 +932,13 @@ class EventsStore(
self._handle_event_relations(txn, event)
# Store the labels for this event.
labels = event.content.get(EventContentFields.LABELS)
if labels:
self.insert_labels_for_event_txn(
txn, event.event_id, labels, event.room_id, event.depth
)
# Insert into the room_memberships table.
self._store_room_members_txn(
txn,
@ -1917,6 +1924,33 @@ class EventsStore(
get_all_updated_current_state_deltas_txn,
)
def insert_labels_for_event_txn(
self, txn, event_id, labels, room_id, topological_ordering
):
"""Store the mapping between an event's ID and its labels, with one row per
(event_id, label) tuple.
Args:
txn (LoggingTransaction): The transaction to execute.
event_id (str): The event's ID.
labels (list[str]): A list of text labels.
room_id (str): The ID of the room the event was sent to.
topological_ordering (int): The position of the event in the room's topology.
"""
return self._simple_insert_many_txn(
txn=txn,
table="event_labels",
values=[
{
"event_id": event_id,
"label": label,
"room_id": room_id,
"topological_ordering": topological_ordering,
}
for label in labels
],
)
AllNewEventsResult = namedtuple(
"AllNewEventsResult",

View File

@ -0,0 +1,30 @@
/* Copyright 2019 The Matrix.org Foundation C.I.C.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-- room_id and topoligical_ordering are denormalised from the events table in order to
-- make the index work.
CREATE TABLE IF NOT EXISTS event_labels (
event_id TEXT,
label TEXT,
room_id TEXT NOT NULL,
topological_ordering BIGINT NOT NULL,
PRIMARY KEY(event_id, label)
);
-- This index enables an event pagination looking for a particular label to index the
-- event_labels table first, which is much quicker than scanning the events table and then
-- filtering by label, if the label is rarely used relative to the size of the room.
CREATE INDEX event_labels_room_id_label_idx ON event_labels(room_id, label, topological_ordering);

View File

@ -229,6 +229,14 @@ def filter_to_clause(event_filter):
clauses.append("contains_url = ?")
args.append(event_filter.contains_url)
# We're only applying the "labels" filter on the database query, because applying the
# "not_labels" filter via a SQL query is non-trivial. Instead, we let
# event_filter.check_fields apply it, which is not as efficient but makes the
# implementation simpler.
if event_filter.labels:
clauses.append("(%s)" % " OR ".join("label = ?" for _ in event_filter.labels))
args.extend(event_filter.labels)
return " AND ".join(clauses), args
@ -864,8 +872,9 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore):
args.append(int(limit))
sql = (
"SELECT event_id, topological_ordering, stream_ordering"
"SELECT DISTINCT event_id, topological_ordering, stream_ordering"
" FROM events"
" LEFT JOIN event_labels USING (event_id, room_id, topological_ordering)"
" WHERE outlier = ? AND room_id = ? AND %(bounds)s"
" ORDER BY topological_ordering %(order)s,"
" stream_ordering %(order)s LIMIT ?"

View File

@ -19,6 +19,7 @@ import jsonschema
from twisted.internet import defer
from synapse.api.constants import EventContentFields
from synapse.api.errors import SynapseError
from synapse.api.filtering import Filter
from synapse.events import FrozenEvent
@ -95,6 +96,8 @@ class FilteringTestCase(unittest.TestCase):
"types": ["m.room.message"],
"not_rooms": ["!726s6s6q:example.com"],
"not_senders": ["@spam:example.com"],
"org.matrix.labels": ["#fun"],
"org.matrix.not_labels": ["#work"],
},
"ephemeral": {
"types": ["m.receipt", "m.typing"],
@ -320,6 +323,46 @@ class FilteringTestCase(unittest.TestCase):
)
self.assertFalse(Filter(definition).check(event))
def test_filter_labels(self):
definition = {"org.matrix.labels": ["#fun"]}
event = MockEvent(
sender="@foo:bar",
type="m.room.message",
room_id="!secretbase:unknown",
content={EventContentFields.LABELS: ["#fun"]},
)
self.assertTrue(Filter(definition).check(event))
event = MockEvent(
sender="@foo:bar",
type="m.room.message",
room_id="!secretbase:unknown",
content={EventContentFields.LABELS: ["#notfun"]},
)
self.assertFalse(Filter(definition).check(event))
def test_filter_not_labels(self):
definition = {"org.matrix.not_labels": ["#fun"]}
event = MockEvent(
sender="@foo:bar",
type="m.room.message",
room_id="!secretbase:unknown",
content={EventContentFields.LABELS: ["#fun"]},
)
self.assertFalse(Filter(definition).check(event))
event = MockEvent(
sender="@foo:bar",
type="m.room.message",
room_id="!secretbase:unknown",
content={EventContentFields.LABELS: ["#notfun"]},
)
self.assertTrue(Filter(definition).check(event))
@defer.inlineCallbacks
def test_filter_presence_match(self):
user_filter_json = {"presence": {"types": ["m.*"]}}

View File

@ -24,7 +24,7 @@ from six.moves.urllib import parse as urlparse
from twisted.internet import defer
import synapse.rest.admin
from synapse.api.constants import Membership
from synapse.api.constants import EventContentFields, EventTypes, Membership
from synapse.rest.client.v1 import login, profile, room
from tests import unittest
@ -811,6 +811,105 @@ class RoomMessageListTestCase(RoomBase):
self.assertTrue("chunk" in channel.json_body)
self.assertTrue("end" in channel.json_body)
def test_filter_labels(self):
"""Test that we can filter by a label."""
message_filter = json.dumps(
{"types": [EventTypes.Message], "org.matrix.labels": ["#fun"]}
)
events = self._test_filter_labels(message_filter)
self.assertEqual(len(events), 2, [event["content"] for event in events])
self.assertEqual(events[0]["content"]["body"], "with right label", events[0])
self.assertEqual(events[1]["content"]["body"], "with right label", events[1])
def test_filter_not_labels(self):
"""Test that we can filter by the absence of a label."""
message_filter = json.dumps(
{"types": [EventTypes.Message], "org.matrix.not_labels": ["#fun"]}
)
events = self._test_filter_labels(message_filter)
self.assertEqual(len(events), 3, [event["content"] for event in events])
self.assertEqual(events[0]["content"]["body"], "without label", events[0])
self.assertEqual(events[1]["content"]["body"], "with wrong label", events[1])
self.assertEqual(
events[2]["content"]["body"], "with two wrong labels", events[2]
)
def test_filter_labels_not_labels(self):
"""Test that we can filter by both a label and the absence of another label."""
sync_filter = json.dumps(
{
"types": [EventTypes.Message],
"org.matrix.labels": ["#work"],
"org.matrix.not_labels": ["#notfun"],
}
)
events = self._test_filter_labels(sync_filter)
self.assertEqual(len(events), 1, [event["content"] for event in events])
self.assertEqual(events[0]["content"]["body"], "with wrong label", events[0])
def _test_filter_labels(self, message_filter):
self.helper.send_event(
room_id=self.room_id,
type=EventTypes.Message,
content={
"msgtype": "m.text",
"body": "with right label",
EventContentFields.LABELS: ["#fun"],
},
)
self.helper.send_event(
room_id=self.room_id,
type=EventTypes.Message,
content={"msgtype": "m.text", "body": "without label"},
)
self.helper.send_event(
room_id=self.room_id,
type=EventTypes.Message,
content={
"msgtype": "m.text",
"body": "with wrong label",
EventContentFields.LABELS: ["#work"],
},
)
self.helper.send_event(
room_id=self.room_id,
type=EventTypes.Message,
content={
"msgtype": "m.text",
"body": "with two wrong labels",
EventContentFields.LABELS: ["#work", "#notfun"],
},
)
self.helper.send_event(
room_id=self.room_id,
type=EventTypes.Message,
content={
"msgtype": "m.text",
"body": "with right label",
EventContentFields.LABELS: ["#fun"],
},
)
token = "s0_0_0_0_0_0_0_0_0"
request, channel = self.make_request(
"GET",
"/rooms/%s/messages?access_token=x&from=%s&filter=%s"
% (self.room_id, token, message_filter),
)
self.render(request)
return channel.json_body["chunk"]
class RoomSearchTestCase(unittest.HomeserverTestCase):
servlets = [

View File

@ -106,13 +106,22 @@ class RestHelper(object):
self.auth_user_id = temp_id
def send(self, room_id, body=None, txn_id=None, tok=None, expect_code=200):
if txn_id is None:
txn_id = "m%s" % (str(time.time()))
if body is None:
body = "body_text_here"
path = "/_matrix/client/r0/rooms/%s/send/m.room.message/%s" % (room_id, txn_id)
content = {"msgtype": "m.text", "body": body}
return self.send_event(
room_id, "m.room.message", content, txn_id, tok, expect_code
)
def send_event(
self, room_id, type, content={}, txn_id=None, tok=None, expect_code=200
):
if txn_id is None:
txn_id = "m%s" % (str(time.time()))
path = "/_matrix/client/r0/rooms/%s/send/%s/%s" % (room_id, type, txn_id)
if tok:
path = path + "?access_token=%s" % tok

View File

@ -12,10 +12,12 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
from mock import Mock
import synapse.rest.admin
from synapse.api.constants import EventContentFields, EventTypes
from synapse.rest.client.v1 import login, room
from synapse.rest.client.v2_alpha import sync
@ -26,7 +28,12 @@ from tests.server import TimedOutException
class FilterTestCase(unittest.HomeserverTestCase):
user_id = "@apple:test"
servlets = [sync.register_servlets]
servlets = [
synapse.rest.admin.register_servlets_for_client_rest_resource,
room.register_servlets,
login.register_servlets,
sync.register_servlets,
]
def make_homeserver(self, reactor, clock):
@ -70,6 +77,140 @@ class FilterTestCase(unittest.HomeserverTestCase):
)
class SyncFilterTestCase(unittest.HomeserverTestCase):
servlets = [
synapse.rest.admin.register_servlets_for_client_rest_resource,
room.register_servlets,
login.register_servlets,
sync.register_servlets,
]
def test_sync_filter_labels(self):
"""Test that we can filter by a label."""
sync_filter = json.dumps(
{
"room": {
"timeline": {
"types": [EventTypes.Message],
"org.matrix.labels": ["#fun"],
}
}
}
)
events = self._test_sync_filter_labels(sync_filter)
self.assertEqual(len(events), 2, [event["content"] for event in events])
self.assertEqual(events[0]["content"]["body"], "with right label", events[0])
self.assertEqual(events[1]["content"]["body"], "with right label", events[1])
def test_sync_filter_not_labels(self):
"""Test that we can filter by the absence of a label."""
sync_filter = json.dumps(
{
"room": {
"timeline": {
"types": [EventTypes.Message],
"org.matrix.not_labels": ["#fun"],
}
}
}
)
events = self._test_sync_filter_labels(sync_filter)
self.assertEqual(len(events), 3, [event["content"] for event in events])
self.assertEqual(events[0]["content"]["body"], "without label", events[0])
self.assertEqual(events[1]["content"]["body"], "with wrong label", events[1])
self.assertEqual(
events[2]["content"]["body"], "with two wrong labels", events[2]
)
def test_sync_filter_labels_not_labels(self):
"""Test that we can filter by both a label and the absence of another label."""
sync_filter = json.dumps(
{
"room": {
"timeline": {
"types": [EventTypes.Message],
"org.matrix.labels": ["#work"],
"org.matrix.not_labels": ["#notfun"],
}
}
}
)
events = self._test_sync_filter_labels(sync_filter)
self.assertEqual(len(events), 1, [event["content"] for event in events])
self.assertEqual(events[0]["content"]["body"], "with wrong label", events[0])
def _test_sync_filter_labels(self, sync_filter):
user_id = self.register_user("kermit", "test")
tok = self.login("kermit", "test")
room_id = self.helper.create_room_as(user_id, tok=tok)
self.helper.send_event(
room_id=room_id,
type=EventTypes.Message,
content={
"msgtype": "m.text",
"body": "with right label",
EventContentFields.LABELS: ["#fun"],
},
tok=tok,
)
self.helper.send_event(
room_id=room_id,
type=EventTypes.Message,
content={"msgtype": "m.text", "body": "without label"},
tok=tok,
)
self.helper.send_event(
room_id=room_id,
type=EventTypes.Message,
content={
"msgtype": "m.text",
"body": "with wrong label",
EventContentFields.LABELS: ["#work"],
},
tok=tok,
)
self.helper.send_event(
room_id=room_id,
type=EventTypes.Message,
content={
"msgtype": "m.text",
"body": "with two wrong labels",
EventContentFields.LABELS: ["#work", "#notfun"],
},
tok=tok,
)
self.helper.send_event(
room_id=room_id,
type=EventTypes.Message,
content={
"msgtype": "m.text",
"body": "with right label",
EventContentFields.LABELS: ["#fun"],
},
tok=tok,
)
request, channel = self.make_request(
"GET", "/sync?filter=%s" % sync_filter, access_token=tok
)
self.render(request)
self.assertEqual(channel.code, 200, channel.result)
return channel.json_body["rooms"]["join"][room_id]["timeline"]["events"]
class SyncTypingTests(unittest.HomeserverTestCase):
servlets = [