Merge pull request #6301 from matrix-org/babolivier/msc2326

Implement MSC2326 (label based filtering)
This commit is contained in:
Brendan Abolivier 2019-11-01 17:04:45 +00:00 committed by GitHub
commit f496d25877
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 396 additions and 9 deletions

1
changelog.d/6301.feature Normal file
View File

@ -0,0 +1 @@
Implement label-based filtering on `/sync` and `/messages` ([MSC2326](https://github.com/matrix-org/matrix-doc/pull/2326)).

View File

@ -138,3 +138,10 @@ class LimitBlockingTypes(object):
MONTHLY_ACTIVE_USER = "monthly_active_user" MONTHLY_ACTIVE_USER = "monthly_active_user"
HS_DISABLED = "hs_disabled" HS_DISABLED = "hs_disabled"
class EventContentFields(object):
"""Fields found in events' content, regardless of type."""
# Labels for the event, cf https://github.com/matrix-org/matrix-doc/pull/2326
LABELS = "org.matrix.labels"

View File

@ -20,6 +20,7 @@ from jsonschema import FormatChecker
from twisted.internet import defer from twisted.internet import defer
from synapse.api.constants import EventContentFields
from synapse.api.errors import SynapseError from synapse.api.errors import SynapseError
from synapse.storage.presence import UserPresenceState from synapse.storage.presence import UserPresenceState
from synapse.types import RoomID, UserID from synapse.types import RoomID, UserID
@ -66,6 +67,10 @@ ROOM_EVENT_FILTER_SCHEMA = {
"contains_url": {"type": "boolean"}, "contains_url": {"type": "boolean"},
"lazy_load_members": {"type": "boolean"}, "lazy_load_members": {"type": "boolean"},
"include_redundant_members": {"type": "boolean"}, "include_redundant_members": {"type": "boolean"},
# Include or exclude events with the provided labels.
# cf https://github.com/matrix-org/matrix-doc/pull/2326
"org.matrix.labels": {"type": "array", "items": {"type": "string"}},
"org.matrix.not_labels": {"type": "array", "items": {"type": "string"}},
}, },
} }
@ -259,6 +264,9 @@ class Filter(object):
self.contains_url = self.filter_json.get("contains_url", None) self.contains_url = self.filter_json.get("contains_url", None)
self.labels = self.filter_json.get("org.matrix.labels", None)
self.not_labels = self.filter_json.get("org.matrix.not_labels", [])
def filters_all_types(self): def filters_all_types(self):
return "*" in self.not_types return "*" in self.not_types
@ -282,6 +290,7 @@ class Filter(object):
room_id = None room_id = None
ev_type = "m.presence" ev_type = "m.presence"
contains_url = False contains_url = False
labels = []
else: else:
sender = event.get("sender", None) sender = event.get("sender", None)
if not sender: if not sender:
@ -300,10 +309,11 @@ class Filter(object):
content = event.get("content", {}) content = event.get("content", {})
# check if there is a string url field in the content for filtering purposes # check if there is a string url field in the content for filtering purposes
contains_url = isinstance(content.get("url"), text_type) contains_url = isinstance(content.get("url"), text_type)
labels = content.get(EventContentFields.LABELS, [])
return self.check_fields(room_id, sender, ev_type, contains_url) return self.check_fields(room_id, sender, ev_type, labels, contains_url)
def check_fields(self, room_id, sender, event_type, contains_url): def check_fields(self, room_id, sender, event_type, labels, contains_url):
"""Checks whether the filter matches the given event fields. """Checks whether the filter matches the given event fields.
Returns: Returns:
@ -313,6 +323,7 @@ class Filter(object):
"rooms": lambda v: room_id == v, "rooms": lambda v: room_id == v,
"senders": lambda v: sender == v, "senders": lambda v: sender == v,
"types": lambda v: _matches_wildcard(event_type, v), "types": lambda v: _matches_wildcard(event_type, v),
"labels": lambda v: v in labels,
} }
for name, match_func in literal_keys.items(): for name, match_func in literal_keys.items():

View File

@ -65,6 +65,9 @@ class VersionsRestServlet(RestServlet):
"m.require_identity_server": False, "m.require_identity_server": False,
# as per MSC2290 # as per MSC2290
"m.separate_add_and_bind": True, "m.separate_add_and_bind": True,
# Implements support for label-based filtering as described in
# MSC2326.
"org.matrix.label_based_filtering": True,
}, },
}, },
) )

View File

@ -29,7 +29,7 @@ from prometheus_client import Counter
from twisted.internet import defer from twisted.internet import defer
import synapse.metrics import synapse.metrics
from synapse.api.constants import EventTypes from synapse.api.constants import EventContentFields, EventTypes
from synapse.api.errors import SynapseError from synapse.api.errors import SynapseError
from synapse.events import EventBase # noqa: F401 from synapse.events import EventBase # noqa: F401
from synapse.events.snapshot import EventContext # noqa: F401 from synapse.events.snapshot import EventContext # noqa: F401
@ -932,6 +932,13 @@ class EventsStore(
self._handle_event_relations(txn, event) self._handle_event_relations(txn, event)
# Store the labels for this event.
labels = event.content.get(EventContentFields.LABELS)
if labels:
self.insert_labels_for_event_txn(
txn, event.event_id, labels, event.room_id, event.depth
)
# Insert into the room_memberships table. # Insert into the room_memberships table.
self._store_room_members_txn( self._store_room_members_txn(
txn, txn,
@ -1917,6 +1924,33 @@ class EventsStore(
get_all_updated_current_state_deltas_txn, get_all_updated_current_state_deltas_txn,
) )
def insert_labels_for_event_txn(
self, txn, event_id, labels, room_id, topological_ordering
):
"""Store the mapping between an event's ID and its labels, with one row per
(event_id, label) tuple.
Args:
txn (LoggingTransaction): The transaction to execute.
event_id (str): The event's ID.
labels (list[str]): A list of text labels.
room_id (str): The ID of the room the event was sent to.
topological_ordering (int): The position of the event in the room's topology.
"""
return self._simple_insert_many_txn(
txn=txn,
table="event_labels",
values=[
{
"event_id": event_id,
"label": label,
"room_id": room_id,
"topological_ordering": topological_ordering,
}
for label in labels
],
)
AllNewEventsResult = namedtuple( AllNewEventsResult = namedtuple(
"AllNewEventsResult", "AllNewEventsResult",

View File

@ -0,0 +1,30 @@
/* Copyright 2019 The Matrix.org Foundation C.I.C.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-- room_id and topoligical_ordering are denormalised from the events table in order to
-- make the index work.
CREATE TABLE IF NOT EXISTS event_labels (
event_id TEXT,
label TEXT,
room_id TEXT NOT NULL,
topological_ordering BIGINT NOT NULL,
PRIMARY KEY(event_id, label)
);
-- This index enables an event pagination looking for a particular label to index the
-- event_labels table first, which is much quicker than scanning the events table and then
-- filtering by label, if the label is rarely used relative to the size of the room.
CREATE INDEX event_labels_room_id_label_idx ON event_labels(room_id, label, topological_ordering);

View File

@ -229,6 +229,14 @@ def filter_to_clause(event_filter):
clauses.append("contains_url = ?") clauses.append("contains_url = ?")
args.append(event_filter.contains_url) args.append(event_filter.contains_url)
# We're only applying the "labels" filter on the database query, because applying the
# "not_labels" filter via a SQL query is non-trivial. Instead, we let
# event_filter.check_fields apply it, which is not as efficient but makes the
# implementation simpler.
if event_filter.labels:
clauses.append("(%s)" % " OR ".join("label = ?" for _ in event_filter.labels))
args.extend(event_filter.labels)
return " AND ".join(clauses), args return " AND ".join(clauses), args
@ -864,8 +872,9 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore):
args.append(int(limit)) args.append(int(limit))
sql = ( sql = (
"SELECT event_id, topological_ordering, stream_ordering" "SELECT DISTINCT event_id, topological_ordering, stream_ordering"
" FROM events" " FROM events"
" LEFT JOIN event_labels USING (event_id, room_id, topological_ordering)"
" WHERE outlier = ? AND room_id = ? AND %(bounds)s" " WHERE outlier = ? AND room_id = ? AND %(bounds)s"
" ORDER BY topological_ordering %(order)s," " ORDER BY topological_ordering %(order)s,"
" stream_ordering %(order)s LIMIT ?" " stream_ordering %(order)s LIMIT ?"

View File

@ -19,6 +19,7 @@ import jsonschema
from twisted.internet import defer from twisted.internet import defer
from synapse.api.constants import EventContentFields
from synapse.api.errors import SynapseError from synapse.api.errors import SynapseError
from synapse.api.filtering import Filter from synapse.api.filtering import Filter
from synapse.events import FrozenEvent from synapse.events import FrozenEvent
@ -95,6 +96,8 @@ class FilteringTestCase(unittest.TestCase):
"types": ["m.room.message"], "types": ["m.room.message"],
"not_rooms": ["!726s6s6q:example.com"], "not_rooms": ["!726s6s6q:example.com"],
"not_senders": ["@spam:example.com"], "not_senders": ["@spam:example.com"],
"org.matrix.labels": ["#fun"],
"org.matrix.not_labels": ["#work"],
}, },
"ephemeral": { "ephemeral": {
"types": ["m.receipt", "m.typing"], "types": ["m.receipt", "m.typing"],
@ -320,6 +323,46 @@ class FilteringTestCase(unittest.TestCase):
) )
self.assertFalse(Filter(definition).check(event)) self.assertFalse(Filter(definition).check(event))
def test_filter_labels(self):
definition = {"org.matrix.labels": ["#fun"]}
event = MockEvent(
sender="@foo:bar",
type="m.room.message",
room_id="!secretbase:unknown",
content={EventContentFields.LABELS: ["#fun"]},
)
self.assertTrue(Filter(definition).check(event))
event = MockEvent(
sender="@foo:bar",
type="m.room.message",
room_id="!secretbase:unknown",
content={EventContentFields.LABELS: ["#notfun"]},
)
self.assertFalse(Filter(definition).check(event))
def test_filter_not_labels(self):
definition = {"org.matrix.not_labels": ["#fun"]}
event = MockEvent(
sender="@foo:bar",
type="m.room.message",
room_id="!secretbase:unknown",
content={EventContentFields.LABELS: ["#fun"]},
)
self.assertFalse(Filter(definition).check(event))
event = MockEvent(
sender="@foo:bar",
type="m.room.message",
room_id="!secretbase:unknown",
content={EventContentFields.LABELS: ["#notfun"]},
)
self.assertTrue(Filter(definition).check(event))
@defer.inlineCallbacks @defer.inlineCallbacks
def test_filter_presence_match(self): def test_filter_presence_match(self):
user_filter_json = {"presence": {"types": ["m.*"]}} user_filter_json = {"presence": {"types": ["m.*"]}}

View File

@ -24,7 +24,7 @@ from six.moves.urllib import parse as urlparse
from twisted.internet import defer from twisted.internet import defer
import synapse.rest.admin import synapse.rest.admin
from synapse.api.constants import Membership from synapse.api.constants import EventContentFields, EventTypes, Membership
from synapse.rest.client.v1 import login, profile, room from synapse.rest.client.v1 import login, profile, room
from tests import unittest from tests import unittest
@ -811,6 +811,105 @@ class RoomMessageListTestCase(RoomBase):
self.assertTrue("chunk" in channel.json_body) self.assertTrue("chunk" in channel.json_body)
self.assertTrue("end" in channel.json_body) self.assertTrue("end" in channel.json_body)
def test_filter_labels(self):
"""Test that we can filter by a label."""
message_filter = json.dumps(
{"types": [EventTypes.Message], "org.matrix.labels": ["#fun"]}
)
events = self._test_filter_labels(message_filter)
self.assertEqual(len(events), 2, [event["content"] for event in events])
self.assertEqual(events[0]["content"]["body"], "with right label", events[0])
self.assertEqual(events[1]["content"]["body"], "with right label", events[1])
def test_filter_not_labels(self):
"""Test that we can filter by the absence of a label."""
message_filter = json.dumps(
{"types": [EventTypes.Message], "org.matrix.not_labels": ["#fun"]}
)
events = self._test_filter_labels(message_filter)
self.assertEqual(len(events), 3, [event["content"] for event in events])
self.assertEqual(events[0]["content"]["body"], "without label", events[0])
self.assertEqual(events[1]["content"]["body"], "with wrong label", events[1])
self.assertEqual(
events[2]["content"]["body"], "with two wrong labels", events[2]
)
def test_filter_labels_not_labels(self):
"""Test that we can filter by both a label and the absence of another label."""
sync_filter = json.dumps(
{
"types": [EventTypes.Message],
"org.matrix.labels": ["#work"],
"org.matrix.not_labels": ["#notfun"],
}
)
events = self._test_filter_labels(sync_filter)
self.assertEqual(len(events), 1, [event["content"] for event in events])
self.assertEqual(events[0]["content"]["body"], "with wrong label", events[0])
def _test_filter_labels(self, message_filter):
self.helper.send_event(
room_id=self.room_id,
type=EventTypes.Message,
content={
"msgtype": "m.text",
"body": "with right label",
EventContentFields.LABELS: ["#fun"],
},
)
self.helper.send_event(
room_id=self.room_id,
type=EventTypes.Message,
content={"msgtype": "m.text", "body": "without label"},
)
self.helper.send_event(
room_id=self.room_id,
type=EventTypes.Message,
content={
"msgtype": "m.text",
"body": "with wrong label",
EventContentFields.LABELS: ["#work"],
},
)
self.helper.send_event(
room_id=self.room_id,
type=EventTypes.Message,
content={
"msgtype": "m.text",
"body": "with two wrong labels",
EventContentFields.LABELS: ["#work", "#notfun"],
},
)
self.helper.send_event(
room_id=self.room_id,
type=EventTypes.Message,
content={
"msgtype": "m.text",
"body": "with right label",
EventContentFields.LABELS: ["#fun"],
},
)
token = "s0_0_0_0_0_0_0_0_0"
request, channel = self.make_request(
"GET",
"/rooms/%s/messages?access_token=x&from=%s&filter=%s"
% (self.room_id, token, message_filter),
)
self.render(request)
return channel.json_body["chunk"]
class RoomSearchTestCase(unittest.HomeserverTestCase): class RoomSearchTestCase(unittest.HomeserverTestCase):
servlets = [ servlets = [

View File

@ -106,13 +106,22 @@ class RestHelper(object):
self.auth_user_id = temp_id self.auth_user_id = temp_id
def send(self, room_id, body=None, txn_id=None, tok=None, expect_code=200): def send(self, room_id, body=None, txn_id=None, tok=None, expect_code=200):
if txn_id is None:
txn_id = "m%s" % (str(time.time()))
if body is None: if body is None:
body = "body_text_here" body = "body_text_here"
path = "/_matrix/client/r0/rooms/%s/send/m.room.message/%s" % (room_id, txn_id)
content = {"msgtype": "m.text", "body": body} content = {"msgtype": "m.text", "body": body}
return self.send_event(
room_id, "m.room.message", content, txn_id, tok, expect_code
)
def send_event(
self, room_id, type, content={}, txn_id=None, tok=None, expect_code=200
):
if txn_id is None:
txn_id = "m%s" % (str(time.time()))
path = "/_matrix/client/r0/rooms/%s/send/%s/%s" % (room_id, type, txn_id)
if tok: if tok:
path = path + "?access_token=%s" % tok path = path + "?access_token=%s" % tok

View File

@ -12,10 +12,12 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import json
from mock import Mock from mock import Mock
import synapse.rest.admin import synapse.rest.admin
from synapse.api.constants import EventContentFields, EventTypes
from synapse.rest.client.v1 import login, room from synapse.rest.client.v1 import login, room
from synapse.rest.client.v2_alpha import sync from synapse.rest.client.v2_alpha import sync
@ -26,7 +28,12 @@ from tests.server import TimedOutException
class FilterTestCase(unittest.HomeserverTestCase): class FilterTestCase(unittest.HomeserverTestCase):
user_id = "@apple:test" user_id = "@apple:test"
servlets = [sync.register_servlets] servlets = [
synapse.rest.admin.register_servlets_for_client_rest_resource,
room.register_servlets,
login.register_servlets,
sync.register_servlets,
]
def make_homeserver(self, reactor, clock): def make_homeserver(self, reactor, clock):
@ -70,6 +77,140 @@ class FilterTestCase(unittest.HomeserverTestCase):
) )
class SyncFilterTestCase(unittest.HomeserverTestCase):
servlets = [
synapse.rest.admin.register_servlets_for_client_rest_resource,
room.register_servlets,
login.register_servlets,
sync.register_servlets,
]
def test_sync_filter_labels(self):
"""Test that we can filter by a label."""
sync_filter = json.dumps(
{
"room": {
"timeline": {
"types": [EventTypes.Message],
"org.matrix.labels": ["#fun"],
}
}
}
)
events = self._test_sync_filter_labels(sync_filter)
self.assertEqual(len(events), 2, [event["content"] for event in events])
self.assertEqual(events[0]["content"]["body"], "with right label", events[0])
self.assertEqual(events[1]["content"]["body"], "with right label", events[1])
def test_sync_filter_not_labels(self):
"""Test that we can filter by the absence of a label."""
sync_filter = json.dumps(
{
"room": {
"timeline": {
"types": [EventTypes.Message],
"org.matrix.not_labels": ["#fun"],
}
}
}
)
events = self._test_sync_filter_labels(sync_filter)
self.assertEqual(len(events), 3, [event["content"] for event in events])
self.assertEqual(events[0]["content"]["body"], "without label", events[0])
self.assertEqual(events[1]["content"]["body"], "with wrong label", events[1])
self.assertEqual(
events[2]["content"]["body"], "with two wrong labels", events[2]
)
def test_sync_filter_labels_not_labels(self):
"""Test that we can filter by both a label and the absence of another label."""
sync_filter = json.dumps(
{
"room": {
"timeline": {
"types": [EventTypes.Message],
"org.matrix.labels": ["#work"],
"org.matrix.not_labels": ["#notfun"],
}
}
}
)
events = self._test_sync_filter_labels(sync_filter)
self.assertEqual(len(events), 1, [event["content"] for event in events])
self.assertEqual(events[0]["content"]["body"], "with wrong label", events[0])
def _test_sync_filter_labels(self, sync_filter):
user_id = self.register_user("kermit", "test")
tok = self.login("kermit", "test")
room_id = self.helper.create_room_as(user_id, tok=tok)
self.helper.send_event(
room_id=room_id,
type=EventTypes.Message,
content={
"msgtype": "m.text",
"body": "with right label",
EventContentFields.LABELS: ["#fun"],
},
tok=tok,
)
self.helper.send_event(
room_id=room_id,
type=EventTypes.Message,
content={"msgtype": "m.text", "body": "without label"},
tok=tok,
)
self.helper.send_event(
room_id=room_id,
type=EventTypes.Message,
content={
"msgtype": "m.text",
"body": "with wrong label",
EventContentFields.LABELS: ["#work"],
},
tok=tok,
)
self.helper.send_event(
room_id=room_id,
type=EventTypes.Message,
content={
"msgtype": "m.text",
"body": "with two wrong labels",
EventContentFields.LABELS: ["#work", "#notfun"],
},
tok=tok,
)
self.helper.send_event(
room_id=room_id,
type=EventTypes.Message,
content={
"msgtype": "m.text",
"body": "with right label",
EventContentFields.LABELS: ["#fun"],
},
tok=tok,
)
request, channel = self.make_request(
"GET", "/sync?filter=%s" % sync_filter, access_token=tok
)
self.render(request)
self.assertEqual(channel.code, 200, channel.result)
return channel.json_body["rooms"]["join"][room_id]["timeline"]["events"]
class SyncTypingTests(unittest.HomeserverTestCase): class SyncTypingTests(unittest.HomeserverTestCase):
servlets = [ servlets = [