Add basic full text search impl.

This commit is contained in:
Erik Johnston 2015-10-09 15:48:31 +01:00
parent db6e1e1fe3
commit c85c912562
8 changed files with 268 additions and 1 deletions

View file

@ -40,6 +40,7 @@ from .filtering import FilteringStore
from .end_to_end_keys import EndToEndKeyStore
from .receipts import ReceiptsStore
from .search import SearchStore
import fnmatch
@ -79,6 +80,7 @@ class DataStore(RoomMemberStore, RoomStore,
EventsStore,
ReceiptsStore,
EndToEndKeyStore,
SearchStore,
):
def __init__(self, hs):

View file

@ -519,7 +519,7 @@ class SQLBaseStore(object):
allow_none=False,
desc="_simple_select_one_onecol"):
"""Executes a SELECT query on the named table, which is expected to
return a single row, returning a single column from it."
return a single row, returning a single column from it.
Args:
table : string giving the table name

View file

@ -0,0 +1,57 @@
# Copyright 2015 OpenMarket Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from synapse.storage import get_statements
from synapse.storage.engines import PostgresEngine
logger = logging.getLogger(__name__)
# Schema delta statements executed by run_upgrade() on postgres only.
#
# The script:
#   1. creates the `event_search` table (event_id, room_id, key, vector),
#      where `vector` is a tsvector;
#   2. back-fills it from `events NATURAL JOIN event_json`, building an
#      'english' tsvector from one field of the event JSON per event type:
#        - m.room.message -> content.body   (key 'content.body')
#        - m.room.name    -> content.name   (key 'content.name')
#        - m.room.topic   -> content.topic  (key 'content.topic')
#      so the `key` column records which content field each row indexes;
#   3. creates a GIN index on `vector`.
#
# The index is created after the bulk inserts rather than before them.
POSTGRES_SQL = """
CREATE TABLE event_search (
event_id TEXT,
room_id TEXT,
key TEXT,
vector tsvector
);
INSERT INTO event_search SELECT
event_id, room_id, 'content.body',
to_tsvector('english', json::json->'content'->>'body')
FROM events NATURAL JOIN event_json WHERE type = 'm.room.message';
INSERT INTO event_search SELECT
event_id, room_id, 'content.name',
to_tsvector('english', json::json->'content'->>'name')
FROM events NATURAL JOIN event_json WHERE type = 'm.room.name';
INSERT INTO event_search SELECT
event_id, room_id, 'content.topic',
to_tsvector('english', json::json->'content'->>'topic')
FROM events NATURAL JOIN event_json WHERE type = 'm.room.topic';
CREATE INDEX event_search_idx ON event_search USING gin(vector);
"""
def run_upgrade(cur, database_engine, *args, **kwargs):
    """Apply the full text search schema delta.

    Executes every statement of ``POSTGRES_SQL`` on ``cur`` when
    ``database_engine`` is a ``PostgresEngine``; does nothing for any other
    engine.

    Args:
        cur: database cursor; only its ``execute`` method is used.
        database_engine: the engine in use, checked against
            ``PostgresEngine``.
        *args: accepted and ignored.
        **kwargs: accepted and ignored.
    """
    # Full text search is only implemented for postgres at the moment, so
    # every other engine leaves the schema untouched.
    if isinstance(database_engine, PostgresEngine):
        sql_lines = POSTGRES_SQL.splitlines()
        for stmt in get_statements(sql_lines):
            cur.execute(stmt)

75
synapse/storage/search.py Normal file
View file

@ -0,0 +1,75 @@
# -*- coding: utf-8 -*-
# Copyright 2015 OpenMarket Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from twisted.internet import defer
from _base import SQLBaseStore
from synapse.api.constants import KnownRoomEventKeys, SearchConstraintTypes
class SearchStore(SQLBaseStore):
    """Storage methods that query the ``event_search`` full text table."""

    @defer.inlineCallbacks
    def search_msgs(self, constraints):
        """Run a ranked full text search over the ``event_search`` table.

        Every constraint contributes one parenthesised group of OR-ed
        conditions, and all groups are AND-ed onto the text match:

        * ``SearchConstraintTypes.FTS``: ``c.value`` becomes the search
          term (if several FTS constraints are given, the last one wins)
          and every entry of ``c.keys`` adds a ``key = ?`` condition.
        * ``SearchConstraintTypes.EXACT``: for every entry of ``c.keys``
          equal to ``KnownRoomEventKeys.ROOM_ID``, every entry of
          ``c.value`` adds a ``room_id = ?`` condition. Other keys add
          nothing.

        A constraint that ends up contributing no conditions is rendered
        as ``(FALSE)``: an OR over zero alternatives matches nothing.

        Args:
            constraints: iterable of objects exposing ``search_type``,
                ``keys`` and ``value`` attributes.

        Returns:
            Deferred which resolves to a list of
            ``{"rank": <rank>, "result": <event>}`` dicts in the order the
            query returned them (``ORDER BY rank DESC``). Rows whose event
            was not returned by ``_get_events`` are left out.
        """
        clauses = []
        args = []
        fts = None

        for c in constraints:
            local_clauses = []
            if c.search_type == SearchConstraintTypes.FTS:
                fts = c.value
                for key in c.keys:
                    local_clauses.append("key = ?")
                    args.append(key)
            elif c.search_type == SearchConstraintTypes.EXACT:
                for key in c.keys:
                    if key == KnownRoomEventKeys.ROOM_ID:
                        for value in c.value:
                            local_clauses.append("room_id = ?")
                            args.append(value)

            if local_clauses:
                clauses.append(
                    "(%s)" % (" OR ".join(local_clauses),)
                )
            else:
                # Joining zero conditions would give "()", which is not
                # valid SQL. Dropping the group instead would silently
                # widen the search (e.g. beyond the requested rooms), so
                # use the logical value of an empty disjunction.
                clauses.append("(FALSE)")

        sql = (
            "SELECT ts_rank_cd(vector, query) AS rank, event_id"
            " FROM plainto_tsquery('english', ?) as query, event_search"
            " WHERE vector @@ query"
        )
        sql += "".join(" AND " + clause for clause in clauses)
        sql += " ORDER BY rank DESC"

        # The search term binds to the first placeholder (plainto_tsquery);
        # the remaining args follow in the order their clauses were added.
        results = yield self._execute(
            "search_msgs", self.cursor_to_dict, sql, *([fts] + args)
        )

        events = yield self._get_events([r["event_id"] for r in results])

        event_map = {
            ev.event_id: ev
            for ev in events
        }

        # Keep the ranking order of the query; skip rows for which no event
        # came back from _get_events.
        defer.returnValue([
            {
                "rank": r["rank"],
                "result": event_map[r["event_id"]]
            }
            for r in results
            if r["event_id"] in event_map
        ])