Merge pull request #412 from matrix-org/erikj/search

Search: Add prefix matching support
This commit is contained in:
Erik Johnston 2015-12-02 13:56:41 +00:00
commit f721fdbf87

View File

@ -140,7 +140,10 @@ class SearchStore(BackgroundUpdateStore):
list of dicts list of dicts
""" """
clauses = [] clauses = []
args = []
search_query = search_query = _parse_query(self.database_engine, search_term)
args = [search_query]
# Make sure we don't explode because the person is in too many rooms. # Make sure we don't explode because the person is in too many rooms.
# We filter the results below regardless. # We filter the results below regardless.
@ -162,7 +165,7 @@ class SearchStore(BackgroundUpdateStore):
if isinstance(self.database_engine, PostgresEngine): if isinstance(self.database_engine, PostgresEngine):
sql = ( sql = (
"SELECT ts_rank_cd(vector, query) AS rank, room_id, event_id" "SELECT ts_rank_cd(vector, query) AS rank, room_id, event_id"
" FROM plainto_tsquery('english', ?) as query, event_search" " FROM to_tsquery('english', ?) as query, event_search"
" WHERE vector @@ query" " WHERE vector @@ query"
) )
elif isinstance(self.database_engine, Sqlite3Engine): elif isinstance(self.database_engine, Sqlite3Engine):
@ -183,7 +186,7 @@ class SearchStore(BackgroundUpdateStore):
sql += " ORDER BY rank DESC LIMIT 500" sql += " ORDER BY rank DESC LIMIT 500"
results = yield self._execute( results = yield self._execute(
"search_msgs", self.cursor_to_dict, sql, *([search_term] + args) "search_msgs", self.cursor_to_dict, sql, *args
) )
results = filter(lambda row: row["room_id"] in room_ids, results) results = filter(lambda row: row["room_id"] in room_ids, results)
@ -197,7 +200,7 @@ class SearchStore(BackgroundUpdateStore):
highlights = None highlights = None
if isinstance(self.database_engine, PostgresEngine): if isinstance(self.database_engine, PostgresEngine):
highlights = yield self._find_highlights_in_postgres(search_term, events) highlights = yield self._find_highlights_in_postgres(search_query, events)
defer.returnValue({ defer.returnValue({
"results": [ "results": [
@ -226,7 +229,10 @@ class SearchStore(BackgroundUpdateStore):
list of dicts list of dicts
""" """
clauses = [] clauses = []
args = [search_term]
search_query = search_query = _parse_query(self.database_engine, search_term)
args = [search_query]
# Make sure we don't explode because the person is in too many rooms. # Make sure we don't explode because the person is in too many rooms.
# We filter the results below regardless. # We filter the results below regardless.
@ -263,7 +269,7 @@ class SearchStore(BackgroundUpdateStore):
sql = ( sql = (
"SELECT ts_rank_cd(vector, query) as rank," "SELECT ts_rank_cd(vector, query) as rank,"
" origin_server_ts, stream_ordering, room_id, event_id" " origin_server_ts, stream_ordering, room_id, event_id"
" FROM plainto_tsquery('english', ?) as query, event_search" " FROM to_tsquery('english', ?) as query, event_search"
" NATURAL JOIN events" " NATURAL JOIN events"
" WHERE vector @@ query AND " " WHERE vector @@ query AND "
) )
@ -313,7 +319,7 @@ class SearchStore(BackgroundUpdateStore):
highlights = None highlights = None
if isinstance(self.database_engine, PostgresEngine): if isinstance(self.database_engine, PostgresEngine):
highlights = yield self._find_highlights_in_postgres(search_term, events) highlights = yield self._find_highlights_in_postgres(search_query, events)
defer.returnValue({ defer.returnValue({
"results": [ "results": [
@ -330,7 +336,7 @@ class SearchStore(BackgroundUpdateStore):
"highlights": highlights, "highlights": highlights,
}) })
def _find_highlights_in_postgres(self, search_term, events): def _find_highlights_in_postgres(self, search_query, events):
"""Given a list of events and a search term, return a list of words """Given a list of events and a search term, return a list of words
that match from the content of the event. that match from the content of the event.
@ -338,7 +344,7 @@ class SearchStore(BackgroundUpdateStore):
highlight the matching parts. highlight the matching parts.
Args: Args:
search_term (str) search_query (str)
events (list): A list of events events (list): A list of events
Returns: Returns:
@ -370,14 +376,14 @@ class SearchStore(BackgroundUpdateStore):
while stop_sel in value: while stop_sel in value:
stop_sel += ">" stop_sel += ">"
query = "SELECT ts_headline(?, plainto_tsquery('english', ?), %s)" % ( query = "SELECT ts_headline(?, to_tsquery('english', ?), %s)" % (
_to_postgres_options({ _to_postgres_options({
"StartSel": start_sel, "StartSel": start_sel,
"StopSel": stop_sel, "StopSel": stop_sel,
"MaxFragments": "50", "MaxFragments": "50",
}) })
) )
txn.execute(query, (value, search_term,)) txn.execute(query, (value, search_query,))
headline, = txn.fetchall()[0] headline, = txn.fetchall()[0]
# Now we need to pick the possible highlights out of the headline # Now we need to pick the possible highlights out of the headline
@ -399,3 +405,22 @@ def _to_postgres_options(options_dict):
return "'%s'" % ( return "'%s'" % (
",".join("%s=%s" % (k, v) for k, v in options_dict.items()), ",".join("%s=%s" % (k, v) for k, v in options_dict.items()),
) )
def _parse_query(database_engine, search_term):
    """Turn a raw user-supplied search string into an engine-specific query.

    The term is split into words (runs of word characters and hyphens; all
    other characters are dropped). Each word gets the engine's prefix-match
    suffix and the words are AND-ed together with " & ", so that every word
    must match and each one matches as a prefix.

    Args:
        database_engine: The engine in use; must be an instance of
            PostgresEngine or Sqlite3Engine.
        search_term (unicode): The plain search string from the user.

    Returns:
        The query string to hand to the database's full-text search. This is
        the empty string when the term contains no words.

    Raises:
        Exception: If the database engine is not one of the recognised types.
    """
    # Tokenise first so that a bad search_term fails before the engine check,
    # exactly as callers have always observed.
    words = re.findall(r"([\w\-]+)", search_term, re.UNICODE)

    if isinstance(database_engine, PostgresEngine):
        # to_tsquery spells prefix matching as `word:*`.
        prefix_suffix = ":*"
    elif isinstance(database_engine, Sqlite3Engine):
        # SQLite full-text search spells prefix matching as `word*`.
        prefix_suffix = "*"
    else:
        # This should be unreachable.
        raise Exception("Unrecognized database engine")

    return " & ".join(word + prefix_suffix for word in words)