mirror of
https://github.com/matrix-org/pantalaimon.git
synced 2025-03-29 17:38:09 -04:00
156 lines
4.9 KiB
Python
156 lines
4.9 KiB
Python
# Copyright 2019 The Matrix.org Foundation CIC
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
import datetime
|
|
|
|
import tantivy
|
|
from nio import (RoomMessageText, RoomNameEvent, RoomTopicEvent,
|
|
RoomMessageMedia, RoomEncryptedMedia)
|
|
|
|
|
|
def sanitize_room_id(room_id):
    """Return *room_id* with every ``:`` turned into ``/`` and every ``!`` removed.

    The result is what this module uses as the room path when building
    facets and room-restricted queries.
    """
    # One translation pass: ':' maps to '/', '!' maps to nothing.
    substitutions = str.maketrans({":": "/", "!": None})
    return room_id.translate(substitutions)
|
|
|
|
|
|
class Searcher:
    """Run text queries against a tantivy index through a fixed searcher.

    The constructor only stores the handles it is given; ``Index.searcher()``
    is the place in this file that builds instances.
    """

    def __init__(self, index, body_field, name_field, topic_field,
                 column_field, room_field, timestamp_field, searcher):
        """Store the index, its field handles and the tantivy searcher.

        Args:
            index: The tantivy index the query parser is built for.
            body_field: Handle of the message body text field.
            name_field: Handle of the room name text field.
            topic_field: Handle of the room topic text field.
            column_field: Handle of the stored database column id field.
            room_field: Handle of the room facet field.
            timestamp_field: Handle of the server timestamp field, used
                when ordering results by date.
            searcher: The tantivy searcher that executes the queries.
        """
        self._index = index
        self._searcher = searcher

        self.body_field = body_field
        # These two were previously cross-assigned (name <- topic and
        # topic <- name); each attribute now holds its own argument.
        self.name_field = name_field
        self.topic_field = topic_field
        self.column_field = column_field
        self.room_field = room_field
        self.timestamp_field = timestamp_field

    def search(self, search_term, room=None, max_results=10, order_by_date=False):
        """Search for events in the index.

        Args:
            search_term (str): The query string handed to the tantivy
                query parser.
            room (str, optional): Restrict the search to this room id. Only
                a single room is supported.
            max_results (int): Maximum number of hits to collect.
            order_by_date (bool): If true, collect the top documents ordered
                by the timestamp field instead of the default ordering.

        Returns:
            A list of ``(score, column)`` tuples, one per hit, where
            ``column`` is the first value of the column id field of the
            matching document. NOTE(review): with ``order_by_date`` the
            first element is whatever tantivy yields for that collector;
            confirm against the tantivy-py version in use.
        """
        queryparser = tantivy.QueryParser.for_index(
            self._index,
            [
                self.body_field,
                self.name_field,
                self.topic_field,
                self.room_field,
            ],
        )

        # This currently supports only a single room since the query parser
        # doesn't seem to work with multiple room fields here.
        if room:
            search_term = "{} AND room:{}".format(
                search_term,
                sanitize_room_id(room)
            )

        query = queryparser.parse_query(search_term)

        if order_by_date:
            collector = tantivy.TopDocs(max_results,
                                        order_by_field=self.timestamp_field)
        else:
            collector = tantivy.TopDocs(max_results)

        result = self._searcher.search(query, collector)

        # Resolve every hit to its stored database column id.
        return [
            (score, self._searcher.doc(doc_address).get_first(self.column_field))
            for score, doc_address in result
        ]
|
|
|
|
|
|
class Index:
    """Tantivy-backed index of room events.

    Owns the schema, the index itself, and one reader and one writer
    created from it.
    """

    def __init__(self, path=None):
        """Build the schema and open the index with a reader and a writer.

        Args:
            path: Passed straight through to ``tantivy.Index`` together
                with the schema. NOTE(review): presumably ``None`` selects
                a non-persistent index; confirm against tantivy-py.
        """
        builder = tantivy.SchemaBuilder()

        # Fields are registered in a fixed order; keep it stable.
        self.body_field = builder.add_text_field("body")
        self.name_field = builder.add_text_field("name")
        self.topic_field = builder.add_text_field("topic")

        self.timestamp_field = builder.add_unsigned_field(
            "server_timestamp",
            fast="single",
        )
        self.date_field = builder.add_date_field("message_date")
        self.room_field = builder.add_facet_field("room")

        self.column_field = builder.add_unsigned_field(
            "database_column",
            indexed=True,
            stored=True,
            fast="single",
        )

        self.index = tantivy.Index(builder.build(), path)

        self.reader = self.index.reader()
        self.writer = self.index.writer()

    def add_event(self, column_id, event, room_id):
        """Queue a document for *event* on the writer.

        Args:
            column_id: Value stored in the database column id field.
            event: A text or media message, a room name event or a room
                topic event.
            room_id: Id of the room the event belongs to; it is sanitized
                and stored as a facet.

        Raises:
            ValueError: If *event* is none of the supported event types.
        """
        document = tantivy.Document()

        facet = tantivy.Facet.from_string(
            "/{}".format(sanitize_room_id(room_id))
        )

        document.add_unsigned(self.column_field, column_id)
        document.add_facet(self.room_field, facet)

        # assumes server_timestamp is in milliseconds since the epoch
        # (it is divided by 1000 before conversion) -- TODO confirm.
        # fromtimestamp() without a tz yields a naive local-time datetime.
        event_date = datetime.datetime.fromtimestamp(
            event.server_timestamp / 1000
        )
        document.add_date(self.date_field, event_date)
        document.add_unsigned(self.timestamp_field, event.server_timestamp)

        # Choose the text field and the text to index from the event type.
        if isinstance(event, (RoomMessageText, RoomMessageMedia,
                              RoomEncryptedMedia)):
            text_field, text = self.body_field, event.body
        elif isinstance(event, RoomNameEvent):
            text_field, text = self.name_field, event.name
        elif isinstance(event, RoomTopicEvent):
            text_field, text = self.topic_field, event.topic
        else:
            raise ValueError("Invalid event passed.")

        document.add_text(text_field, text)

        self.writer.add_document(document)

    def commit(self):
        """Commit the documents that were added through the writer."""
        self.writer.commit()

    def searcher(self):
        """Reload the reader and return a ``Searcher`` bound to it."""
        self.reader.reload()
        return Searcher(
            index=self.index,
            body_field=self.body_field,
            name_field=self.name_field,
            topic_field=self.topic_field,
            column_field=self.column_field,
            room_field=self.room_field,
            timestamp_field=self.timestamp_field,
            searcher=self.reader.searcher(),
        )
|