Add debug logging for issue #9533 (#9959)

Hopefully this will help us track down where to-device messages are getting
lost/delayed.
This commit is contained in:
Richard van der Hoff 2021-05-11 11:04:03 +01:00 committed by GitHub
parent 7967b36efe
commit b378d98c8f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 42 additions and 2 deletions

1
changelog.d/9959.misc Normal file
View File

@ -0,0 +1 @@
Add debug logging for lost/delayed to-device messages.

View File

@ -28,6 +28,7 @@ from synapse.api.presence import UserPresenceState
from synapse.events import EventBase
from synapse.federation.units import Edu
from synapse.handlers.presence import format_user_presence_state
from synapse.logging import issue9533_logger
from synapse.logging.opentracing import SynapseTags, set_tag
from synapse.metrics import sent_transactions_counter
from synapse.metrics.background_process_metrics import run_as_background_process
@ -574,6 +575,14 @@ class PerDestinationQueue:
for content in contents
]
if edus:
issue9533_logger.debug(
"Sending %i to-device messages to %s, up to stream id %i",
len(edus),
self._destination,
stream_id,
)
return (edus, stream_id)
def _start_catching_up(self) -> None:

View File

@ -12,8 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# These are imported to allow for nicer logging configuration files.
import logging
from synapse.logging._remote import RemoteHandler
from synapse.logging._terse_json import JsonFormatter, TerseJsonFormatter
# These are imported to allow for nicer logging configuration files.
__all__ = ["RemoteHandler", "JsonFormatter", "TerseJsonFormatter"]
# Debug logger for https://github.com/matrix-org/synapse/issues/9533 etc
issue9533_logger = logging.getLogger("synapse.9533_debug")

View File

@ -38,6 +38,7 @@ from synapse.api.constants import EventTypes, HistoryVisibility, Membership
from synapse.api.errors import AuthError
from synapse.events import EventBase
from synapse.handlers.presence import format_user_presence_state
from synapse.logging import issue9533_logger
from synapse.logging.context import PreserveLoggingContext
from synapse.logging.opentracing import log_kv, start_active_span
from synapse.logging.utils import log_function
@ -426,6 +427,13 @@ class Notifier:
for room in rooms:
user_streams |= self.room_to_user_streams.get(room, set())
if stream_key == "to_device_key":
issue9533_logger.debug(
"to-device messages stream id %s, awaking streams for %s",
new_token,
users,
)
time_now_ms = self.clock.time_msec()
for user_stream in user_streams:
try:

View File

@ -51,7 +51,6 @@ if TYPE_CHECKING:
logger = logging.getLogger(__name__)
# How long we allow callers to wait for replication updates before timing out.
_WAIT_FOR_REPLICATION_TIMEOUT_SECONDS = 30

View File

@ -15,6 +15,7 @@
import logging
from typing import List, Optional, Tuple
from synapse.logging import issue9533_logger
from synapse.logging.opentracing import log_kv, set_tag, trace
from synapse.replication.tcp.streams import ToDeviceStream
from synapse.storage._base import SQLBaseStore, db_to_json
@ -404,6 +405,13 @@ class DeviceInboxWorkerStore(SQLBaseStore):
],
)
if remote_messages_by_destination:
issue9533_logger.debug(
"Queued outgoing to-device messages with stream_id %i for %s",
stream_id,
list(remote_messages_by_destination.keys()),
)
async with self._device_inbox_id_gen.get_next() as stream_id:
now_ms = self.clock.time_msec()
await self.db_pool.runInteraction(
@ -533,6 +541,16 @@ class DeviceInboxWorkerStore(SQLBaseStore):
],
)
issue9533_logger.debug(
"Stored to-device messages with stream_id %i for %s",
stream_id,
[
(user_id, device_id)
for (user_id, messages_by_device) in local_by_user_then_device.items()
for device_id in messages_by_device.keys()
],
)
class DeviceInboxBackgroundUpdateStore(SQLBaseStore):
DEVICE_INBOX_STREAM_ID = "device_inbox_stream_drop"