Debug for device lists updates (#11760)

Debug for #8631.

I'm having a hard time tracking down what's going wrong in that issue.
In the reported example, I could see server A sending federation traffic
to server B and all was well. Yet B reports out-of-sync device updates
from A.

I couldn't see what was _in_ the events being sent from A to B. So I
have added some crude logging to track

- when we have updates to send to a remote HS
- the edus we actually accumulate to send
- when a federation transaction includes a device list update edu
- when such an EDU is received

This is a bit of a sledgehammer.
This commit is contained in:
David Robertson 2022-01-20 13:38:44 +00:00 committed by GitHub
parent fa583c2198
commit f160fe18e3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 46 additions and 0 deletions

1
changelog.d/11760.misc Normal file
View File

@ -0,0 +1 @@
Add optional debugging to investigate [issue 8631](https://github.com/matrix-org/synapse/issues/8631).

View File

@ -35,6 +35,7 @@ if TYPE_CHECKING:
import synapse.server
logger = logging.getLogger(__name__)
issue_8631_logger = logging.getLogger("synapse.8631_debug")
last_pdu_ts_metric = Gauge(
"synapse_federation_last_sent_pdu_time",
@ -124,6 +125,17 @@ class TransactionManager:
len(pdus),
len(edus),
)
if issue_8631_logger.isEnabledFor(logging.DEBUG):
DEVICE_UPDATE_EDUS = {"m.device_list_update", "m.signing_key_update"}
device_list_updates = [
edu.content for edu in edus if edu.edu_type in DEVICE_UPDATE_EDUS
]
if device_list_updates:
issue_8631_logger.debug(
"about to send txn [%s] including device list updates: %s",
transaction.transaction_id,
device_list_updates,
)
# Actually send the transaction

View File

@ -36,6 +36,7 @@ from synapse.util.ratelimitutils import FederationRateLimiter
from synapse.util.versionstring import get_version_string
logger = logging.getLogger(__name__)
issue_8631_logger = logging.getLogger("synapse.8631_debug")
class BaseFederationServerServlet(BaseFederationServlet):
@ -95,6 +96,20 @@ class FederationSendServlet(BaseFederationServerServlet):
len(transaction_data.get("edus", [])),
)
if issue_8631_logger.isEnabledFor(logging.DEBUG):
DEVICE_UPDATE_EDUS = {"m.device_list_update", "m.signing_key_update"}
device_list_updates = [
edu.content
for edu in transaction_data.get("edus", [])
if edu.edu_type in DEVICE_UPDATE_EDUS
]
if device_list_updates:
issue_8631_logger.debug(
"received transaction [%s] including device list updates: %s",
transaction_id,
device_list_updates,
)
except Exception as e:
logger.exception(e)
return 400, {"error": "Invalid transaction"}

View File

@ -53,6 +53,7 @@ if TYPE_CHECKING:
from synapse.server import HomeServer
logger = logging.getLogger(__name__)
issue_8631_logger = logging.getLogger("synapse.8631_debug")
DROP_DEVICE_LIST_STREAMS_NON_UNIQUE_INDEXES = (
"drop_device_list_streams_non_unique_indexes"
@ -229,6 +230,12 @@ class DeviceWorkerStore(SQLBaseStore):
if not updates:
return now_stream_id, []
if issue_8631_logger.isEnabledFor(logging.DEBUG):
data = {(user, device): stream_id for user, device, stream_id, _ in updates}
issue_8631_logger.debug(
"device updates need to be sent to %s: %s", destination, data
)
# get the cross-signing keys of the users in the list, so that we can
# determine which of the device changes were cross-signing keys
users = {r[0] for r in updates}
@ -365,6 +372,17 @@ class DeviceWorkerStore(SQLBaseStore):
# and remove the length budgeting above.
results.append(("org.matrix.signing_key_update", result))
if issue_8631_logger.isEnabledFor(logging.DEBUG):
for (user_id, edu) in results:
issue_8631_logger.debug(
"device update to %s for %s from %s to %s: %s",
destination,
user_id,
from_stream_id,
last_processed_stream_id,
edu,
)
return last_processed_stream_id, results
def _get_device_updates_by_remote_txn(