Improved WatchValue

Christien Rioux 2025-04-21 14:05:44 -04:00
parent 72b1434abc
commit e6c7c28746
89 changed files with 1891892 additions and 1807 deletions

View file

@@ -1,5 +1,4 @@
# Routing context veilid tests
from typing import Any, Awaitable, Callable, Optional
import pytest
import asyncio
@@ -7,7 +6,8 @@ import time
import os
import veilid
from veilid import ValueSubkey
from veilid import ValueSubkey, Timestamp, SafetySelection
from veilid.types import VeilidJSONEncoder
##################################################################
BOGUS_KEY = veilid.TypedKey.from_value(
@@ -86,8 +86,8 @@ async def test_set_get_dht_value(api_connection: veilid.VeilidAPI):
vd4 = await rc.get_dht_value(rec.key, ValueSubkey(1), False)
assert vd4 is None
print("vd2: {}", vd2.__dict__)
print("vd3: {}", vd3.__dict__)
#print("vd2: {}", vd2.__dict__)
#print("vd3: {}", vd3.__dict__)
assert vd2 == vd3
@@ -245,8 +245,7 @@ async def test_open_writer_dht_value(api_connection: veilid.VeilidAPI):
await rc.delete_dht_record(key)
# @pytest.mark.skipif(os.getenv("INTEGRATION") != "1", reason="integration test requires two servers running")
@pytest.mark.skip(reason = "don't work yet")
@pytest.mark.skipif(os.getenv("INTEGRATION") != "1", reason="integration test requires two servers running")
@pytest.mark.asyncio
async def test_watch_dht_values():
@@ -256,112 +255,256 @@ async def test_watch_dht_values():
if update.kind == veilid.VeilidUpdateKind.VALUE_CHANGE:
await value_change_queue.put(update)
async def null_update_callback(update: veilid.VeilidUpdate):
pass
try:
api = await veilid.api_connector(value_change_update_callback)
api0 = await veilid.api_connector(value_change_update_callback, 0)
except veilid.VeilidConnectionError:
pytest.skip("Unable to connect to veilid-server.")
pytest.skip("Unable to connect to veilid-server 0.")
# Make two routing contexts, one with and one without safety
# So we can pretend to be a different node and get the watch updates
# Normally they would not get sent if the set comes from the same target
# as the watch's target
# XXX: this logic doesn't work because our node still suppresses updates
# XXX: if the value hasn't changed in the local record store
rcWatch = await api.new_routing_context()
rcSet = await (await api.new_routing_context()).with_safety(veilid.SafetySelection.unsafe())
async with rcWatch, rcSet:
# Make a DHT record
rec = await rcWatch.create_dht_record(veilid.DHTSchema.dflt(10))
try:
api1 = await veilid.api_connector(null_update_callback, 1)
except veilid.VeilidConnectionError:
pytest.skip("Unable to connect to veilid-server 1.")
# Set some subkey we care about
vd = await rcWatch.set_dht_value(rec.key, ValueSubkey(3), b"BLAH BLAH BLAH")
assert vd is None
async with api0, api1:
# purge local and remote record stores to ensure we start fresh
await api0.debug("record purge local")
await api0.debug("record purge remote")
await api1.debug("record purge local")
await api1.debug("record purge remote")
# Make a watch on that subkey
ts = await rcWatch.watch_dht_values(rec.key, [], 0, 0xFFFFFFFF)
assert ts != 0
# Clear the change queue if record purge cancels old watches
while True:
try:
upd = await asyncio.wait_for(value_change_queue.get(), timeout=3)
except asyncio.TimeoutError:
break
# Reopen without closing to change routing context and not lose watch
rec = await rcSet.open_dht_record(rec.key, rec.owner_key_pair())
# Now set the subkey and trigger an update
vd = await rcSet.set_dht_value(rec.key, ValueSubkey(3), b"BLAH")
assert vd is None
# Now we should NOT get an update because the update is the same as our local copy
update = None
try:
update = await asyncio.wait_for(value_change_queue.get(), timeout=5)
except asyncio.TimeoutError:
pass
assert update is None
# make routing contexts
rc0 = await api0.new_routing_context()
rc1 = await api1.new_routing_context()
async with rc0, rc1:
# Now set multiple subkeys and trigger an update
vd = await asyncio.gather(*[rcSet.set_dht_value(rec.key, ValueSubkey(3), b"BLAH BLAH"), rcSet.set_dht_value(rec.key, ValueSubkey(4), b"BZORT")])
assert vd == [None, None]
# Server 0: Make a DHT record
rec0 = await rc0.create_dht_record(veilid.DHTSchema.dflt(10))
# Wait for the update
upd = await asyncio.wait_for(value_change_queue.get(), timeout=5)
# Server 0: Set some subkey we care about
vd = await rc0.set_dht_value(rec0.key, ValueSubkey(3), b"BLAH")
assert vd is None
# Verify the update came back but we don't get a new value because the sequence number is the same
assert upd.detail.key == rec.key
assert upd.detail.count == 0xFFFFFFFD
assert upd.detail.subkeys == [(3, 4)]
assert upd.detail.value is None
await sync(rc0, [rec0])
# Reopen without closing to change routing context and not lose watch
rec = await rcWatch.open_dht_record(rec.key, rec.owner_key_pair())
# Server 0: Make a watch on all the subkeys
active = await rc0.watch_dht_values(rec0.key, [], Timestamp(0), 0xFFFFFFFF)
assert active
# Cancel some subkeys we don't care about
still_active = await rcWatch.cancel_dht_watch(rec.key, [(ValueSubkey(0), ValueSubkey(2))])
assert still_active
# Server 1: Open the subkey
rec1 = await rc1.open_dht_record(rec0.key, rec0.owner_key_pair())
# Reopen without closing to change routing context and not lose watch
rec = await rcSet.open_dht_record(rec.key, rec.owner_key_pair())
# Server 1: Now set the subkey and trigger an update
vd = await rc1.set_dht_value(rec1.key, ValueSubkey(3), b"BLAH")
assert vd is None
await sync(rc1, [rec1])
# Now set multiple subkeys and trigger an update
vd = await asyncio.gather(*[rcSet.set_dht_value(rec.key, ValueSubkey(3), b"BLAH BLAH BLAH"), rcSet.set_dht_value(rec.key, ValueSubkey(5), b"BZORT BZORT")])
assert vd == [None, None]
# Server 0: Now we should NOT get an update because the update is the same as our local copy
upd = None
try:
upd = await asyncio.wait_for(value_change_queue.get(), timeout=10)
except asyncio.TimeoutError:
pass
assert upd is None
# Wait for the update; the longer timeout seems to help the flaky check below
upd = await asyncio.wait_for(value_change_queue.get(), timeout=10)
# Server 1: Now set subkey and trigger an update
vd = await rc1.set_dht_value(rec1.key, ValueSubkey(3), b"BLAH BLAH")
assert vd is None
await sync(rc1, [rec1])
# Verify the update came back but we don't get a new value because the sequence number is the same
assert upd.detail.key == rec.key
# Server 0: Wait for the update
upd = await asyncio.wait_for(value_change_queue.get(), timeout=10)
# This check is flaky on slow connections and often fails with different counts
assert upd.detail.count == 0xFFFFFFFC
assert upd.detail.subkeys == [(3, 3), (5, 5)]
assert upd.detail.value is None
# Server 0: Verify the update came back with the first changed subkey's data
assert upd.detail.key == rec0.key
assert upd.detail.count == 0xFFFFFFFE
assert upd.detail.subkeys == [(3, 3)]
assert upd.detail.value.data == b"BLAH BLAH"
# Reopen without closing to change routing context and not lose watch
rec = await rcWatch.open_dht_record(rec.key, rec.owner_key_pair())
# Server 1: Now set subkey and trigger an update
vd = await rc1.set_dht_value(rec1.key, ValueSubkey(4), b"BZORT")
assert vd is None
await sync(rc1, [rec1])
# Now cancel the update
still_active = await rcWatch.cancel_dht_watch(rec.key, [(ValueSubkey(3), ValueSubkey(9))])
assert not still_active
# Server 0: Wait for the update
upd = await asyncio.wait_for(value_change_queue.get(), timeout=10)
# Reopen without closing to change routing context and not lose watch
rec = await rcSet.open_dht_record(rec.key, rec.owner_key_pair())
# Server 0: Verify the update came back with the first changed subkey's data
assert upd.detail.key == rec0.key
assert upd.detail.count == 0xFFFFFFFD
assert upd.detail.subkeys == [(4, 4)]
assert upd.detail.value.data == b"BZORT"
# Now set multiple subkeys
vd = await asyncio.gather(*[rcSet.set_dht_value(rec.key, ValueSubkey(3), b"BLAH BLAH BLAH BLAH"), rcSet.set_dht_value(rec.key, ValueSubkey(5), b"BZORT BZORT BZORT")])
assert vd == [None, None]
# Now we should NOT get an update
update = None
try:
update = await asyncio.wait_for(value_change_queue.get(), timeout=5)
except asyncio.TimeoutError:
pass
assert update is None
# Server 0: Cancel some subkeys we don't care about
active = await rc0.cancel_dht_watch(rec0.key, [(ValueSubkey(0), ValueSubkey(3))])
assert active
# Clean up
await rcSet.close_dht_record(rec.key)
await rcSet.delete_dht_record(rec.key)
# Server 1: Now set multiple subkeys and trigger an update
vd = await asyncio.gather(*[rc1.set_dht_value(rec1.key, ValueSubkey(3), b"BLAH BLAH BLAH"), rc1.set_dht_value(rec1.key, ValueSubkey(4), b"BZORT BZORT")])
assert vd == [None, None]
await sync(rc1, [rec1])
# Server 0: Wait for the update
upd = await asyncio.wait_for(value_change_queue.get(), timeout=10)
# Server 0: Verify only one update came back
assert upd.detail.key == rec0.key
assert upd.detail.count == 0xFFFFFFFC
assert upd.detail.subkeys == [(4, 4)]
assert upd.detail.value.data == b"BZORT BZORT"
# Server 0: Now we should NOT get any other update
upd = None
try:
upd = await asyncio.wait_for(value_change_queue.get(), timeout=10)
except asyncio.TimeoutError:
pass
if upd is not None:
print(f"bad update: {VeilidJSONEncoder.dumps(upd)}")
assert upd is None
# Now cancel the update
active = await rc0.cancel_dht_watch(rec0.key, [(ValueSubkey(3), ValueSubkey(9))])
assert not active
# Server 0: Wait for the cancellation update
upd = await asyncio.wait_for(value_change_queue.get(), timeout=10)
# Server 0: Verify only one update came back
assert upd.detail.key == rec0.key
assert upd.detail.count == 0
assert upd.detail.subkeys == []
assert upd.detail.value is None
# Now set multiple subkeys
vd = await asyncio.gather(*[rc1.set_dht_value(rec1.key, ValueSubkey(3), b"BLAH BLAH BLAH BLAH"), rc1.set_dht_value(rec1.key, ValueSubkey(5), b"BZORT BZORT BZORT")])
assert vd == [None, None]
await sync(rc1, [rec1])
# Now we should NOT get an update
upd = None
try:
upd = await asyncio.wait_for(value_change_queue.get(), timeout=10)
except asyncio.TimeoutError:
pass
if upd is not None:
print(f"bad update: {VeilidJSONEncoder.dumps(upd)}")
assert upd is None
# Clean up
await rc1.close_dht_record(rec1.key)
await rc1.delete_dht_record(rec1.key)
await rc0.close_dht_record(rec0.key)
await rc0.delete_dht_record(rec0.key)
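
Taken together, the rewritten test pins down the new watch semantics: watch_dht_values now answers with a bool ("is the watch active") instead of a Timestamp, a node never sees updates for its own writes or for writes that leave the sequence number unchanged, and each delivered update decrements the watch count from 0xFFFFFFFF. A condensed sketch of the happy path (not the test itself), assuming the same two local veilid-server instances at subindexes 0 and 1 the test uses:

import asyncio
import veilid
from veilid import ValueSubkey

async def watch_flow_sketch():
    changes: asyncio.Queue[veilid.VeilidUpdate] = asyncio.Queue()

    async def on_change(update: veilid.VeilidUpdate):
        if update.kind == veilid.VeilidUpdateKind.VALUE_CHANGE:
            await changes.put(update)

    async def ignore(update: veilid.VeilidUpdate):
        pass

    # Watcher and writer must be different nodes: a node suppresses
    # value-change updates for its own sets.
    api0 = await veilid.api_connector(on_change, 0)
    api1 = await veilid.api_connector(ignore, 1)
    async with api0, api1:
        rc0 = await api0.new_routing_context()
        rc1 = await api1.new_routing_context()
        async with rc0, rc1:
            # Server 0: create a record, seed a subkey, watch everything.
            rec0 = await rc0.create_dht_record(veilid.DHTSchema.dflt(10))
            await rc0.set_dht_value(rec0.key, ValueSubkey(3), b"BLAH")
            assert await rc0.watch_dht_values(rec0.key)  # bool, not Timestamp

            # Server 1: open the same record and write *different* bytes;
            # rewriting identical bytes keeps the sequence number the same
            # and produces no update.
            rec1 = await rc1.open_dht_record(rec0.key, rec0.owner_key_pair())
            await rc1.set_dht_value(rec1.key, ValueSubkey(3), b"BLAH BLAH")

            # Server 0: exactly one VALUE_CHANGE arrives, carrying the
            # changed range and the new data; detail.count ticks down
            # from 0xFFFFFFFF with each delivered update.
            upd = await asyncio.wait_for(changes.get(), timeout=10)
            assert upd.detail.subkeys == [(3, 3)]
            assert upd.detail.value.data == b"BLAH BLAH"

The test proper also purges both record stores up front and calls the sync() helper (shown later in this file's diff) after each write, which is what keeps these assertions deterministic on slow networks.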
@pytest.mark.skipif(os.getenv("INTEGRATION") != "1", reason="integration test requires two servers running")
@pytest.mark.skipif(os.getenv("STRESS") != "1", reason="stress test takes a long time")
@pytest.mark.asyncio
async def test_watch_many_dht_values():
value_change_queue: asyncio.Queue[veilid.VeilidUpdate] = asyncio.Queue()
async def value_change_update_callback(update: veilid.VeilidUpdate):
if update.kind == veilid.VeilidUpdateKind.VALUE_CHANGE:
await value_change_queue.put(update)
async def null_update_callback(update: veilid.VeilidUpdate):
pass
try:
api0 = await veilid.api_connector(value_change_update_callback, 0)
except veilid.VeilidConnectionError:
pytest.skip("Unable to connect to veilid-server 0.")
try:
api1 = await veilid.api_connector(null_update_callback, 1)
except veilid.VeilidConnectionError:
pytest.skip("Unable to connect to veilid-server 1.")
async with api0, api1:
# purge local and remote record stores to ensure we start fresh
await api0.debug("record purge local")
await api0.debug("record purge remote")
await api1.debug("record purge local")
await api1.debug("record purge remote")
# make routing contexts
# unsafe version for debugging
rc0 = await (await api0.new_routing_context()).with_safety(SafetySelection.unsafe())
rc1 = await (await api1.new_routing_context()).with_safety(SafetySelection.unsafe())
# safe default version
# rc0 = await api0.new_routing_context()
# rc1 = await api1.new_routing_context()
async with rc0, rc1:
COUNT = 10
records = []
# Make and watch all records
for n in range(COUNT):
print(f"making record {n}")
# Server 0: Make a DHT record
records.append(await rc0.create_dht_record(veilid.DHTSchema.dflt(1)))
# Server 0: Set some subkey we care about
vd = await rc0.set_dht_value(records[n].key, ValueSubkey(0), b"BLAH")
assert vd is None
# Server 0: Make a watch on all the subkeys
active = await rc0.watch_dht_values(records[n].key, [], Timestamp(0), 0xFFFFFFFF)
assert active
# Open and set all records
missing_records = set()
for (n, record) in enumerate(records):
print(f"setting record {n}")
# Server 1: Open the subkey
_ignore = await rc1.open_dht_record(record.key, record.owner_key_pair())
# Server 1: Now set the subkey and trigger an update
vd = await rc1.set_dht_value(record.key, ValueSubkey(0), b"BLAH BLAH")
assert vd is None
missing_records.add(record.key)
# Server 0: Now we should get an update for every change
for n in range(len(records)):
print(f"waiting for change {n}")
# Server 0: Wait for the update
try:
upd = await asyncio.wait_for(value_change_queue.get(), timeout=10)
missing_records.remove(upd.detail.key)
except:
# Dump which records didn't get updates
for (m, record) in enumerate(records):
if record.key not in missing_records:
continue
print(f"missing update for record {m}: {record}")
info0 = await api0.debug(f"record info {record.key}")
info1 = await api1.debug(f"record info {record.key}")
print(f"from rc0: {info0}")
print(f"from rc1: {info1}")
raise
# Clean up
for record in records:
await rc1.close_dht_record(record.key)
await rc1.delete_dht_record(record.key)
await rc0.close_dht_record(record.key)
await rc0.delete_dht_record(record.key)
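
The drain loop above encodes a reusable pattern: expect one VALUE_CHANGE per record, and report whatever never arrives. A standalone helper in that spirit (a sketch; expect_updates is not part of the veilid API):

import asyncio

async def expect_updates(changes: asyncio.Queue, expected_keys: set, timeout: float = 10.0) -> set:
    # Pull one update at a time until every expected key has reported
    # in or the queue goes quiet; return the keys still missing.
    missing = set(expected_keys)
    while missing:
        try:
            upd = await asyncio.wait_for(changes.get(), timeout=timeout)
        except asyncio.TimeoutError:
            break
        missing.discard(upd.detail.key)
    return missing

With that, the except-block above reduces to dumping record info for whatever set expect_updates returns.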
@pytest.mark.asyncio
async def test_inspect_dht_record(api_connection: veilid.VeilidAPI):
@@ -486,8 +629,6 @@ async def test_schema_limit_smpl(api_connection: veilid.VeilidAPI):
@pytest.mark.skipif(os.getenv("INTEGRATION") != "1", reason="integration test requires two servers running")
@pytest.mark.asyncio
async def test_dht_integration_writer_reader():
@@ -702,21 +843,23 @@ async def test_dht_write_read_full_subkeys_local():
async def sync(rc: veilid.RoutingContext, records: list[veilid.DHTRecordDescriptor]):
print('syncing records to the network')
syncrecords = records.copy()
while len(syncrecords) > 0:
if len(syncrecords) == 0:
return
while True:
donerecords = set()
subkeysleft = 0
for desc in records:
rr = await rc.inspect_dht_record(desc.key, [])
left = sum(end - start + 1 for start, end in rr.offline_subkeys)
if left == 0:
if veilid.ValueSeqNum.NONE not in rr.local_seqs:
donerecords.add(desc)
donerecords.add(desc)
else:
subkeysleft += left
syncrecords = [x for x in syncrecords if x not in donerecords]
print(f' {len(syncrecords)} records {subkeysleft} subkeys left')
if len(syncrecords) == 0:
break
print(f' syncing {len(syncrecords)} records {subkeysleft} subkeys left')
await asyncio.sleep(1)
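
sync() simply polls inspect_dht_record until no subkeys remain offline. The arithmetic relies on offline_subkeys being a list of inclusive (start, end) ranges, the same shape as update.detail.subkeys in the watch tests, so the number of unflushed subkeys is the sum of the range widths:

def count_subkeys(ranges: list[tuple[int, int]]) -> int:
    # Ranges are inclusive on both ends: (3, 4) covers subkeys 3 and 4.
    return sum(end - start + 1 for start, end in ranges)

assert count_subkeys([(3, 4)]) == 2
assert count_subkeys([(3, 3), (5, 5)]) == 2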

View file

@@ -92,17 +92,17 @@ class RoutingContext(ABC):
async def watch_dht_values(
self,
key: types.TypedKey,
subkeys: list[tuple[types.ValueSubkey, types.ValueSubkey]],
expiration: types.Timestamp = 0,
subkeys: list[tuple[types.ValueSubkey, types.ValueSubkey]] = [],
expiration: types.Timestamp = types.Timestamp(0),
count: int = 0xFFFFFFFF,
) -> types.Timestamp:
) -> bool:
pass
@abstractmethod
async def cancel_dht_watch(
self,
key: types.TypedKey,
subkeys: list[tuple[types.ValueSubkey, types.ValueSubkey]],
subkeys: list[tuple[types.ValueSubkey, types.ValueSubkey]] = [],
) -> bool:
pass
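
With the new defaults in the abstract interface, a whole-record watch needs no boilerplate arguments, and both calls answer with an "is the watch (still) active" flag. A hedged sketch against this interface (the empty-list default for cancel_dht_watch presumably addresses the full watch, mirroring watch_dht_values):

import veilid
from veilid import Timestamp, ValueSubkey

async def watch_defaults_demo(rc: veilid.RoutingContext, key: veilid.TypedKey):
    # Defaulted form: all subkeys, no expiration, effectively unlimited count.
    active = await rc.watch_dht_values(key)
    # Fully spelled-out equivalent of the defaults:
    active = await rc.watch_dht_values(key, [], Timestamp(0), 0xFFFFFFFF)
    # Drop part of the range; the return value says whether any watch remains.
    still_active = await rc.cancel_dht_watch(key, [(ValueSubkey(0), ValueSubkey(2))])
    return active, still_active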

View file

@@ -740,10 +740,10 @@ class _JsonRoutingContext(RoutingContext):
async def watch_dht_values(
self,
key: TypedKey,
subkeys: list[tuple[ValueSubkey, ValueSubkey]],
expiration: Timestamp = 0,
subkeys: list[tuple[ValueSubkey, ValueSubkey]] = [],
expiration: Timestamp = Timestamp(0),
count: int = 0xFFFFFFFF,
) -> Timestamp:
) -> bool:
assert isinstance(key, TypedKey)
assert isinstance(subkeys, list)
for s in subkeys:
@@ -753,23 +753,22 @@ class _JsonRoutingContext(RoutingContext):
assert isinstance(expiration, Timestamp)
assert isinstance(count, int)
return Timestamp(
raise_api_result(
await self.api.send_ndjson_request(
Operation.ROUTING_CONTEXT,
validate=validate_rc_op,
rc_id=self.rc_id,
rc_op=RoutingContextOperation.WATCH_DHT_VALUES,
key=key,
subkeys=subkeys,
expiration=str(expiration),
count=count,
)
return raise_api_result(
await self.api.send_ndjson_request(
Operation.ROUTING_CONTEXT,
validate=validate_rc_op,
rc_id=self.rc_id,
rc_op=RoutingContextOperation.WATCH_DHT_VALUES,
key=key,
subkeys=subkeys,
expiration=str(expiration),
count=count,
)
)
async def cancel_dht_watch(
self, key: TypedKey, subkeys: list[tuple[ValueSubkey, ValueSubkey]]
self, key: TypedKey, subkeys: list[tuple[ValueSubkey, ValueSubkey]] = []
) -> bool:
assert isinstance(key, TypedKey)
assert isinstance(subkeys, list)

View file

@@ -858,7 +858,7 @@
],
"properties": {
"value": {
"type": "string"
"type": "boolean"
}
}
},

View file

@@ -469,20 +469,23 @@
{
"type": "object",
"required": [
"count",
"expiration",
"key",
"rc_op",
"subkeys"
"rc_op"
],
"properties": {
"count": {
"type": "integer",
"type": [
"integer",
"null"
],
"format": "uint32",
"minimum": 0.0
},
"expiration": {
"type": "string"
"type": [
"string",
"null"
]
},
"key": {
"type": "string"
@@ -494,7 +497,10 @@
]
},
"subkeys": {
"type": "array",
"type": [
"array",
"null"
],
"items": {
"type": "array",
"items": [
@@ -519,8 +525,7 @@
"type": "object",
"required": [
"key",
"rc_op",
"subkeys"
"rc_op"
],
"properties": {
"key": {
@@ -533,7 +538,10 @@
]
},
"subkeys": {
"type": "array",
"type": [
"array",
"null"
],
"items": {
"type": "array",
"items": [
@@ -558,9 +566,7 @@
"type": "object",
"required": [
"key",
"rc_op",
"scope",
"subkeys"
"rc_op"
],
"properties": {
"key": {
@@ -573,10 +579,18 @@
]
},
"scope": {
"$ref": "#/definitions/DHTReportScope"
"default": "Local",
"allOf": [
{
"$ref": "#/definitions/DHTReportScope"
}
]
},
"subkeys": {
"type": "array",
"type": [
"array",
"null"
],
"items": {
"type": "array",
"items": [

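The schema relaxation mirrors the new Python defaults: subkeys, expiration, and count become nullable and drop out of the required lists for the watch and cancel operations, and the inspect operation's scope gains a "Local" default. On the wire, a minimal watch request can therefore omit the optional fields entirely. A sketch of the payloads as Python dicts (the rc_op spelling is assumed from the enum name; the record key is a placeholder):

minimal_watch_request = {
    "rc_op": "WatchDhtValues",  # assumed wire name for WATCH_DHT_VALUES
    "key": "VLD0:...",          # placeholder TypedKey
}

explicit_watch_request = {
    "rc_op": "WatchDhtValues",
    "key": "VLD0:...",
    "subkeys": [[0, 3]],        # inclusive ranges, as elsewhere
    "expiration": "0",          # Timestamps travel as strings
    "count": 0xFFFFFFFF,
}

# And per the response-schema hunk earlier, the watch reply's "value"
# is now a boolean (watch active), not a stringified Timestamp.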
View file

@@ -59,6 +59,8 @@ class VeilidStateAttachment:
j["attached_uptime"],
)
def to_json(self) -> dict:
return self.__dict__
class AnswerStats:
@@ -114,6 +116,9 @@ class AnswerStats:
j["consecutive_lost_answers_minimum"],
)
def to_json(self) -> dict:
return self.__dict__
class RPCStats:
messages_sent: int
messages_rcvd: int
@@ -172,6 +177,9 @@ class RPCStats:
AnswerStats.from_json(j["answer_ordered"]),
)
def to_json(self) -> dict:
return self.__dict__
class LatencyStats:
fastest: TimestampDuration
@@ -213,6 +221,9 @@ class LatencyStats:
TimestampDuration(j["p75"]),
)
def to_json(self) -> dict:
return self.__dict__
class TransferStats:
total: ByteCount
@@ -365,6 +376,9 @@ class PeerStats:
StateStats.from_json(j["state"]),
)
def to_json(self) -> dict:
return self.__dict__
class PeerTableData:
node_ids: list[str]
@@ -381,6 +395,9 @@ class PeerTableData:
"""JSON object hook"""
return cls(j["node_ids"], j["peer_address"], PeerStats.from_json(j["peer_stats"]))
def to_json(self) -> dict:
return self.__dict__
class VeilidStateNetwork:
started: bool
@@ -410,6 +427,9 @@ class VeilidStateNetwork:
[PeerTableData.from_json(peer) for peer in j["peers"]],
)
def to_json(self) -> dict:
return self.__dict__
class VeilidStateConfig:
config: VeilidConfig
@@ -422,6 +442,9 @@ class VeilidStateConfig:
"""JSON object hook"""
return cls(VeilidConfig.from_json(j["config"]))
def to_json(self) -> dict:
return self.__dict__
class VeilidState:
attachment: VeilidStateAttachment
@@ -447,6 +470,9 @@ class VeilidState:
VeilidStateConfig.from_json(j["config"]),
)
def to_json(self) -> dict:
return self.__dict__
class VeilidLog:
log_level: VeilidLogLevel
@@ -463,6 +489,9 @@ class VeilidLog:
"""JSON object hook"""
return cls(VeilidLogLevel(j["log_level"]), j["message"], j["backtrace"])
def to_json(self) -> dict:
return self.__dict__
class VeilidAppMessage:
sender: Optional[TypedKey]
@@ -483,6 +512,9 @@ class VeilidAppMessage:
urlsafe_b64decode_no_pad(j["message"]),
)
def to_json(self) -> dict:
return self.__dict__
class VeilidAppCall:
sender: Optional[TypedKey]
@@ -506,6 +538,9 @@ class VeilidAppCall:
OperationId(j["call_id"]),
)
def to_json(self) -> dict:
return self.__dict__
class VeilidRouteChange:
dead_routes: list[RouteId]
@@ -523,6 +558,9 @@ class VeilidRouteChange:
[RouteId(route) for route in j["dead_remote_routes"]],
)
def to_json(self) -> dict:
return self.__dict__
class VeilidValueChange:
key: TypedKey
@@ -546,6 +584,9 @@ class VeilidValueChange:
None if j["value"] is None else ValueData.from_json(j["value"]),
)
def to_json(self) -> dict:
return self.__dict__
class VeilidUpdateKind(StrEnum):
LOG = "Log"
@@ -610,3 +651,6 @@ class VeilidUpdate:
case _:
raise ValueError("Unknown VeilidUpdateKind")
return cls(kind, detail)
def to_json(self) -> dict:
return self.__dict__
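
Every state and update type now carries the same one-line to_json, handing __dict__ to the encoder, which is what lets VeilidJSONEncoder serialize whole update objects — exactly what the watch test leans on when it prints an unexpected update. A minimal usage sketch:

import veilid
from veilid.types import VeilidJSONEncoder

def dump_update(update: veilid.VeilidUpdate) -> str:
    # VeilidJSONEncoder.dumps serializes the object graph, presumably
    # via the new to_json()/__dict__ hooks added in this commit.
    return VeilidJSONEncoder.dumps(update)

# e.g. print(f"bad update: {dump_update(upd)}"), as in test_watch_dht_values.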