Merge branch veilid:main into convert_cicd_to_python

Commit 4b82734c23 by TC, 2025-03-16 16:20:05 +00:00
63 changed files with 2649 additions and 1488 deletions

Cargo.lock (generated): 475 changed lines; file diff suppressed because it is too large.


@ -67,7 +67,7 @@ async fn main() {
};
let veilid = veilid_core::api_startup_config(update_callback, config).await.unwrap();
println!("Node ID: {}", veilid.config().unwrap().get_veilid_state().config.network.routing_table.node_id);
println!("Node ID: {}", veilid.config().unwrap().get().network.routing_table.node_id);
veilid.attach().await.unwrap();
// Until CTRL+C is pressed, keep running
tokio::signal::ctrl_c().await.unwrap();


@ -66,7 +66,7 @@ async fn main() {
};
let veilid = veilid_core::api_startup_config(update_callback, config).await.unwrap();
println!("Node ID: {}", veilid.config().unwrap().get_veilid_state().config.network.routing_table.node_id);
println!("Node ID: {}", veilid.config().unwrap().get().network.routing_table.node_id);
veilid.attach().await.unwrap();
// Until CTRL+C is pressed, keep running
tokio::signal::ctrl_c().await.unwrap();
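Both quickstart examples above receive the same one-line change: the node ID is now read from the snapshot returned by VeilidConfig::get() instead of going through get_veilid_state().config. A hedged sketch of the updated access pattern, using only calls visible in this diff (update_callback and config are assumed to be built as in the examples):

// Old path: veilid.config().unwrap().get_veilid_state().config.network.routing_table.node_id
// New path: the config snapshot comes straight from get():
let veilid = veilid_core::api_startup_config(update_callback, config).await.unwrap();
println!("Node ID: {}", veilid.config().unwrap().get().network.routing_table.node_id);
veilid.attach().await.unwrap();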


@ -27,12 +27,12 @@ rt-async-std = [
rt-tokio = ["tokio", "tokio-util", "veilid-tools/rt-tokio", "cursive/rt-tokio"]
[dependencies]
async-std = { version = "1.12.0", features = [
async-std = { version = "1.13.0", features = [
"unstable",
"attributes",
], optional = true }
tokio = { version = "1.38.1", features = ["full", "tracing"], optional = true }
tokio-util = { version = "0.7.11", features = ["compat"], optional = true }
tokio = { version = "1.43.0", features = ["full", "tracing"], optional = true }
tokio-util = { version = "0.7.13", features = ["compat"], optional = true }
async-tungstenite = { version = "^0.23" }
cursive = { git = "https://gitlab.com/veilid/cursive.git", default-features = false, features = [
"crossterm",
@ -44,7 +44,7 @@ cursive_buffered_backend = { git = "https://gitlab.com/veilid/cursive-buffered-b
# cursive-multiplex = "0.6.0"
# cursive_tree_view = "0.6.0"
cursive_table_view = { git = "https://gitlab.com/veilid/cursive-table-view.git" }
arboard = { version = "3.4.0", default-features = false }
arboard = { version = "3.4.1", default-features = false }
# cursive-tabs = "0.5.0"
clap = { version = "4", features = ["derive"] }
directories = "^5"
@ -68,12 +68,12 @@ flume = { version = "^0", features = ["async"] }
data-encoding = { version = "^2" }
indent = { version = "0.1.1" }
chrono = "0.4.38"
chrono = "0.4.40"
owning_ref = "0.4.1"
unicode-width = "0.1.13"
unicode-width = "0.1.14"
lru = "0.10.1"
rustyline-async = "0.4.2"
console = "0.15.8"
rustyline-async = "0.4.5"
console = "0.15.11"
[dev-dependencies]
serial_test = "^2"


@ -71,8 +71,8 @@ veilid-tools = { version = "0.4.3", path = "../veilid-tools", features = [
"tracing",
], default-features = false }
paste = "1.0.15"
once_cell = "1.19.0"
backtrace = "0.3.71"
once_cell = "1.20.3"
backtrace = "^0.3.71"
num-traits = "0.2.19"
shell-words = "1.1.0"
static_assertions = "1.1.0"
@ -82,14 +82,14 @@ lazy_static = "1.5.0"
directories = "5.0.1"
# Logging
tracing = { version = "0.1.40", features = ["log", "attributes"] }
tracing-subscriber = "0.3.18"
tracing-error = "0.2.0"
tracing = { version = "0.1.41", features = ["log", "attributes"] }
tracing-subscriber = "0.3.19"
tracing-error = "0.2.1"
eyre = "0.6.12"
thiserror = "1.0.63"
thiserror = "1.0.69"
# Data structures
enumset = { version = "1.1.3", features = ["serde"] }
enumset = { version = "1.1.5", features = ["serde"] }
keyvaluedb = "0.1.2"
range-set-blaze = "0.1.16"
weak-table = "0.3.2"
@ -98,10 +98,10 @@ hashlink = { package = "veilid-hashlink", version = "0.1.1", features = [
] }
# System
futures-util = { version = "0.3.30", default-features = false, features = [
futures-util = { version = "0.3.31", default-features = false, features = [
"alloc",
] }
flume = { version = "0.11.0", features = ["async"] }
flume = { version = "0.11.1", features = ["async"] }
parking_lot = "0.12.3"
lock_api = "0.4.12"
stop-token = { version = "0.7.0", default-features = false }
@ -124,23 +124,23 @@ curve25519-dalek = { version = "4.1.3", default-features = false, features = [
"zeroize",
"precomputed-tables",
] }
blake3 = { version = "1.5.3" }
blake3 = { version = "1.6.1" }
chacha20poly1305 = "0.10.1"
chacha20 = "0.9.1"
argon2 = "0.5.3"
# Network
async-std-resolver = { version = "0.24.1", optional = true }
hickory-resolver = { version = "0.24.1", optional = true }
async-std-resolver = { version = "0.24.4", optional = true }
hickory-resolver = { version = "0.24.4", optional = true }
# Serialization
capnp = { version = "0.19.6", default-features = false, features = ["alloc"] }
serde = { version = "1.0.214", features = ["derive", "rc"] }
serde_json = { version = "1.0.132" }
capnp = { version = "0.19.8", default-features = false, features = ["alloc"] }
serde = { version = "1.0.218", features = ["derive", "rc"] }
serde_json = { version = "1.0.140" }
serde-big-array = "0.5.1"
json = "0.12.4"
data-encoding = { version = "2.6.0" }
schemars = "0.8.21"
data-encoding = { version = "2.8.0" }
schemars = "0.8.22"
lz4_flex = { version = "0.11.3", default-features = false, features = [
"safe-encode",
"safe-decode",
@ -155,18 +155,18 @@ sanitize-filename = "0.5.0"
# Tools
config = { version = "0.13.4", default-features = false, features = ["yaml"] }
bugsalot = { package = "veilid-bugsalot", version = "0.2.0" }
chrono = "0.4.38"
libc = "0.2.155"
chrono = "0.4.40"
libc = "0.2.170"
nix = "0.27.1"
maxminddb = { version = "0.24.0", optional = true }
# System
async-std = { version = "1.12.0", features = ["unstable"], optional = true }
async-std = { version = "1.13.0", features = ["unstable"], optional = true }
sysinfo = { version = "^0.30.13", default-features = false }
tokio = { version = "1.38.1", features = ["full"], optional = true }
tokio-util = { version = "0.7.11", features = ["compat"], optional = true }
tokio-stream = { version = "0.1.15", features = ["net"], optional = true }
futures-util = { version = "0.3.30", default-features = false, features = [
tokio = { version = "1.43.0", features = ["full"], optional = true }
tokio-util = { version = "0.7.13", features = ["compat"], optional = true }
tokio-stream = { version = "0.1.17", features = ["net"], optional = true }
futures-util = { version = "0.3.31", default-features = false, features = [
"async-await",
"sink",
"std",
@ -201,9 +201,9 @@ async_executors = { version = "0.7.0", default-features = false, features = [
"bindgen",
"timer",
] }
wasm-bindgen = "0.2.92"
js-sys = "0.3.69"
wasm-bindgen-futures = "0.4.42"
wasm-bindgen = "0.2.100"
js-sys = "0.3.77"
wasm-bindgen-futures = "0.4.50"
send_wrapper = { version = "0.6.0", features = ["futures"] }
serde_bytes = { version = "0.11", default-features = false, features = [
"alloc",
@ -216,14 +216,13 @@ ws_stream_wasm = "0.7.4"
# Logging
wasm-logger = "0.2.0"
tracing-wasm = "0.2.1"
# Data Structures
keyvaluedb-web = "0.1.2"
### Configuration for WASM32 'web-sys' crate
[target.'cfg(all(target_arch = "wasm32", target_os = "unknown"))'.dependencies.web-sys]
version = "0.3.69"
version = "0.3.77"
features = [
'Document',
'HtmlDocument',
@ -263,23 +262,23 @@ tracing-oslog = { version = "0.1.2", optional = true }
[target.'cfg(not(all(target_arch = "wasm32", target_os = "unknown")))'.dev-dependencies]
simplelog = { version = "0.12.2", features = ["test"] }
serial_test = "2.0.0"
tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }
tracing-subscriber = { version = "0.3.19", features = ["env-filter"] }
[target.'cfg(all(target_arch = "wasm32", target_os = "unknown"))'.dev-dependencies]
serial_test = { version = "2.0.0", default-features = false, features = [
"async",
] }
wasm-bindgen-test = "0.3.42"
wasm-bindgen-test = "0.3.50"
console_error_panic_hook = "0.1.7"
wee_alloc = "0.4.5"
wasm-logger = "0.2.0"
veilid-tracing-wasm = "^0"
### BUILD OPTIONS
[build-dependencies]
capnpc = "0.19.0"
glob = "0.3.1"
filetime = "0.2.23"
glob = "0.3.2"
filetime = "0.2.25"
sha2 = "0.10.8"
hex = "0.4.3"
reqwest = { version = "0.11", features = ["blocking"], optional = true }


@ -50,6 +50,7 @@ mod logging;
mod network_manager;
mod routing_table;
mod rpc_processor;
mod stats_accounting;
mod storage_manager;
mod table_store;
mod veilid_api;
@ -64,6 +65,7 @@ pub use self::logging::{
DEFAULT_LOG_FACILITIES_ENABLED_LIST, DEFAULT_LOG_FACILITIES_IGNORE_LIST,
DURATION_LOG_FACILITIES, FLAME_LOG_FACILITIES_IGNORE_LIST, VEILID_LOG_KEY_FIELD,
};
pub(crate) use self::stats_accounting::*;
pub use self::veilid_api::*;
pub use self::veilid_config::*;
pub use veilid_tools as tools;


@ -193,16 +193,13 @@ impl<S: Subscriber + for<'a> registry::LookupSpan<'a>> Layer<S> for ApiTracingLa
attrs.record(&mut new_debug_record);
if let Some(span_ref) = ctx.span(id) {
span_ref
.extensions_mut()
.insert::<VeilidKeyedStringRecorder>(new_debug_record);
let mut extensions_mut = span_ref.extensions_mut();
extensions_mut.insert::<VeilidKeyedStringRecorder>(new_debug_record);
if crate::DURATION_LOG_FACILITIES.contains(&attrs.metadata().target()) {
span_ref
.extensions_mut()
.insert::<SpanDuration>(SpanDuration {
start: Timestamp::now(),
end: Timestamp::default(),
});
extensions_mut.insert::<SpanDuration>(SpanDuration {
start: Timestamp::now(),
end: Timestamp::default(),
});
}
}
}
@ -213,14 +210,14 @@ impl<S: Subscriber + for<'a> registry::LookupSpan<'a>> Layer<S> for ApiTracingLa
return;
}
if let Some(span_ref) = ctx.span(&id) {
if let Some(span_duration) = span_ref.extensions_mut().get_mut::<SpanDuration>() {
let mut extensions_mut = span_ref.extensions_mut();
if let Some(span_duration) = extensions_mut.get_mut::<SpanDuration>() {
span_duration.end = Timestamp::now();
let duration = span_duration.end.saturating_sub(span_duration.start);
let meta = span_ref.metadata();
let mut extensions = span_ref.extensions_mut();
let log_key =
if let Some(span_ksr) = extensions.get_mut::<VeilidKeyedStringRecorder>() {
if let Some(span_ksr) = extensions_mut.get_mut::<VeilidKeyedStringRecorder>() {
span_ksr.log_key()
} else {
""
@ -254,10 +251,9 @@ impl<S: Subscriber + for<'a> registry::LookupSpan<'a>> Layer<S> for ApiTracingLa
return;
}
if let Some(span_ref) = ctx.span(id) {
if let Some(debug_record) = span_ref
.extensions_mut()
.get_mut::<VeilidKeyedStringRecorder>()
{
let mut extensions_mut = span_ref.extensions_mut();
if let Some(debug_record) = extensions_mut.get_mut::<VeilidKeyedStringRecorder>() {
values.record(debug_record);
}
}
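The refactor above is worth calling out: span_ref.extensions_mut() takes a write lock on the span's extension storage, and the temporary guard created in an `if let` scrutinee lives for the whole body, so taking a second extensions_mut() inside that body can self-deadlock. A hedged sketch of the single-guard pattern the new code uses, with type names from the surrounding diff:

// One write guard, reused for every read/write of the span's extensions.
let mut extensions_mut = span_ref.extensions_mut();
if let Some(span_duration) = extensions_mut.get_mut::<SpanDuration>() {
    span_duration.end = Timestamp::now();
}
// Still safe: same guard, no second lock acquisition while the first is live.
if let Some(recorder) = extensions_mut.get_mut::<VeilidKeyedStringRecorder>() {
    // ... use recorder ...
}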


@ -289,3 +289,91 @@ macro_rules! veilid_log {
$($k).+ = $($fields)*
)};
}
#[macro_export]
macro_rules! network_result_value_or_log {
($self:ident $r:expr => $f:expr) => {
network_result_value_or_log!($self target: self::__VEILID_LOG_FACILITY, $r => [ "" ] $f )
};
($self:ident $r:expr => [ $d:expr ] $f:expr) => {
network_result_value_or_log!($self target: self::__VEILID_LOG_FACILITY, $r => [ $d ] $f )
};
($self:ident target: $target:expr, $r:expr => $f:expr) => {
network_result_value_or_log!($self target: $target, $r => [ "" ] $f )
};
($self:ident target: $target:expr, $r:expr => [ $d:expr ] $f:expr) => { {
let __extra_message = if debug_target_enabled!("network_result") {
$d.to_string()
} else {
"".to_string()
};
match $r {
NetworkResult::Timeout => {
veilid_log!($self debug target: $target,
"{} at {}@{}:{} in {}{}",
"Timeout",
file!(),
line!(),
column!(),
fn_name::uninstantiated!(),
__extra_message
);
$f
}
NetworkResult::ServiceUnavailable(ref s) => {
veilid_log!($self debug target: $target,
"{}({}) at {}@{}:{} in {}{}",
"ServiceUnavailable",
s,
file!(),
line!(),
column!(),
fn_name::uninstantiated!(),
__extra_message
);
$f
}
NetworkResult::NoConnection(ref e) => {
veilid_log!($self debug target: $target,
"{}({}) at {}@{}:{} in {}{}",
"No connection",
e.to_string(),
file!(),
line!(),
column!(),
fn_name::uninstantiated!(),
__extra_message
);
$f
}
NetworkResult::AlreadyExists(ref e) => {
veilid_log!($self debug target: $target,
"{}({}) at {}@{}:{} in {}{}",
"Already exists",
e.to_string(),
file!(),
line!(),
column!(),
fn_name::uninstantiated!(),
__extra_message
);
$f
}
NetworkResult::InvalidMessage(ref s) => {
veilid_log!($self debug target: $target,
"{}({}) at {}@{}:{} in {}{}",
"Invalid message",
s,
file!(),
line!(),
column!(),
fn_name::uninstantiated!(),
__extra_message
);
$f
}
NetworkResult::Value(v) => v,
}
} };
}
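For reference, a hedged usage sketch of the new macro: it unwraps NetworkResult::Value and, for every other variant, logs the variant name with file/line/column plus an optional extra-context expression, then evaluates the fallback expression. The result variable and context string here are illustrative; the shape mirrors the call site added in connection_manager below:

// `nres` holds a NetworkResult (illustrative name); on Timeout/NoConnection/etc.
// this logs to the "network_result" target and runs the fallback block,
// which re-raises the non-Value result to the caller.
let prot_conn = network_result_value_or_log!(self target: "network_result",
    nres => [ format!("connecting {:?}", dial_info) ] {
        network_result_raise!(nres);
    });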


@ -8,7 +8,7 @@ impl_veilid_log_facility!("net");
const PROTECTED_CONNECTION_DROP_SPAN: TimestampDuration = TimestampDuration::new_secs(10);
const PROTECTED_CONNECTION_DROP_COUNT: usize = 3;
const NEW_CONNECTION_RETRY_COUNT: usize = 1;
const NEW_CONNECTION_RETRY_COUNT: usize = 0;
const NEW_CONNECTION_RETRY_DELAY_MS: u32 = 500;
///////////////////////////////////////////////////////////
@ -415,7 +415,19 @@ impl ConnectionManager {
let best_port = preferred_local_address.map(|pla| pla.port());
// Async lock on the remote address for atomicity per remote
let _lock_guard = self.arc.address_lock_table.lock_tag(remote_addr).await;
// Use the initial connection timeout here because multiple calls to get_or_create_connection
// can run simultaneously and we want to wait for the first one to succeed or fail
let Ok(_lock_guard) = timeout(
self.arc.connection_initial_timeout_ms,
self.arc.address_lock_table.lock_tag(remote_addr),
)
.await
else {
veilid_log!(self debug "== get_or_create_connection: connection busy, not connecting to dial_info={:?}", dial_info);
return Ok(NetworkResult::no_connection_other(
"connection endpoint busy",
));
};
veilid_log!(self trace "== get_or_create_connection dial_info={:?}", dial_info);
@ -449,7 +461,8 @@ impl ConnectionManager {
let mut retry_count = NEW_CONNECTION_RETRY_COUNT;
let network_manager = self.network_manager();
let prot_conn = network_result_try!(loop {
let nres = loop {
veilid_log!(self trace "== get_or_create_connection connect({}) {:?} -> {}", retry_count, preferred_local_address, dial_info);
let result_net_res = ProtocolNetworkConnection::connect(
self.registry(),
preferred_local_address,
@ -474,12 +487,16 @@ impl ConnectionManager {
}
}
};
veilid_log!(self debug "get_or_create_connection retries left: {}", retry_count);
retry_count -= 1;
// Release the preferred local address if things can't connect due to a low-level collision we don't have a record of
preferred_local_address = None;
// // XXX: This should not be necessary
// // Release the preferred local address if things can't connect due to a low-level collision we don't have a record of
// preferred_local_address = None;
sleep(NEW_CONNECTION_RETRY_DELAY_MS).await;
};
let prot_conn = network_result_value_or_log!(self target:"network_result", nres => [ format!("== get_or_create_connection failed {:?} -> {}", preferred_local_address, dial_info) ] {
network_result_raise!(nres);
});
// Add to the connection table
@ -598,7 +615,7 @@ impl ConnectionManager {
// Callback from network connection receive loop when it exits
// cleans up the entry in the connection table
pub(super) async fn report_connection_finished(&self, connection_id: NetworkConnectionId) {
pub(super) fn report_connection_finished(&self, connection_id: NetworkConnectionId) {
// Get channel sender
let sender = {
let mut inner = self.arc.inner.lock();
@ -668,7 +685,7 @@ impl ConnectionManager {
}
}
}
let _ = sender.send_async(ConnectionManagerEvent::Dead(conn)).await;
let _ = sender.send(ConnectionManagerEvent::Dead(conn));
}
}
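A note on the send_async → send swap above: flume senders work from non-async contexts, which is what lets report_connection_finished shed its async fn signature. A minimal hedged sketch; the channel construction is not shown in this diff, so an unbounded channel is assumed here (a bounded one could block the caller when full):

// flume::Sender::send needs no .await; on an unbounded channel it never blocks.
let (sender, _receiver) = flume::unbounded::<ConnectionManagerEvent>();
let _ = sender.send(ConnectionManagerEvent::Dead(conn)); // conn: the dead connection, as above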


@ -0,0 +1,37 @@
use super::*;
impl NetworkManager {
pub fn debug_info_nodeinfo(&self) -> String {
let mut out = String::new();
let inner = self.inner.lock();
out += &format!(
"Relay Worker Dequeue Latency:\n{}",
indent_all_string(&inner.stats.relay_worker_dequeue_latency)
);
out += "\n";
out += &format!(
"Relay Worker Process Latency:\n{}",
indent_all_string(&inner.stats.relay_worker_process_latency)
);
out
}
pub fn debug(&self) -> String {
let stats = self.get_stats();
let mut out = String::new();
out += "Network Manager\n";
out += "---------------\n";
let mut out = format!(
"Transfer stats:\n{}\n",
indent_all_string(&stats.self_stats.transfer_stats)
);
out += &self.debug_info_nodeinfo();
out += "Node Contact Method Cache\n";
out += "-------------------------\n";
out += &self.inner.lock().node_contact_method_cache.debug();
out
}
}


@ -10,10 +10,12 @@ mod address_filter;
mod connection_handle;
mod connection_manager;
mod connection_table;
mod debug;
mod direct_boot;
mod network_connection;
mod node_contact_method_cache;
mod receipt_manager;
mod relay_worker;
mod send_data;
mod stats;
mod tasks;
@ -26,9 +28,10 @@ pub mod tests;
pub use connection_manager::*;
pub use network_connection::*;
pub(crate) use node_contact_method_cache::*;
pub use receipt_manager::*;
pub use stats::*;
pub(crate) use node_contact_method_cache::*;
pub(crate) use types::*;
////////////////////////////////////////////////////////////////////////////////////////
@ -42,6 +45,7 @@ use hashlink::LruCache;
use native::*;
#[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))]
pub use native::{MAX_CAPABILITIES, PUBLIC_INTERNET_CAPABILITIES};
use relay_worker::*;
use routing_table::*;
use rpc_processor::*;
#[cfg(all(target_arch = "wasm32", target_os = "unknown"))]
@ -60,6 +64,7 @@ pub const IPADDR_MAX_INACTIVE_DURATION_US: TimestampDuration =
pub const ADDRESS_FILTER_TASK_INTERVAL_SECS: u32 = 60;
pub const BOOT_MAGIC: &[u8; 4] = b"BOOT";
pub const HOLE_PUNCH_DELAY_MS: u32 = 100;
pub const RELAY_WORKERS_PER_CORE: u32 = 16;
// Things we get when we start up and go away when we shut down
// Routing table is not in here because we want it to survive a network shutdown/startup restart
@ -171,7 +176,6 @@ impl Default for NetworkManagerStartupContext {
Self::new()
}
}
// The mutable state of the network manager
#[derive(Debug)]
struct NetworkManagerInner {
@ -181,6 +185,11 @@ struct NetworkManagerInner {
address_check: Option<AddressCheck>,
peer_info_change_subscription: Option<EventBusSubscription>,
socket_address_change_subscription: Option<EventBusSubscription>,
// Relay workers
relay_stop_source: Option<StopSource>,
relay_send_channel: Option<flume::Sender<RelayWorkerRequest>>,
relay_worker_join_handles: Vec<MustJoinHandle<()>>,
}
pub(crate) struct NetworkManager {
@ -202,6 +211,10 @@ pub(crate) struct NetworkManager {
// Startup context
startup_context: NetworkManagerStartupContext,
// Relay workers config
concurrency: u32,
queue_size: u32,
}
impl_veilid_component!(NetworkManager);
@ -214,6 +227,8 @@ impl fmt::Debug for NetworkManager {
.field("address_filter", &self.address_filter)
.field("network_key", &self.network_key)
.field("startup_context", &self.startup_context)
.field("concurrency", &self.concurrency)
.field("queue_size", &self.queue_size)
.finish()
}
}
@ -227,6 +242,10 @@ impl NetworkManager {
address_check: None,
peer_info_change_subscription: None,
socket_address_change_subscription: None,
//
relay_send_channel: None,
relay_stop_source: None,
relay_worker_join_handles: Vec::new(),
}
}
@ -264,6 +283,26 @@ impl NetworkManager {
network_key
};
// make local copy of node id for easy access
let (concurrency, queue_size) = {
let config = registry.config();
let c = config.get();
// set up channel
let mut concurrency = c.network.rpc.concurrency;
let queue_size = c.network.rpc.queue_size;
if concurrency == 0 {
concurrency = get_concurrency();
if concurrency == 0 {
concurrency = 1;
}
// Default relay concurrency is the number of CPUs * 16 relay workers per core
concurrency *= RELAY_WORKERS_PER_CORE;
}
(concurrency, queue_size)
};
let inner = Self::new_inner();
let address_filter = AddressFilter::new(registry.clone());
@ -282,6 +321,8 @@ impl NetworkManager {
),
network_key,
startup_context,
concurrency,
queue_size,
};
this.setup_tasks();
@ -360,7 +401,8 @@ impl NetworkManager {
receipt_manager: receipt_manager.clone(),
});
let address_check = AddressCheck::new(net.clone());
// Startup relay workers
self.startup_relay_workers()?;
// Register event handlers
let peer_info_change_subscription =
@ -371,6 +413,7 @@ impl NetworkManager {
{
let mut inner = self.inner.lock();
let address_check = AddressCheck::new(net.clone());
inner.address_check = Some(address_check);
inner.peer_info_change_subscription = Some(peer_info_change_subscription);
inner.socket_address_change_subscription = Some(socket_address_change_subscription);
@ -426,6 +469,9 @@ impl NetworkManager {
inner.address_check = None;
}
// Shutdown relay workers
self.shutdown_relay_workers().await;
// Shutdown network components if they started up
veilid_log!(self debug "shutting down network components");
@ -1099,10 +1145,11 @@ impl NetworkManager {
relay_nr.set_sequencing(Sequencing::EnsureOrdered);
};
// Relay the packet to the desired destination
veilid_log!(self trace "relaying {} bytes to {}", data.len(), relay_nr);
if let Err(e) = pin_future!(self.send_data(relay_nr, data.to_vec())).await {
veilid_log!(self debug "failed to relay envelope: {}" ,e);
// Pass relay to RPC system
if let Err(e) = self.enqueue_relay(relay_nr, data.to_vec()) {
// Couldn't enqueue, but not the sender's fault
veilid_log!(self debug "failed to enqueue relay: {}", e);
return Ok(false);
}
}
// Inform caller that we dealt with the envelope, but did not process it locally
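To make the relay worker sizing earlier in this file concrete, a hedged worked example (the core count is assumed, not taken from this diff): with network.rpc.concurrency = 0 on an 8-core host, get_concurrency() yields 8, so the pool becomes 8 * RELAY_WORKERS_PER_CORE = 128 relay workers, while an explicit nonzero setting is used as-is.

// Illustrative extraction of the sizing rule from NetworkManager::new();
// RELAY_WORKERS_PER_CORE is the constant added above (16).
fn relay_worker_count(configured: u32, detected_cores: u32) -> u32 {
    if configured == 0 {
        detected_cores.max(1) * RELAY_WORKERS_PER_CORE // e.g. 8 * 16 = 128
    } else {
        configured // an explicit setting wins, unmultiplied
    }
}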


@ -7,9 +7,6 @@ use stop_token::future::FutureExt as _;
impl_veilid_log_facility!("net");
const PORT_MAP_VALIDATE_TRY_COUNT: usize = 3;
const PORT_MAP_VALIDATE_DELAY_MS: u32 = 500;
const PORT_MAP_TRY_COUNT: usize = 3;
const EXTERNAL_INFO_NODE_COUNT: usize = 20;
const EXTERNAL_INFO_CONCURRENCY: usize = 20;
const EXTERNAL_INFO_VALIDATIONS: usize = 5;
@ -121,7 +118,8 @@ struct ExternalInfo {
}
struct DiscoveryContextInner {
external_info: Vec<ExternalInfo>,
external_infos: Vec<ExternalInfo>,
mapped_dial_info: Option<DialInfo>,
}
pub(super) struct DiscoveryContextUnlockedInner {
@ -163,7 +161,8 @@ impl DiscoveryContext {
registry,
unlocked_inner: Arc::new(DiscoveryContextUnlockedInner { config, intf_addrs }),
inner: Arc::new(Mutex::new(DiscoveryContextInner {
external_info: Vec::new(),
external_infos: Vec::new(),
mapped_dial_info: None,
})),
stop_token,
}
@ -405,11 +404,11 @@ impl DiscoveryContext {
{
let mut inner = self.inner.lock();
inner.external_info = external_address_infos;
inner.external_infos = external_address_infos;
veilid_log!(self debug "External Addresses ({:?}:{:?}):\n{}",
protocol_type,
address_type,
inner.external_info.iter().map(|x| format!(" {} <- {}",x.address, x.node)).collect::<Vec<_>>().join("\n"));
inner.external_infos.iter().map(|x| format!(" {} <- {}",x.address, x.node)).collect::<Vec<_>>().join("\n"));
}
true
@ -454,77 +453,87 @@ impl DiscoveryContext {
AddressType::IPV4 => IGDAddressType::IPV4,
};
let external_1 = self.inner.lock().external_info.first().unwrap().clone();
let igd_manager = self.network_manager().net().igd_manager.clone();
let mut tries = 0;
loop {
tries += 1;
// Attempt a port mapping. If this doesn't succeed, it's not going to
let mapped_external_address = igd_manager
.map_any_port(
igd_protocol_type,
igd_address_type,
local_port,
Some(external_1.address.ip_addr()),
)
.await?;
// Attempt a port mapping. If this doesn't succeed, it's not going to
let mapped_external_address = igd_manager
.map_any_port(igd_protocol_type, igd_address_type, local_port, None)
.await?;
// Make dial info from the port mapping
let external_mapped_dial_info = self.network_manager().net().make_dial_info(
SocketAddress::from_socket_addr(mapped_external_address),
protocol_type,
);
// Make dial info from the port mapping
let external_mapped_dial_info = self.network_manager().net().make_dial_info(
SocketAddress::from_socket_addr(mapped_external_address),
protocol_type,
);
// Attempt to validate the port mapping
let mut validate_tries = 0;
loop {
validate_tries += 1;
Some(external_mapped_dial_info)
}
// Ensure people can reach us. If we're firewalled off, this is useless
if self
.validate_dial_info(
external_1.node.clone(),
external_mapped_dial_info.clone(),
false,
)
.await
{
return Some(external_mapped_dial_info);
}
if validate_tries != PORT_MAP_VALIDATE_TRY_COUNT {
veilid_log!(self debug "UPNP port mapping succeeded but port {}/{} is still unreachable.\nretrying\n",
local_port, igd_protocol_type);
sleep(PORT_MAP_VALIDATE_DELAY_MS).await
} else {
break;
}
}
// Release the mapping if we're still unreachable
let _ = igd_manager
.unmap_port(
igd_protocol_type,
igd_address_type,
external_1.address.port(),
)
.await;
if tries == PORT_MAP_TRY_COUNT {
veilid_log!(self warn "UPNP port mapping succeeded but port {}/{} is still unreachable.\nYou may need to add a local firewall allowed port on this machine.\n",
local_port, igd_protocol_type
);
break;
fn matches_mapped_dial_info(&self, dial_info: &DialInfo) -> bool {
let mut skip = false;
if let Some(mapped_dial_info) = self.inner.lock().mapped_dial_info.as_ref() {
if mapped_dial_info == dial_info {
skip = true;
}
}
None
skip
}
///////
// Per-protocol discovery routines
// If we know we are not behind NAT, check our firewall status
#[instrument(level = "trace", skip(self), ret)]
fn protocol_process_mapped_dial_info(
&self,
all_possibilities: &mut DialInfoClassAllPossibilities,
unord: &mut FuturesUnordered<PinBoxFutureStatic<DetectionResultKind>>,
) {
let (external_infos, mapped_dial_info) = {
let inner = self.inner.lock();
let Some(mapped_dial_info) = inner.mapped_dial_info.clone() else {
return;
};
(inner.external_infos.clone(), mapped_dial_info)
};
// Have all the external validator nodes check us
for external_info in external_infos {
let possibilities = vec![(DialInfoClass::Mapped, 1)];
all_possibilities.add(&possibilities);
let this = self.clone();
let mapped_dial_info = mapped_dial_info.clone();
let do_no_nat_fut: PinBoxFutureStatic<DetectionResultKind> = Box::pin(async move {
// Do a validate_dial_info on the external address from a redirected node
if this
.validate_dial_info(external_info.node.clone(), mapped_dial_info.clone(), true)
.await
{
// Add public dial info with Direct dialinfo class
DetectionResultKind::Result {
possibilities,
result: DetectionResult {
config: this.config,
ddi: DetectedDialInfo::Detected(DialInfoDetail {
dial_info: mapped_dial_info.clone(),
class: DialInfoClass::Mapped,
}),
external_address_types: AddressTypeSet::only(
external_info.address.address_type(),
),
},
}
} else {
DetectionResultKind::Failure { possibilities }
}
});
unord.push(do_no_nat_fut);
}
}
// If we know we are not behind NAT, check our firewall status
#[instrument(level = "trace", skip(self), ret)]
fn protocol_process_no_nat(
@ -532,15 +541,20 @@ impl DiscoveryContext {
all_possibilities: &mut DialInfoClassAllPossibilities,
unord: &mut FuturesUnordered<PinBoxFutureStatic<DetectionResultKind>>,
) {
let external_infos = self.inner.lock().external_info.clone();
let external_infos = self.inner.lock().external_infos.clone();
// Have all the external validator nodes check us
for external_info in external_infos {
let this = self.clone();
// If this is the same as an existing upnp mapping, skip it, since
// we are already validating that
if self.matches_mapped_dial_info(&external_info.dial_info) {
continue;
}
let possibilities = vec![(DialInfoClass::Direct, 1), (DialInfoClass::Blocked, 1)];
all_possibilities.add(&possibilities);
let this = self.clone();
let do_no_nat_fut: PinBoxFutureStatic<DetectionResultKind> = Box::pin(async move {
// Do a validate_dial_info on the external address from a redirected node
if this
@ -597,7 +611,7 @@ impl DiscoveryContext {
// Get the external dial info histogram for our use here
let external_info = {
let inner = self.inner.lock();
inner.external_info.clone()
inner.external_infos.clone()
};
let local_port = self.config.port;
@ -673,47 +687,51 @@ impl DiscoveryContext {
// If we have no external address that matches our local port, then let's try that port
// on our best external address and see if there's a port forward someone added manually
///////////
let this = self.clone();
if local_port_matching_external_info.is_none() && best_external_info.is_some() {
let c_external_1 = best_external_info.as_ref().unwrap().clone();
let c_this = this.clone();
let possibilities = vec![(DialInfoClass::Direct, 1)];
all_possibilities.add(&possibilities);
// Do a validate_dial_info on the external address, but with the same port as the local port of local interface, from a redirected node
// This test is to see if a node had manual port forwarding done with the same port number as the local listener
let mut external_1_dial_info_with_local_port = c_external_1.dial_info.clone();
external_1_dial_info_with_local_port.set_port(local_port);
let do_manual_map_fut: PinBoxFutureStatic<DetectionResultKind> = Box::pin(async move {
// Do a validate_dial_info on the external address, but with the same port as the local port of local interface, from a redirected node
// This test is to see if a node had manual port forwarding done with the same port number as the local listener
let mut external_1_dial_info_with_local_port = c_external_1.dial_info.clone();
external_1_dial_info_with_local_port.set_port(local_port);
// If this is the same as an existing upnp mapping, skip it, since
// we are already validating that
if !self.matches_mapped_dial_info(&external_1_dial_info_with_local_port) {
let possibilities = vec![(DialInfoClass::Direct, 1)];
all_possibilities.add(&possibilities);
if this
.validate_dial_info(
c_external_1.node.clone(),
external_1_dial_info_with_local_port.clone(),
true,
)
.await
{
// Add public dial info with Direct dialinfo class
return DetectionResultKind::Result {
possibilities,
result: DetectionResult {
config: c_this.config,
ddi: DetectedDialInfo::Detected(DialInfoDetail {
dial_info: external_1_dial_info_with_local_port,
class: DialInfoClass::Direct,
}),
external_address_types: AddressTypeSet::only(
c_external_1.address.address_type(),
),
},
};
}
let c_this = self.clone();
let do_manual_map_fut: PinBoxFutureStatic<DetectionResultKind> =
Box::pin(async move {
if c_this
.validate_dial_info(
c_external_1.node.clone(),
external_1_dial_info_with_local_port.clone(),
true,
)
.await
{
// Add public dial info with Direct dialinfo class
return DetectionResultKind::Result {
possibilities,
result: DetectionResult {
config: c_this.config,
ddi: DetectedDialInfo::Detected(DialInfoDetail {
dial_info: external_1_dial_info_with_local_port,
class: DialInfoClass::Direct,
}),
external_address_types: AddressTypeSet::only(
c_external_1.address.address_type(),
),
},
};
}
DetectionResultKind::Failure { possibilities }
});
unord.push(do_manual_map_fut);
DetectionResultKind::Failure { possibilities }
});
unord.push(do_manual_map_fut);
}
}
// NAT Detection
@ -724,86 +742,39 @@ impl DiscoveryContext {
// Full Cone NAT Detection
///////////
let c_this = self.clone();
let c_external_1 = external_info.first().cloned().unwrap();
let possibilities = vec![(DialInfoClass::FullConeNAT, 1)];
all_possibilities.add(&possibilities);
let do_full_cone_fut: PinBoxFutureStatic<DetectionResultKind> = Box::pin(async move {
let mut retry_count = retry_count;
// Let's see what kind of NAT we have
// Does a redirected dial info validation from a different address and a random port find us?
loop {
if c_this
.validate_dial_info(
c_external_1.node.clone(),
c_external_1.dial_info.clone(),
true,
)
.await
{
// Yes, another machine can use the dial info directly, so Full Cone
// Add public dial info with full cone NAT network class
return DetectionResultKind::Result {
possibilities,
result: DetectionResult {
config: c_this.config,
ddi: DetectedDialInfo::Detected(DialInfoDetail {
dial_info: c_external_1.dial_info,
class: DialInfoClass::FullConeNAT,
}),
external_address_types: AddressTypeSet::only(
c_external_1.address.address_type(),
),
},
};
}
if retry_count == 0 {
break;
}
retry_count -= 1;
}
DetectionResultKind::Failure { possibilities }
});
unord.push(do_full_cone_fut);
let c_this = self.clone();
let c_external_1 = external_info.first().cloned().unwrap();
let c_external_2 = external_info.get(1).cloned().unwrap();
let possibilities = vec![
(DialInfoClass::AddressRestrictedNAT, 1),
(DialInfoClass::PortRestrictedNAT, 1),
];
all_possibilities.add(&possibilities);
let do_restricted_cone_fut: PinBoxFutureStatic<DetectionResultKind> =
Box::pin(async move {
// If this is the same as an existing upnp mapping, skip it, since
// we are already validating that
if !self.matches_mapped_dial_info(&c_external_1.dial_info) {
let do_full_cone_fut: PinBoxFutureStatic<DetectionResultKind> = Box::pin(async move {
let mut retry_count = retry_count;
// We are restricted, determine what kind of restriction
// If we're going to end up as a restricted NAT of some sort
// Address is the same, so it's address or port restricted
// Let's see what kind of NAT we have
// Does a redirected dial info validation from a different address and a random port find us?
loop {
// Do a validate_dial_info on the external address from a random port
if c_this
.validate_dial_info(
c_external_2.node.clone(),
c_external_1.node.clone(),
c_external_1.dial_info.clone(),
false,
true,
)
.await
{
// Got a reply from a non-default port, which means we're only address restricted
// Yes, another machine can use the dial info directly, so Full Cone
// Add public dial info with full cone NAT network class
return DetectionResultKind::Result {
possibilities,
result: DetectionResult {
config: c_this.config,
ddi: DetectedDialInfo::Detected(DialInfoDetail {
dial_info: c_external_1.dial_info.clone(),
class: DialInfoClass::AddressRestrictedNAT,
dial_info: c_external_1.dial_info,
class: DialInfoClass::FullConeNAT,
}),
external_address_types: AddressTypeSet::only(
c_external_1.address.address_type(),
@ -811,29 +782,83 @@ impl DiscoveryContext {
},
};
}
if retry_count == 0 {
break;
}
retry_count -= 1;
}
// Didn't get a reply from a non-default port, which means we are also port restricted
DetectionResultKind::Result {
possibilities,
result: DetectionResult {
config: c_this.config,
ddi: DetectedDialInfo::Detected(DialInfoDetail {
dial_info: c_external_1.dial_info.clone(),
class: DialInfoClass::PortRestrictedNAT,
}),
external_address_types: AddressTypeSet::only(
c_external_1.address.address_type(),
),
},
}
DetectionResultKind::Failure { possibilities }
});
unord.push(do_restricted_cone_fut);
unord.push(do_full_cone_fut);
let possibilities = vec![
(DialInfoClass::AddressRestrictedNAT, 1),
(DialInfoClass::PortRestrictedNAT, 1),
];
all_possibilities.add(&possibilities);
let c_this = self.clone();
let c_external_1 = external_info.first().cloned().unwrap();
let c_external_2 = external_info.get(1).cloned().unwrap();
let do_restricted_cone_fut: PinBoxFutureStatic<DetectionResultKind> =
Box::pin(async move {
let mut retry_count = retry_count;
// We are restricted, determine what kind of restriction
// If we're going to end up as a restricted NAT of some sort
// Address is the same, so it's address or port restricted
loop {
// Do a validate_dial_info on the external address from a random port
if c_this
.validate_dial_info(
c_external_2.node.clone(),
c_external_1.dial_info.clone(),
false,
)
.await
{
// Got a reply from a non-default port, which means we're only address restricted
return DetectionResultKind::Result {
possibilities,
result: DetectionResult {
config: c_this.config,
ddi: DetectedDialInfo::Detected(DialInfoDetail {
dial_info: c_external_1.dial_info.clone(),
class: DialInfoClass::AddressRestrictedNAT,
}),
external_address_types: AddressTypeSet::only(
c_external_1.address.address_type(),
),
},
};
}
if retry_count == 0 {
break;
}
retry_count -= 1;
}
// Didn't get a reply from a non-default port, which means we are also port restricted
DetectionResultKind::Result {
possibilities,
result: DetectionResult {
config: c_this.config,
ddi: DetectedDialInfo::Detected(DialInfoDetail {
dial_info: c_external_1.dial_info.clone(),
class: DialInfoClass::PortRestrictedNAT,
}),
external_address_types: AddressTypeSet::only(
c_external_1.address.address_type(),
),
},
}
});
unord.push(do_restricted_cone_fut);
}
}
/// Run a discovery for a particular context
@ -861,34 +886,16 @@ impl DiscoveryContext {
let enable_upnp = self.config().with(|c| c.network.upnp);
if enable_upnp {
let this = self.clone();
// Attempt a port mapping via all available and enabled mechanisms
// Try this before the direct mapping in the event that we are restarting
// and may not have recorded a mapping created the last time
if let Some(external_mapped_dial_info) = self.try_upnp_port_mapping().await {
// Got a port mapping, store it
self.inner.lock().mapped_dial_info = Some(external_mapped_dial_info);
let possibilities = vec![(DialInfoClass::Mapped, 1)];
all_possibilities.add(&possibilities);
let do_mapped_fut: PinBoxFutureStatic<DetectionResultKind> = Box::pin(async move {
// Attempt a port mapping via all available and enabled mechanisms
// Try this before the direct mapping in the event that we are restarting
// and may not have recorded a mapping created the last time
if let Some(external_mapped_dial_info) = this.try_upnp_port_mapping().await {
// Got a port mapping, let's use it
return DetectionResultKind::Result {
possibilities,
result: DetectionResult {
config: this.config,
ddi: DetectedDialInfo::Detected(DialInfoDetail {
dial_info: external_mapped_dial_info.clone(),
class: DialInfoClass::Mapped,
}),
external_address_types: AddressTypeSet::only(
external_mapped_dial_info.address_type(),
),
},
};
}
DetectionResultKind::Failure { possibilities }
});
unord.push(do_mapped_fut);
// And validate it
self.protocol_process_mapped_dial_info(&mut all_possibilities, &mut unord);
}
}
// NAT Detection
@ -898,7 +905,7 @@ impl DiscoveryContext {
let local_address_in_external_info = self
.inner
.lock()
.external_info
.external_infos
.iter()
.find_map(|ei| self.intf_addrs.contains(&ei.address).then_some(true))
.unwrap_or_default();


@ -662,11 +662,9 @@ impl Network {
};
} else {
// Handle connection-oriented protocols
let connmgr = self.network_manager().connection_manager();
let conn = network_result_try!(
self.network_manager()
.connection_manager()
.get_or_create_connection(dial_info.clone())
.await?
connmgr.get_or_create_connection(dial_info.clone()).await?
);
if let ConnectionHandleSendResult::NotSent(_) = conn.send_async(data).await {


@ -118,7 +118,8 @@ impl Network {
let socket_arc = Arc::new(udp_socket);
// Create protocol handler
let protocol_handler = RawUdpProtocolHandler::new(self.registry(), socket_arc);
let protocol_handler =
RawUdpProtocolHandler::new(self.registry(), socket_arc, addr.is_ipv6());
// Record protocol handler
let mut inner = self.inner.lock();


@ -7,16 +7,30 @@ pub struct RawUdpProtocolHandler {
registry: VeilidComponentRegistry,
socket: Arc<UdpSocket>,
assembly_buffer: AssemblyBuffer,
is_ipv6: bool,
default_ttl: u32,
current_ttl: Arc<AsyncMutex<u32>>,
}
impl_veilid_component_registry_accessor!(RawUdpProtocolHandler);
impl RawUdpProtocolHandler {
pub fn new(registry: VeilidComponentRegistry, socket: Arc<UdpSocket>) -> Self {
pub fn new(registry: VeilidComponentRegistry, socket: Arc<UdpSocket>, is_ipv6: bool) -> Self {
// Get original TTL
let default_ttl = if is_ipv6 {
socket2_operation(socket.as_ref(), |s| s.unicast_hops_v6())
.expect("getting IPV6_UNICAST_HOPS should not fail")
} else {
socket2_operation(socket.as_ref(), |s| s.ttl()).expect("getting IP_TTL should not fail")
};
Self {
registry,
socket,
assembly_buffer: AssemblyBuffer::new(),
is_ipv6,
default_ttl,
current_ttl: Arc::new(AsyncMutex::new(default_ttl)),
}
}
@ -104,24 +118,35 @@ impl RawUdpProtocolHandler {
return Ok(NetworkResult::no_connection_other("punished"));
}
// Fragment and send
let sender = |framed_chunk: Vec<u8>, remote_addr: SocketAddr| async move {
let len = network_result_try!(self
.socket
.send_to(&framed_chunk, remote_addr)
.await
.into_network_result()?);
if len != framed_chunk.len() {
bail_io_error_other!("UDP partial send")
// Ensure the TTL for sent packets is the default,
// then fragment and send the packets
{
let current_ttl = self.current_ttl.lock().await;
if *current_ttl != self.default_ttl {
veilid_log!(self error "Incorrect TTL on sent UDP packet ({} != {}): len={}, remote_addr={:?}", *current_ttl, self.default_ttl, data.len(), remote_addr);
}
Ok(NetworkResult::value(()))
};
network_result_try!(
self.assembly_buffer
.split_message(data, remote_addr, sender)
.await?
);
// Fragment and send
let sender = |framed_chunk: Vec<u8>, remote_addr: SocketAddr| async move {
let len = network_result_try!(self
.socket
.send_to(&framed_chunk, remote_addr)
.await
.into_network_result()?);
if len != framed_chunk.len() {
bail_io_error_other!("UDP partial send")
}
veilid_log!(self trace "udp::send_message:chunk(len={}) {:?}", len, remote_addr);
Ok(NetworkResult::value(()))
};
network_result_try!(
self.assembly_buffer
.split_message(data, remote_addr, sender)
.await?
);
}
// Return a flow for the sent message
let peer_addr = PeerAddress::new(
@ -157,22 +182,44 @@ impl RawUdpProtocolHandler {
return Ok(NetworkResult::no_connection_other("punished"));
}
// Get synchronous socket
let res = socket2_operation(self.socket.as_ref(), |s| {
// Get original TTL
let original_ttl = s.ttl()?;
// Ensure the TTL for sent packets is the default,
// then fragment and send the packets
let res = {
let mut current_ttl = self.current_ttl.lock().await;
if *current_ttl != self.default_ttl {
veilid_log!(self error "Incorrect TTL before sending holepunch UDP packet ({} != {}): remote_addr={:?}", *current_ttl, self.default_ttl, remote_addr);
}
// Set TTL
s.set_ttl(ttl)?;
// Get synchronous socket
socket2_operation(self.socket.as_ref(), |s| {
// Set TTL
let ttl_res = if self.is_ipv6 {
s.set_unicast_hops_v6(ttl)
} else {
s.set_ttl(ttl)
};
ttl_res.inspect_err(|e| {
veilid_log!(self error "Failed to set TTL on holepunch UDP socket: {} remote_addr={:?}", e, remote_addr);
})?;
*current_ttl = ttl;
// Send zero length packet
let res = s.send_to(&[], &remote_addr.into());
// Send zero length packet
let res = s.send_to(&[], &remote_addr.into());
// Restore TTL immediately
s.set_ttl(original_ttl)?;
// Restore TTL immediately
let ttl_res = if self.is_ipv6 {
s.set_unicast_hops_v6(self.default_ttl)
} else {
s.set_ttl(self.default_ttl)
};
ttl_res.inspect_err(|e| {
veilid_log!(self error "Failed to reset TTL on holepunch UDP socket: {} remote_addr={:?}", e, remote_addr);
})?;
*current_ttl = self.default_ttl;
res
});
res
})
};
// Check for errors
let len = network_result_try!(res.into_network_result()?);
@ -208,6 +255,10 @@ impl RawUdpProtocolHandler {
let local_socket_addr = compatible_unspecified_socket_addr(socket_addr);
let socket = bind_async_udp_socket(local_socket_addr)?
.ok_or(io::Error::from(io::ErrorKind::AddrInUse))?;
Ok(RawUdpProtocolHandler::new(registry, Arc::new(socket)))
Ok(RawUdpProtocolHandler::new(
registry,
Arc::new(socket),
local_socket_addr.is_ipv6(),
))
}
}
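The is_ipv6 plumbing above exists because the two IP generations expose the hop limit through different socket options, and socket2 models them as separate accessors; using the v4 accessor on a v6 socket is presumably why the unconditional ttl()/set_ttl() calls in the old holepunch path misbehaved. A hedged helper sketch, consistent with the socket2 calls used in this diff:

// IPv4 uses IP_TTL; IPv6 uses IPV6_UNICAST_HOPS.
fn set_hop_limit(s: &socket2::Socket, is_ipv6: bool, hops: u32) -> std::io::Result<()> {
    if is_ipv6 { s.set_unicast_hops_v6(hops) } else { s.set_ttl(hops) }
}
fn get_hop_limit(s: &socket2::Socket, is_ipv6: bool) -> std::io::Result<u32> {
    if is_ipv6 { s.unicast_hops_v6() } else { s.ttl() }
}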


@ -6,7 +6,7 @@ use async_tungstenite::tungstenite::handshake::server::{
Callback, ErrorResponse, Request, Response,
};
use async_tungstenite::tungstenite::http::StatusCode;
use async_tungstenite::tungstenite::protocol::{frame::coding::CloseCode, CloseFrame, Message};
use async_tungstenite::tungstenite::protocol::Message;
use async_tungstenite::tungstenite::Error;
use async_tungstenite::{accept_hdr_async, client_async, WebSocketStream};
use futures_util::{AsyncRead, AsyncWrite, SinkExt};
@ -98,45 +98,27 @@ where
#[instrument(level = "trace", target = "protocol", err, skip_all)]
pub async fn close(&self) -> io::Result<NetworkResult<()>> {
let timeout_ms = self
.registry
.config()
.with(|c| c.network.connection_initial_timeout_ms);
// Make an attempt to close the stream normally
let mut stream = self.stream.clone();
let out = match stream
.send(Message::Close(Some(CloseFrame {
code: CloseCode::Normal,
reason: "".into(),
})))
.await
{
Ok(v) => NetworkResult::value(v),
Err(e) => err_to_network_result(e),
};
// This close does not do a TCP shutdown so it is safe and will not cause TIME_WAIT
let _ = stream.close().await;
Ok(out)
// Drive connection to close
/*
let cur_ts = get_timestamp();
loop {
match stream.flush().await {
Ok(()) => {}
Err(Error::Io(ioerr)) => {
break Err(ioerr).into_network_result();
}
Err(Error::ConnectionClosed) => {
break Ok(NetworkResult::value(()));
}
Err(e) => {
break Err(to_io_error_other(e));
}
match timeout(timeout_ms, stream.close()).await {
Ok(Ok(())) => {}
Ok(Err(e)) => {
return Ok(err_to_network_result(e));
}
if get_timestamp().saturating_sub(cur_ts) >= MAX_CONNECTION_CLOSE_WAIT_US {
return Ok(NetworkResult::Timeout);
Err(_) => {
// Timed out
return Ok(NetworkResult::timeout());
}
}
*/
};
Ok(NetworkResult::value(()))
}
#[instrument(level = "trace", target="protocol", err, skip(self, message), fields(network_result, message.len = message.len()))]
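Both websocket close paths in this commit (the native one above and the WASM one later in this diff) now share the same shape: bound the graceful close with the connection-initial timeout and map expiry to a timeout network result. A hedged distillation, assuming the timeout(ms, future) helper used throughout this diff:

// Err(_) from timeout() means the deadline expired before close completed.
match timeout(timeout_ms, stream.close()).await {
    Ok(Ok(())) => Ok(NetworkResult::value(())),
    Ok(Err(e)) => Ok(err_to_network_result(e)),
    Err(_) => Ok(NetworkResult::timeout()),
}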


@ -480,20 +480,21 @@ impl NetworkConnection {
}
}
veilid_log!(registry trace
"Connection loop finished flow={:?}",
flow
);
// Let the connection manager know the receive loop exited
connection_manager
.report_connection_finished(connection_id)
.await;
.report_connection_finished(connection_id);
// Close the low level socket
if let Err(e) = protocol_connection.close().await {
veilid_log!(registry debug "Protocol connection close error: {}", e);
}
veilid_log!(registry trace
"Connection loop exited flow={:?}",
flow
);
}.in_current_span())
}


@ -0,0 +1,120 @@
use futures_util::StreamExt as _;
use stop_token::future::FutureExt as _;
use super::*;
#[derive(Debug)]
pub(super) enum RelayWorkerRequestKind {
Relay {
relay_nr: FilteredNodeRef,
data: Vec<u8>,
},
}
#[derive(Debug)]
pub(super) struct RelayWorkerRequest {
enqueued_ts: Timestamp,
span: Span,
kind: RelayWorkerRequestKind,
}
impl NetworkManager {
pub(super) fn startup_relay_workers(&self) -> EyreResult<()> {
let mut inner = self.inner.lock();
// Relay workers
let channel = flume::bounded(self.queue_size as usize);
inner.relay_send_channel = Some(channel.0.clone());
inner.relay_stop_source = Some(StopSource::new());
// spin up N workers
veilid_log!(self debug "Starting {} relay workers", self.concurrency);
for task_n in 0..self.concurrency {
let registry = self.registry();
let receiver = channel.1.clone();
let stop_token = inner.relay_stop_source.as_ref().unwrap().token();
let jh = spawn(&format!("relay worker {}", task_n), async move {
let this = registry.network_manager();
Box::pin(this.relay_worker(stop_token, receiver)).await
});
inner.relay_worker_join_handles.push(jh);
}
Ok(())
}
pub(super) async fn shutdown_relay_workers(&self) {
// Stop the relay workers
let mut unord = FuturesUnordered::new();
{
let mut inner = self.inner.lock();
// take the join handles out
for h in inner.relay_worker_join_handles.drain(..) {
unord.push(h);
}
// drop the stop
drop(inner.relay_stop_source.take());
}
veilid_log!(self debug "Stopping {} relay workers", unord.len());
// Wait for them to complete
while unord.next().await.is_some() {}
}
pub(super) async fn relay_worker(
&self,
stop_token: StopToken,
receiver: flume::Receiver<RelayWorkerRequest>,
) {
while let Ok(Ok(request)) = receiver.recv_async().timeout_at(stop_token.clone()).await {
let relay_request_span = tracing::trace_span!("relay request");
relay_request_span.follows_from(request.span);
// Measure dequeue time
let dequeue_ts = Timestamp::now();
let dequeue_latency = dequeue_ts.saturating_sub(request.enqueued_ts);
// Process request kind
match request.kind {
RelayWorkerRequestKind::Relay { relay_nr, data } => {
// Relay the packet to the desired destination
veilid_log!(self trace "relaying {} bytes to {}", data.len(), relay_nr);
if let Err(e) = pin_future!(self.send_data(relay_nr, data.to_vec())).await {
veilid_log!(self debug "failed to relay envelope: {}" ,e);
}
}
}
// Measure process time
let process_ts = Timestamp::now();
let process_latency = process_ts.saturating_sub(dequeue_ts);
// Accounting
self.stats_relay_processed(dequeue_latency, process_latency)
}
}
#[instrument(level = "trace", target = "rpc", skip_all)]
pub(super) fn enqueue_relay(&self, relay_nr: FilteredNodeRef, data: Vec<u8>) -> EyreResult<()> {
let _guard = self
.startup_context
.startup_lock
.enter()
.wrap_err("not started up")?;
let send_channel = {
let inner = self.inner.lock();
let Some(send_channel) = inner.relay_send_channel.as_ref().cloned() else {
bail!("send channel is closed");
};
send_channel
};
send_channel
.try_send(RelayWorkerRequest {
enqueued_ts: Timestamp::now(),
span: Span::current(),
kind: RelayWorkerRequestKind::Relay { relay_nr, data },
})
.map_err(|e| eyre!("failed to enqueue relay: {}", e))?;
Ok(())
}
}
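A usage note on the pool above: the queue is flume::bounded and enqueue_relay uses try_send, so a saturated pool sheds relays immediately instead of stalling the envelope-processing path. The call site added in NetworkManager earlier in this diff treats that as non-fatal:

// From the envelope path in this commit:
if let Err(e) = self.enqueue_relay(relay_nr, data.to_vec()) {
    // Couldn't enqueue, but not the sender's fault
    veilid_log!(self debug "failed to enqueue relay: {}", e);
    return Ok(false);
}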


@ -152,7 +152,7 @@ impl NetworkManager {
// If a node is unreachable it may still have an existing inbound connection
// Try that, but don't cache anything
network_result_try!(
pin_future_closure!(self.send_data_ncm_existing(target_node_ref, data)).await?
pin_future_closure!(self.send_data_unreachable(target_node_ref, data)).await?
)
}
Some(NodeContactMethod {
@ -239,6 +239,42 @@ impl NetworkManager {
}))
}
/// Send data to unreachable node
#[instrument(level = "trace", target = "net", skip_all, err)]
async fn send_data_unreachable(
&self,
target_node_ref: FilteredNodeRef,
data: Vec<u8>,
) -> EyreResult<NetworkResult<UniqueFlow>> {
// First try to send data to the last connection we've seen this peer on
let Some(flow) = target_node_ref.last_flow() else {
return Ok(NetworkResult::no_connection_other(format!(
"node was unreachable: {}",
target_node_ref
)));
};
let net = self.net();
let unique_flow = match pin_future!(debug_duration(
|| { net.send_data_to_existing_flow(flow, data) },
Some(1_000_000)
))
.await?
{
SendDataToExistingFlowResult::Sent(unique_flow) => unique_flow,
SendDataToExistingFlowResult::NotSent(_) => {
return Ok(NetworkResult::no_connection_other(
"failed to send to existing flow",
));
}
};
// Update timestamp for this last connection since we just sent to it
self.set_last_flow(target_node_ref.unfiltered(), flow, Timestamp::now());
Ok(NetworkResult::value(unique_flow))
}
/// Send data using NodeContactMethod::Existing
#[instrument(level = "trace", target = "net", skip_all, err)]
async fn send_data_ncm_existing(
@ -255,7 +291,12 @@ impl NetworkManager {
};
let net = self.net();
let unique_flow = match pin_future!(net.send_data_to_existing_flow(flow, data)).await? {
let unique_flow = match pin_future!(debug_duration(
|| { net.send_data_to_existing_flow(flow, data) },
Some(1_000_000)
))
.await?
{
SendDataToExistingFlowResult::Sent(unique_flow) => unique_flow,
SendDataToExistingFlowResult::NotSent(_) => {
return Ok(NetworkResult::no_connection_other(
@ -297,7 +338,12 @@ impl NetworkManager {
// First try to send data to the last flow we've seen this peer on
let data = if let Some(flow) = seq_target_node_ref.last_flow() {
let net = self.net();
match pin_future!(net.send_data_to_existing_flow(flow, data)).await? {
match pin_future!(debug_duration(
|| { net.send_data_to_existing_flow(flow, data) },
Some(1_000_000)
))
.await?
{
SendDataToExistingFlowResult::Sent(unique_flow) => {
// Update timestamp for this last connection since we just sent to it
self.set_last_flow(target_node_ref.unfiltered(), flow, Timestamp::now());
@ -321,9 +367,18 @@ impl NetworkManager {
data
};
let excessive_reverse_connect_duration_us = self.config().with(|c| {
(c.network.connection_initial_timeout_ms * 2
+ c.network.reverse_connection_receipt_time_ms) as u64
* 1000
});
let unique_flow = network_result_try!(
pin_future!(self.do_reverse_connect(relay_nr.clone(), target_node_ref.clone(), data))
.await?
pin_future!(debug_duration(
|| { self.do_reverse_connect(relay_nr.clone(), target_node_ref.clone(), data) },
Some(excessive_reverse_connect_duration_us)
))
.await?
);
Ok(NetworkResult::value(unique_flow))
}
@ -339,7 +394,12 @@ impl NetworkManager {
// First try to send data to the last flow we've seen this peer on
let data = if let Some(flow) = target_node_ref.last_flow() {
let net = self.net();
match pin_future!(net.send_data_to_existing_flow(flow, data)).await? {
match pin_future!(debug_duration(
|| { net.send_data_to_existing_flow(flow, data) },
Some(1_000_000)
))
.await?
{
SendDataToExistingFlowResult::Sent(unique_flow) => {
// Update timestamp for this last connection since we just sent to it
self.set_last_flow(target_node_ref.unfiltered(), flow, Timestamp::now());
@ -363,9 +423,16 @@ impl NetworkManager {
data
};
let hole_punch_receipt_time_us = self
.config()
.with(|c| c.network.hole_punch_receipt_time_ms as u64 * 1000);
let unique_flow = network_result_try!(
pin_future!(self.do_hole_punch(relay_nr.clone(), target_node_ref.clone(), data))
.await?
pin_future!(debug_duration(
|| { self.do_hole_punch(relay_nr.clone(), target_node_ref.clone(), data) },
Some(hole_punch_receipt_time_us * 2)
))
.await?
);
Ok(NetworkResult::value(unique_flow))
@ -391,7 +458,12 @@ impl NetworkManager {
);
let net = self.net();
match pin_future!(net.send_data_to_existing_flow(flow, data)).await? {
match pin_future!(debug_duration(
|| { net.send_data_to_existing_flow(flow, data) },
Some(1_000_000)
))
.await?
{
SendDataToExistingFlowResult::Sent(unique_flow) => {
// Update timestamp for this last connection since we just sent to it
self.set_last_flow(node_ref.unfiltered(), flow, Timestamp::now());
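The thresholds handed to debug_duration in this file are microsecond counts. A hedged worked example, with assumed (not diff-sourced) config values:

// Assumed values for illustration only:
let connection_initial_timeout_ms: u64 = 2_000;
let reverse_connection_receipt_time_ms: u64 = 5_000;
// Matches the reverse-connect expression above:
let excessive_reverse_connect_duration_us =
    (connection_initial_timeout_ms * 2 + reverse_connection_receipt_time_ms) * 1_000;
assert_eq!(excessive_reverse_connect_duration_us, 9_000_000); // 9 seconds
// Existing-flow sends use a flat Some(1_000_000), i.e. one second.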


@ -22,6 +22,10 @@ impl Default for PerAddressStatsKey {
pub struct NetworkManagerStats {
pub self_stats: PerAddressStats,
pub per_address_stats: LruCache<PerAddressStatsKey, PerAddressStats>,
pub relay_worker_dequeue_latency: LatencyStats,
pub relay_worker_process_latency: LatencyStats,
pub relay_worker_dequeue_latency_accounting: LatencyStatsAccounting,
pub relay_worker_process_latency_accounting: LatencyStatsAccounting,
}
impl Default for NetworkManagerStats {
@ -29,6 +33,10 @@ impl Default for NetworkManagerStats {
Self {
self_stats: PerAddressStats::default(),
per_address_stats: LruCache::new(IPADDR_TABLE_SIZE),
relay_worker_dequeue_latency: LatencyStats::default(),
relay_worker_process_latency: LatencyStats::default(),
relay_worker_dequeue_latency_accounting: LatencyStatsAccounting::new(),
relay_worker_process_latency_accounting: LatencyStatsAccounting::new(),
}
}
}
@ -36,7 +44,7 @@ impl Default for NetworkManagerStats {
impl NetworkManager {
// Callbacks from low level network for statistics gathering
pub fn stats_packet_sent(&self, addr: IpAddr, bytes: ByteCount) {
let inner = &mut *self.inner.lock();
let mut inner = self.inner.lock();
inner
.stats
.self_stats
@ -53,7 +61,7 @@ impl NetworkManager {
}
pub fn stats_packet_rcvd(&self, addr: IpAddr, bytes: ByteCount) {
let inner = &mut *self.inner.lock();
let mut inner = self.inner.lock();
inner
.stats
.self_stats
@ -69,28 +77,27 @@ impl NetworkManager {
.add_down(bytes);
}
pub fn stats_relay_processed(
&self,
dequeue_latency: TimestampDuration,
process_latency: TimestampDuration,
) {
let mut inner = self.inner.lock();
inner.stats.relay_worker_dequeue_latency = inner
.stats
.relay_worker_dequeue_latency_accounting
.record_latency(dequeue_latency);
inner.stats.relay_worker_process_latency = inner
.stats
.relay_worker_process_latency_accounting
.record_latency(process_latency);
}
pub fn get_stats(&self) -> NetworkManagerStats {
let inner = self.inner.lock();
inner.stats.clone()
}
pub fn debug(&self) -> String {
let stats = self.get_stats();
let mut out = String::new();
out += "Network Manager\n";
out += "---------------\n";
let mut out = format!(
"Transfer stats:\n{}\n",
indent_all_string(&stats.self_stats.transfer_stats)
);
out += "Node Contact Method Cache\n";
out += "-------------------------\n";
out += &self.inner.lock().node_contact_method_cache.debug();
out
}
pub fn get_veilid_state(&self) -> Box<VeilidStateNetwork> {
if !self.network_is_started() {
return Box::new(VeilidStateNetwork {


@ -52,8 +52,16 @@ impl WebsocketNetworkConnection {
instrument(level = "trace", err, skip(self))
)]
pub async fn close(&self) -> io::Result<NetworkResult<()>> {
let timeout_ms = self
.registry
.config()
.with(|c| c.network.connection_initial_timeout_ms);
#[allow(unused_variables)]
let x = self.inner.ws_meta.close().await.map_err(ws_err_to_io_error);
let x = match timeout(timeout_ms, self.inner.ws_meta.close()).await {
Ok(v) => v.map_err(ws_err_to_io_error),
Err(_) => return Ok(NetworkResult::timeout()),
};
#[cfg(feature = "verbose-tracing")]
veilid_log!(self debug "close result: {:?}", x);
Ok(NetworkResult::value(()))


@ -23,7 +23,7 @@ const UNRELIABLE_PING_SPAN_SECS: u32 = 60;
const UNRELIABLE_PING_INTERVAL_SECS: u32 = 5;
/// - Number of consecutive lost answers on an unordered protocol we will
/// tolerate before we call something unreliable
const UNRELIABLE_LOST_ANSWERS_UNORDERED: u32 = 1;
const UNRELIABLE_LOST_ANSWERS_UNORDERED: u32 = 2;
/// - Number of consecutive lost answers on an ordered protocol we will
/// tolerate before we call something unreliable
const UNRELIABLE_LOST_ANSWERS_ORDERED: u32 = 0;
@ -1011,7 +1011,12 @@ impl BucketEntryInner {
match latest_contact_time {
None => {
error!("Peer is reliable, but not seen!");
// Peer may appear reliable from a previous attach/detach
// But reliability uses last_seen_ts not the last_outbound_contact_time
// Regardless, if we haven't pinged it, we need to ping it.
// But if it was reliable before, and pings successfully, then it can
// stay reliable, so we don't make it unreliable just because we haven't
// contacted it yet during this attachment.
true
}
Some(latest_contact_time) => {
@ -1068,11 +1073,14 @@ impl BucketEntryInner {
}
pub(super) fn make_not_dead(&mut self, cur_ts: Timestamp) {
self.peer_stats.rpc_stats.last_seen_ts = None;
self.peer_stats.rpc_stats.failed_to_send = 0;
self.peer_stats.rpc_stats.recent_lost_answers_unordered = 0;
self.peer_stats.rpc_stats.recent_lost_answers_ordered = 0;
assert!(self.check_dead(cur_ts).is_none());
if self.check_dead(cur_ts).is_some() {
self.peer_stats.rpc_stats.last_seen_ts = None;
self.peer_stats.rpc_stats.first_consecutive_seen_ts = None;
self.peer_stats.rpc_stats.failed_to_send = 0;
self.peer_stats.rpc_stats.recent_lost_answers_unordered = 0;
self.peer_stats.rpc_stats.recent_lost_answers_ordered = 0;
assert!(self.check_dead(cur_ts).is_none());
}
}
pub(super) fn _state_debug_info(&self, cur_ts: Timestamp) -> String {

View File

@ -43,7 +43,9 @@ pub const RELAY_MANAGEMENT_INTERVAL_SECS: u32 = 1;
/// How frequently we optimize relays
pub const RELAY_OPTIMIZATION_INTERVAL_SECS: u32 = 10;
/// What percentile to keep our relays optimized to
pub const RELAY_OPTIMIZATION_PERCENTILE: f32 = 75.0;
pub const RELAY_OPTIMIZATION_PERCENTILE: f32 = 66.0;
/// What percentile to choose our relays from (must be greater than RELAY_OPTIMIZATION_PERCENTILE)
pub const RELAY_SELECTION_PERCENTILE: f32 = 85.0;
/// How frequently we tick the private route management routine
pub const PRIVATE_ROUTE_MANAGEMENT_INTERVAL_SECS: u32 = 1;
@ -1039,7 +1041,7 @@ impl RoutingTable {
#[instrument(level = "trace", skip(self), err)]
pub async fn find_nodes_close_to_node_id(
&self,
node_ref: NodeRef,
node_ref: FilteredNodeRef,
node_id: TypedKey,
capabilities: Vec<Capability>,
) -> EyreResult<NetworkResult<Vec<NodeRef>>> {
@ -1047,11 +1049,7 @@ impl RoutingTable {
let res = network_result_try!(
rpc_processor
.rpc_call_find_node(
Destination::direct(node_ref.default_filtered()),
node_id,
capabilities
)
.rpc_call_find_node(Destination::direct(node_ref), node_id, capabilities)
.await?
);
@ -1067,7 +1065,7 @@ impl RoutingTable {
pub async fn find_nodes_close_to_self(
&self,
crypto_kind: CryptoKind,
node_ref: NodeRef,
node_ref: FilteredNodeRef,
capabilities: Vec<Capability>,
) -> EyreResult<NetworkResult<Vec<NodeRef>>> {
let self_node_id = self.node_id(crypto_kind);
@ -1081,7 +1079,7 @@ impl RoutingTable {
pub async fn find_nodes_close_to_node_ref(
&self,
crypto_kind: CryptoKind,
node_ref: NodeRef,
node_ref: FilteredNodeRef,
capabilities: Vec<Capability>,
) -> EyreResult<NetworkResult<Vec<NodeRef>>> {
let Some(target_node_id) = node_ref.node_ids().get(crypto_kind) else {
@ -1102,7 +1100,7 @@ impl RoutingTable {
capabilities: Vec<Capability>,
) {
// Ask node for nodes closest to our own node
let closest_nodes = network_result_value_or_log!(self match pin_future!(self.find_nodes_close_to_self(crypto_kind, node_ref.clone(), capabilities.clone())).await {
let closest_nodes = network_result_value_or_log!(self match pin_future!(self.find_nodes_close_to_self(crypto_kind, node_ref.sequencing_filtered(Sequencing::PreferOrdered), capabilities.clone())).await {
Err(e) => {
veilid_log!(self error
"find_self failed for {:?}: {:?}",
@ -1118,7 +1116,7 @@ impl RoutingTable {
// Ask each node near us to find us as well
if wide {
for closest_nr in closest_nodes {
network_result_value_or_log!(self match pin_future!(self.find_nodes_close_to_self(crypto_kind, closest_nr.clone(), capabilities.clone())).await {
network_result_value_or_log!(self match pin_future!(self.find_nodes_close_to_self(crypto_kind, closest_nr.sequencing_filtered(Sequencing::PreferOrdered), capabilities.clone())).await {
Err(e) => {
veilid_log!(self error
"find_self failed for {:?}: {:?}",
@ -1146,6 +1144,18 @@ impl RoutingTable {
inner.find_fastest_node(cur_ts, filter, metric)
}
#[instrument(level = "trace", skip(self, filter, metric), ret)]
pub fn find_random_fast_node(
&self,
cur_ts: Timestamp,
filter: impl Fn(&BucketEntryInner) -> bool,
percentile: f32,
metric: impl Fn(&LatencyStats) -> TimestampDuration,
) -> Option<NodeRef> {
let inner = self.inner.read();
inner.find_random_fast_node(cur_ts, filter, percentile, metric)
}
#[instrument(level = "trace", skip(self, filter, metric), ret)]
pub fn get_node_speed_percentile(
&self,

View File

@ -119,6 +119,24 @@ impl NodeRefOperateTrait for FilteredNodeRef {
let inner = &mut *routing_table.inner.write();
self.entry.with_mut(inner, f)
}
fn with_inner<T, F>(&self, f: F) -> T
where
F: FnOnce(&RoutingTableInner) -> T,
{
let routing_table = self.registry.routing_table();
let inner = &*routing_table.inner.read();
f(inner)
}
fn with_inner_mut<T, F>(&self, f: F) -> T
where
F: FnOnce(&mut RoutingTableInner) -> T,
{
let routing_table = self.registry.routing_table();
let inner = &mut *routing_table.inner.write();
f(inner)
}
}
impl NodeRefCommonTrait for FilteredNodeRef {}

View File

@ -139,6 +139,24 @@ impl NodeRefOperateTrait for NodeRef {
let inner = &mut *routing_table.inner.write();
self.entry.with_mut(inner, f)
}
fn with_inner<T, F>(&self, f: F) -> T
where
F: FnOnce(&RoutingTableInner) -> T,
{
let routing_table = self.routing_table();
let inner = &*routing_table.inner.read();
f(inner)
}
fn with_inner_mut<T, F>(&self, f: F) -> T
where
F: FnOnce(&mut RoutingTableInner) -> T,
{
let routing_table = self.routing_table();
let inner = &mut *routing_table.inner.write();
f(inner)
}
}
impl NodeRefCommonTrait for NodeRef {}

View File

@ -90,6 +90,21 @@ impl<'a, N: NodeRefAccessorsTrait + NodeRefOperateTrait + fmt::Debug + fmt::Disp
{
panic!("need to locked_mut() for this operation")
}
fn with_inner<T, F>(&self, f: F) -> T
where
F: FnOnce(&RoutingTableInner) -> T,
{
let inner = &*self.inner.lock();
f(inner)
}
fn with_inner_mut<T, F>(&self, _f: F) -> T
where
F: FnOnce(&mut RoutingTableInner) -> T,
{
panic!("need to locked_mut() for this operation")
}
}
impl<'a, N: NodeRefAccessorsTrait + NodeRefOperateTrait + fmt::Debug + fmt::Display + Clone>

View File

@ -92,6 +92,22 @@ impl<'a, N: NodeRefAccessorsTrait + NodeRefOperateTrait + fmt::Debug + fmt::Disp
let inner = &mut *self.inner.lock();
self.nr.entry().with_mut(inner, f)
}
fn with_inner<T, F>(&self, f: F) -> T
where
F: FnOnce(&RoutingTableInner) -> T,
{
let inner = &*self.inner.lock();
f(inner)
}
fn with_inner_mut<T, F>(&self, f: F) -> T
where
F: FnOnce(&mut RoutingTableInner) -> T,
{
let inner = &mut *self.inner.lock();
f(inner)
}
}
impl<'a, N: NodeRefAccessorsTrait + NodeRefOperateTrait + fmt::Debug + fmt::Display + Clone>

View File

@ -20,6 +20,13 @@ pub(crate) trait NodeRefOperateTrait {
fn operate_mut<T, F>(&self, f: F) -> T
where
F: FnOnce(&mut RoutingTableInner, &mut BucketEntryInner) -> T;
#[expect(dead_code)]
fn with_inner<T, F>(&self, f: F) -> T
where
F: FnOnce(&RoutingTableInner) -> T;
fn with_inner_mut<T, F>(&self, f: F) -> T
where
F: FnOnce(&mut RoutingTableInner) -> T;
}
// Common Operations
@ -115,7 +122,7 @@ pub(crate) trait NodeRefCommonTrait: NodeRefAccessorsTrait + NodeRefOperateTrait
// }
fn relay(&self, routing_domain: RoutingDomain) -> EyreResult<Option<FilteredNodeRef>> {
self.operate_mut(|rti, e| {
let Some(rpi) = self.operate(|rti, e| {
let Some(sni) = e.signed_node_info(routing_domain) else {
return Ok(None);
};
@ -127,8 +134,14 @@ pub(crate) trait NodeRefCommonTrait: NodeRefAccessorsTrait + NodeRefOperateTrait
if rti.routing_table().matches_own_node_id(rpi.node_ids()) {
bail!("Can't relay though ourselves");
}
Ok(Some(rpi))
})?
else {
return Ok(None);
};
// Register relay node and return noderef
self.with_inner_mut(|rti| {
let nr = rti.register_node_with_peer_info(rpi, false)?;
Ok(Some(nr))
})

View File

@ -716,7 +716,7 @@ impl RouteSpecStore {
};
let Some(rsid) = inner.content.get_id_by_key(&public_key.value) else {
veilid_log!(self debug "route id does not exist: {:?}", public_key.value);
veilid_log!(self debug target: "network_result", "route id does not exist: {:?}", public_key.value);
return None;
};
let Some(rssd) = inner.content.get_detail(&rsid) else {
@ -753,7 +753,7 @@ impl RouteSpecStore {
return None;
}
Err(e) => {
veilid_log!(self debug "errir verifying signature for hop {} at {} on private route {}: {}", hop_n, hop_public_key, public_key, e);
veilid_log!(self debug "error verifying signature for hop {} at {} on private route {}: {}", hop_n, hop_public_key, public_key, e);
return None;
}
}

View File

@ -1445,6 +1445,54 @@ impl RoutingTableInner {
fastest_node.map(|e| NodeRef::new(self.registry(), e))
}
#[instrument(level = "trace", skip(self, filter, metric), ret)]
pub fn find_random_fast_node(
&self,
cur_ts: Timestamp,
filter: impl Fn(&BucketEntryInner) -> bool,
percentile: f32,
metric: impl Fn(&LatencyStats) -> TimestampDuration,
) -> Option<NodeRef> {
// Go through all entries and find the ones that match the filter function
let mut all_filtered_nodes: Vec<Arc<BucketEntry>> = Vec::new();
// Iterate all known nodes for candidates
self.with_entries(cur_ts, BucketEntryState::Unreliable, |rti, entry| {
let entry2 = entry.clone();
entry.with(rti, |_rti, e| {
// Filter this node
if filter(e) {
all_filtered_nodes.push(entry2);
}
});
// Don't end early, iterate through all entries
Option::<()>::None
});
// Sort by fastest reliable, using the caller-provided latency metric (typically tm90)
all_filtered_nodes.sort_by(|a, b| {
a.with(self, |rti, ea| {
b.with(rti, |_rti, eb| {
BucketEntryInner::cmp_fastest_reliable(cur_ts, ea, eb, &metric)
})
})
});
if all_filtered_nodes.is_empty() {
return None;
}
let max_index =
(((all_filtered_nodes.len() - 1) as f32) * (100.0 - percentile) / 100.0) as u32;
let chosen_index = (get_random_u32() % (max_index + 1)) as usize;
// Return the chosen node
Some(NodeRef::new(
self.registry(),
all_filtered_nodes[chosen_index].clone(),
))
}
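
To make the selection math concrete: the candidates are sorted fastest-first, max_index bounds the slice to the top (100 - percentile) percent, and the node is drawn uniformly from that slice. A standalone sketch of the same index computation (plain Rust, no Veilid types):

fn percentile_index_bound(len: usize, percentile: f32) -> u32 {
    // Same formula as above: the last index still eligible for selection
    (((len - 1) as f32) * (100.0 - percentile) / 100.0) as u32
}

fn main() {
    // 20 candidates with RELAY_SELECTION_PERCENTILE = 85.0 -> indices 0..=2 (fastest 3)
    assert_eq!(percentile_index_bound(20, 85.0), 2);
    // 10 candidates at the 66th percentile -> indices 0..=3
    assert_eq!(percentile_index_bound(10, 66.0), 3);
}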
#[instrument(level = "trace", skip(self, filter, metric), ret)]
pub fn get_node_relative_performance(
&self,

View File

@ -1,15 +1,5 @@
use super::*;
// Latency entry is per round-trip packet (ping or data)
// - Size is number of entries
const ROLLING_LATENCIES_SIZE: usize = 50;
// Transfers entries are in bytes total for the interval
// - Size is number of entries
// - Interval is number of seconds in each entry
const ROLLING_TRANSFERS_SIZE: usize = 10;
pub const ROLLING_TRANSFERS_INTERVAL_SECS: u32 = 1;
// State entry is per state reason change
// - Size is number of entries
const ROLLING_STATE_REASON_SPAN_SIZE: usize = 32;
@ -20,149 +10,6 @@ pub const UPDATE_STATE_STATS_INTERVAL_SECS: u32 = 1;
// - Interval is number of seconds in each entry
const ROLLING_ANSWERS_SIZE: usize = 10;
pub const ROLLING_ANSWER_INTERVAL_SECS: u32 = 60;
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct TransferCount {
down: ByteCount,
up: ByteCount,
}
#[derive(Debug, Clone, Default)]
pub struct TransferStatsAccounting {
rolling_transfers: VecDeque<TransferCount>,
current_transfer: TransferCount,
}
impl TransferStatsAccounting {
pub fn new() -> Self {
Self {
rolling_transfers: VecDeque::new(),
current_transfer: TransferCount::default(),
}
}
pub fn add_down(&mut self, bytes: ByteCount) {
self.current_transfer.down += bytes;
}
pub fn add_up(&mut self, bytes: ByteCount) {
self.current_transfer.up += bytes;
}
pub fn roll_transfers(
&mut self,
last_ts: Timestamp,
cur_ts: Timestamp,
transfer_stats: &mut TransferStatsDownUp,
) {
let dur_ms = cur_ts.saturating_sub(last_ts) / 1000u64;
while self.rolling_transfers.len() >= ROLLING_TRANSFERS_SIZE {
self.rolling_transfers.pop_front();
}
self.rolling_transfers.push_back(self.current_transfer);
transfer_stats.down.total += self.current_transfer.down;
transfer_stats.up.total += self.current_transfer.up;
self.current_transfer = TransferCount::default();
transfer_stats.down.maximum = 0.into();
transfer_stats.up.maximum = 0.into();
transfer_stats.down.minimum = u64::MAX.into();
transfer_stats.up.minimum = u64::MAX.into();
transfer_stats.down.average = 0.into();
transfer_stats.up.average = 0.into();
for xfer in &self.rolling_transfers {
let bpsd = xfer.down * 1000u64 / dur_ms;
let bpsu = xfer.up * 1000u64 / dur_ms;
transfer_stats.down.maximum.max_assign(bpsd);
transfer_stats.up.maximum.max_assign(bpsu);
transfer_stats.down.minimum.min_assign(bpsd);
transfer_stats.up.minimum.min_assign(bpsu);
transfer_stats.down.average += bpsd;
transfer_stats.up.average += bpsu;
}
let len = self.rolling_transfers.len() as u64;
if len > 0 {
transfer_stats.down.average /= len;
transfer_stats.up.average /= len;
}
}
}
#[derive(Debug, Clone, Default)]
pub struct LatencyStatsAccounting {
rolling_latencies: VecDeque<TimestampDuration>,
}
impl LatencyStatsAccounting {
pub fn new() -> Self {
Self {
rolling_latencies: VecDeque::new(),
}
}
fn get_tm_n(sorted_latencies: &[TimestampDuration], n: usize) -> Option<TimestampDuration> {
let tmcount = sorted_latencies.len() * n / 100;
if tmcount == 0 {
None
} else {
let mut tm = TimestampDuration::new(0);
for l in &sorted_latencies[..tmcount] {
tm += *l;
}
tm /= tmcount as u64;
Some(tm)
}
}
fn get_p_n(sorted_latencies: &[TimestampDuration], n: usize) -> TimestampDuration {
let pindex = (sorted_latencies.len() * n / 100).saturating_sub(1);
sorted_latencies[pindex]
}
pub fn record_latency(&mut self, latency: TimestampDuration) -> LatencyStats {
while self.rolling_latencies.len() >= ROLLING_LATENCIES_SIZE {
self.rolling_latencies.pop_front();
}
self.rolling_latencies.push_back(latency);
// Calculate latency stats
let mut fastest = TimestampDuration::new(u64::MAX);
let mut slowest = TimestampDuration::new(0u64);
let mut average = TimestampDuration::new(0u64);
for rl in &self.rolling_latencies {
fastest.min_assign(*rl);
slowest.max_assign(*rl);
average += *rl;
}
let len = self.rolling_latencies.len() as u64;
if len > 0 {
average /= len;
}
let mut sorted_latencies: Vec<_> = self.rolling_latencies.iter().copied().collect();
sorted_latencies.sort();
let tm90 = Self::get_tm_n(&sorted_latencies, 90).unwrap_or(average);
let tm75 = Self::get_tm_n(&sorted_latencies, 75).unwrap_or(average);
let p90 = Self::get_p_n(&sorted_latencies, 90);
let p75 = Self::get_p_n(&sorted_latencies, 75);
LatencyStats {
fastest,
average,
slowest,
tm90,
tm75,
p90,
p75,
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct StateReasonSpan {
state_reason: BucketEntryStateReason,

View File

@ -289,7 +289,7 @@ impl RoutingTable {
// Get what contact method would be used for contacting the bootstrap
let bsdi = match network_manager
.get_node_contact_method(nr.default_filtered())
.get_node_contact_method(nr.sequencing_filtered(Sequencing::PreferOrdered))
{
Ok(Some(ncm)) if ncm.is_direct() => ncm.direct_dial_info().unwrap(),
Ok(v) => {
@ -307,7 +307,7 @@ impl RoutingTable {
// Need VALID signed peer info, so ask bootstrap to find_node of itself
// which will ensure it has the bootstrap's signed peer info as part of the response
let _ = routing_table.find_nodes_close_to_node_ref(crypto_kind, nr.clone(), vec![]).await;
let _ = routing_table.find_nodes_close_to_node_ref(crypto_kind, nr.sequencing_filtered(Sequencing::PreferOrdered), vec![]).await;
// Ensure we got the signed peer info
if !nr.signed_node_info_has_valid_signature(routing_domain) {

View File

@ -95,39 +95,17 @@ impl RoutingTable {
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Ping the relay to keep it alive, over every protocol it is relaying for us
#[instrument(level = "trace", skip(self, futurequeue), err)]
async fn relay_keepalive_public_internet(
// Get protocol-specific noderefs for a relay to determine its liveness
// Relays get pinged over more protocols than non-relay nodes because we need to ensure
// that they can reliably forward packets with 'all' sequencing, not just over 'any' sequencing
fn get_relay_specific_noderefs(
&self,
cur_ts: Timestamp,
futurequeue: &mut VecDeque<PingValidatorFuture>,
) -> EyreResult<()> {
// Get the PublicInternet relay if we are using one
let Some(relay_nr) = self.relay_node(RoutingDomain::PublicInternet) else {
return Ok(());
};
relay_nr: FilteredNodeRef,
routing_domain: RoutingDomain,
) -> Vec<FilteredNodeRef> {
// Get our publicinternet dial info
let dids = self.all_filtered_dial_info_details(
RoutingDomain::PublicInternet.into(),
&DialInfoFilter::all(),
);
let opt_relay_keepalive_ts = self.relay_node_last_keepalive(RoutingDomain::PublicInternet);
let relay_needs_keepalive = opt_relay_keepalive_ts
.map(|kts| {
cur_ts.saturating_sub(kts).as_u64()
>= (RELAY_KEEPALIVE_PING_INTERVAL_SECS as u64 * 1_000_000u64)
})
.unwrap_or(true);
if !relay_needs_keepalive {
return Ok(());
}
// Say we're doing this keepalive now
self.inner
.write()
.set_relay_node_last_keepalive(RoutingDomain::PublicInternet, cur_ts);
let dids =
self.all_filtered_dial_info_details(routing_domain.into(), &DialInfoFilter::all());
// We need to keep-alive at one connection per ordering for relays
// but also one per NAT mapping that we need to keep open for our inbound dial info
@ -180,6 +158,41 @@ impl RoutingTable {
relay_noderefs.push(relay_nr);
}
relay_noderefs
}
// Ping the relay to keep it alive, over every protocol it is relaying for us
#[instrument(level = "trace", skip(self, futurequeue), err)]
async fn relay_keepalive_public_internet(
&self,
cur_ts: Timestamp,
futurequeue: &mut VecDeque<PingValidatorFuture>,
) -> EyreResult<()> {
// Get the PublicInternet relay if we are using one
let Some(relay_nr) = self.relay_node(RoutingDomain::PublicInternet) else {
return Ok(());
};
let opt_relay_keepalive_ts = self.relay_node_last_keepalive(RoutingDomain::PublicInternet);
let relay_needs_keepalive = opt_relay_keepalive_ts
.map(|kts| {
cur_ts.saturating_sub(kts).as_u64()
>= (RELAY_KEEPALIVE_PING_INTERVAL_SECS as u64 * 1_000_000u64)
})
.unwrap_or(true);
if !relay_needs_keepalive {
return Ok(());
}
// Say we're doing this keepalive now
self.inner
.write()
.set_relay_node_last_keepalive(RoutingDomain::PublicInternet, cur_ts);
// Get the sequencing-specific relay noderefs for this relay
let relay_noderefs =
self.get_relay_specific_noderefs(relay_nr, RoutingDomain::PublicInternet);
for relay_nr_filtered in relay_noderefs {
futurequeue.push_back(
async move {
@ -249,24 +262,36 @@ impl RoutingTable {
futurequeue: &mut VecDeque<PingValidatorFuture>,
) -> EyreResult<()> {
// Get all nodes needing pings in the PublicInternet routing domain
let relay_node_filter = self.make_public_internet_relay_node_filter();
let node_refs = self.get_nodes_needing_ping(RoutingDomain::PublicInternet, cur_ts);
// Just do a single ping with the best protocol for all the other nodes to check for liveness
for nr in node_refs {
let nr = nr.sequencing_clone(Sequencing::PreferOrdered);
// If the node is relay-capable, we should ping it over ALL sequencing types
// instead of just a simple liveness check on ANY best contact method
futurequeue.push_back(
async move {
#[cfg(feature = "verbose-tracing")]
veilid_log!(nr debug "--> PublicInternet Validator ping to {:?}", nr);
let rpc_processor = nr.rpc_processor();
let _ = rpc_processor
.rpc_call_status(Destination::direct(nr))
.await?;
Ok(())
}
.boxed(),
);
let all_noderefs = if nr.operate(|_rti, e| !relay_node_filter(e)) {
// If this is a relay capable node, get all the sequencing specific noderefs
self.get_relay_specific_noderefs(nr, RoutingDomain::PublicInternet)
} else {
// If a non-relay node, ping with the normal ping type
vec![nr.sequencing_clone(Sequencing::PreferOrdered)]
};
for nr in all_noderefs {
futurequeue.push_back(
async move {
#[cfg(feature = "verbose-tracing")]
veilid_log!(nr debug "--> PublicInternet Validator ping to {:?}", nr);
let rpc_processor = nr.rpc_processor();
let _ = rpc_processor
.rpc_call_status(Destination::direct(nr))
.await?;
Ok(())
}
.boxed(),
);
}
}
Ok(())

View File

@ -202,7 +202,12 @@ impl RoutingTable {
}
if !got_outbound_relay {
// Find a node in our routing table that is an acceptable inbound relay
if let Some(nr) = self.find_fastest_node(cur_ts, &relay_node_filter, |ls| ls.tm90) {
if let Some(nr) = self.find_random_fast_node(
cur_ts,
&relay_node_filter,
RELAY_SELECTION_PERCENTILE,
|ls| ls.tm90,
) {
veilid_log!(self debug "Inbound relay node selected: {}", nr);
editor.set_relay_node(Some(nr));
}

View File

@ -0,0 +1,18 @@
use super::*;
impl RPCProcessor {
pub fn debug_info_nodeinfo(&self) -> String {
let mut out = String::new();
let inner = self.inner.lock();
out += &format!(
"RPC Worker Dequeue Latency:\n{}",
indent_all_string(&inner.rpc_worker_dequeue_latency)
);
out += "\n";
out += &format!(
"RPC Worker Process Latency:\n{}",
indent_all_string(&inner.rpc_worker_process_latency)
);
out
}
}

View File

@ -206,7 +206,7 @@ impl Destination {
}
if opt_routing_domain.is_none() {
// In the case of an unexpected relay, log it and don't pass any sender peer info into an unexpected relay
veilid_log!(node warn "No routing domain for relay: relay={}, node={}", relay, node);
veilid_log!(node debug "Unexpected relay: relay={}, node={}", relay, node);
};
(

View File

@ -2,6 +2,7 @@ use super::*;
mod answer;
mod coders;
mod debug;
mod destination;
mod error;
mod fanout;
@ -22,6 +23,7 @@ mod rpc_status;
mod rpc_validate_dial_info;
mod rpc_value_changed;
mod rpc_watch_value;
mod rpc_worker;
mod sender_info;
mod sender_peer_info;
@ -48,7 +50,7 @@ pub(crate) use error::*;
pub(crate) use fanout::*;
pub(crate) use sender_info::*;
use futures_util::StreamExt;
use futures_util::StreamExt as _;
use stop_token::future::FutureExt as _;
use coders::*;
@ -56,6 +58,7 @@ use message::*;
use message_header::*;
use operation_waiter::*;
use rendered_operation::*;
use rpc_worker::*;
use sender_peer_info::*;
use crypto::*;
@ -67,6 +70,10 @@ impl_veilid_log_facility!("rpc");
/////////////////////////////////////////////////////////////////////
const RPC_WORKERS_PER_CORE: u32 = 16;
/////////////////////////////////////////////////////////////////////
#[derive(Debug)]
#[must_use]
struct WaitableReplyContext {
@ -122,9 +129,13 @@ impl Default for RPCProcessorStartupContext {
#[derive(Debug)]
#[must_use]
struct RPCProcessorInner {
send_channel: Option<flume::Sender<(Span, MessageEncoded)>>,
stop_source: Option<StopSource>,
worker_join_handles: Vec<MustJoinHandle<()>>,
rpc_send_channel: Option<flume::Sender<RPCWorkerRequest>>,
rpc_stop_source: Option<StopSource>,
rpc_worker_join_handles: Vec<MustJoinHandle<()>>,
rpc_worker_dequeue_latency: LatencyStats,
rpc_worker_process_latency: LatencyStats,
rpc_worker_dequeue_latency_accounting: LatencyStatsAccounting,
rpc_worker_process_latency_accounting: LatencyStatsAccounting,
}
#[derive(Debug)]
@ -146,9 +157,13 @@ impl_veilid_component!(RPCProcessor);
impl RPCProcessor {
fn new_inner() -> RPCProcessorInner {
RPCProcessorInner {
send_channel: None,
stop_source: None,
worker_join_handles: Vec::new(),
rpc_send_channel: None,
rpc_stop_source: None,
rpc_worker_join_handles: Vec::new(),
rpc_worker_dequeue_latency: LatencyStats::default(),
rpc_worker_process_latency: LatencyStats::default(),
rpc_worker_dequeue_latency_accounting: LatencyStatsAccounting::new(),
rpc_worker_process_latency_accounting: LatencyStatsAccounting::new(),
}
}
@ -173,7 +188,7 @@ impl RPCProcessor {
}
// Default RPC concurrency is the number of CPUs * RPC_WORKERS_PER_CORE, since a single worker takes about 1% CPU when relaying and 16% is a reasonable budget for baseline plus relay
concurrency *= 16;
concurrency *= RPC_WORKERS_PER_CORE;
}
(concurrency, queue_size, max_route_hop_count, timeout_us)
};
@ -227,22 +242,12 @@ impl RPCProcessor {
let mut inner = self.inner.lock();
let channel = flume::bounded(self.queue_size as usize);
inner.send_channel = Some(channel.0.clone());
inner.stop_source = Some(StopSource::new());
// spin up N workers
veilid_log!(self trace "Spinning up {} RPC workers", self.concurrency);
for task_n in 0..self.concurrency {
let registry = self.registry();
let receiver = channel.1.clone();
let stop_token = inner.stop_source.as_ref().unwrap().token();
let jh = spawn(&format!("rpc worker {}", task_n), async move {
let this = registry.rpc_processor();
Box::pin(this.rpc_worker(stop_token, receiver)).await
});
inner.worker_join_handles.push(jh);
}
inner.rpc_send_channel = Some(channel.0.clone());
inner.rpc_stop_source = Some(StopSource::new());
}
self.startup_rpc_workers()?;
guard.success();
veilid_log!(self debug "finished rpc processor startup");
@ -260,21 +265,7 @@ impl RPCProcessor {
.await
.expect("should be started up");
// Stop the rpc workers
let mut unord = FuturesUnordered::new();
{
let mut inner = self.inner.lock();
// take the join handles out
for h in inner.worker_join_handles.drain(..) {
unord.push(h);
}
// drop the stop
drop(inner.stop_source.take());
}
veilid_log!(self debug "stopping {} rpc worker tasks", unord.len());
// Wait for them to complete
while unord.next().await.is_some() {}
self.shutdown_rpc_workers().await;
veilid_log!(self debug "resetting rpc processor state");
@ -817,8 +808,10 @@ impl RPCProcessor {
return SenderPeerInfo::default();
};
let Some(routing_domain) = opt_routing_domain else {
// No routing domain for target, no node info
// Only a stale connection or no connection exists
// No routing domain for target, no node info is safe to send here
// Only a stale connection or no connection exists, or an unexpected
// relay was used, possibly due to the destination switching relays
// in a race condition with our send
return SenderPeerInfo::default();
};
@ -1478,11 +1471,24 @@ impl RPCProcessor {
let operation = match self.decode_rpc_operation(&encoded_msg) {
Ok(v) => v,
Err(e) => {
// Debug on error
veilid_log!(self debug "Dropping routed RPC: {}", e);
match e {
// Invalid messages that should be punished
RPCError::Protocol(_) | RPCError::InvalidFormat(_) => {
veilid_log!(self debug "Invalid routed RPC Operation: {}", e);
// XXX: Punish routes that send routed undecodable crap
// self.network_manager().address_filter().punish_route_id(xxx, PunishmentReason::FailedToDecodeRoutedMessage);
}
// Ignored messages that should be dropped
RPCError::Ignore(_) | RPCError::Network(_) | RPCError::TryAgain(_) => {
veilid_log!(self trace "Dropping routed RPC Operation: {}", e);
}
// Internal errors that deserve louder logging
RPCError::Unimplemented(_) | RPCError::Internal(_) => {
veilid_log!(self error "Error decoding routed RPC operation: {}", e);
}
};
// XXX: Punish routes that send routed undecodable crap
// self.network_manager().address_filter().punish_route_id(xxx, PunishmentReason::FailedToDecodeRoutedMessage);
return Ok(NetworkResult::invalid_message(e));
}
};
@ -1600,16 +1606,16 @@ impl RPCProcessor {
if let Err(e) = self.waiting_rpc_table.complete_op_waiter(op_id, msg) {
match e {
RPCError::Unimplemented(_) | RPCError::Internal(_) => {
veilid_log!(self error "Could not complete rpc operation: id = {}: {}", op_id, e);
veilid_log!(self error "Error in RPC operation: id = {}: {}", op_id, e);
}
RPCError::InvalidFormat(_)
| RPCError::Protocol(_)
| RPCError::Network(_)
| RPCError::TryAgain(_) => {
veilid_log!(self debug "Could not complete rpc operation: id = {}: {}", op_id, e);
veilid_log!(self debug "Could not complete RPC operation: id = {}: {}", op_id, e);
}
RPCError::Ignore(_) => {
veilid_log!(self debug "Answer late: id = {}", op_id);
RPCError::Ignore(e) => {
veilid_log!(self debug "RPC operation ignored: id = {}: {}", op_id, e);
}
};
// Don't throw an error here because it's okay if the original operation timed out
@ -1618,164 +1624,4 @@ impl RPCProcessor {
}
}
}
async fn rpc_worker(
&self,
stop_token: StopToken,
receiver: flume::Receiver<(Span, MessageEncoded)>,
) {
while let Ok(Ok((prev_span, msg))) =
receiver.recv_async().timeout_at(stop_token.clone()).await
{
let rpc_message_span = tracing::trace_span!("rpc message");
rpc_message_span.follows_from(prev_span);
network_result_value_or_log!(self match self
.process_rpc_message(msg).instrument(rpc_message_span)
.await
{
Err(e) => {
veilid_log!(self error "couldn't process rpc message: {}", e);
continue;
}
Ok(v) => {
v
}
} => [ format!(": msg.header={:?}", msg.header) ] {});
}
}
#[instrument(level = "trace", target = "rpc", skip_all)]
pub fn enqueue_direct_message(
&self,
envelope: Envelope,
sender_noderef: FilteredNodeRef,
flow: Flow,
routing_domain: RoutingDomain,
body: Vec<u8>,
) -> EyreResult<()> {
let _guard = self
.startup_context
.startup_lock
.enter()
.wrap_err("not started up")?;
if sender_noderef.routing_domain_set() != routing_domain {
bail!("routing domain should match peer noderef filter");
}
let header = MessageHeader {
detail: RPCMessageHeaderDetail::Direct(RPCMessageHeaderDetailDirect {
envelope,
sender_noderef,
flow,
routing_domain,
}),
timestamp: Timestamp::now(),
body_len: ByteCount::new(body.len() as u64),
};
let msg = MessageEncoded {
header,
data: MessageData { contents: body },
};
let send_channel = {
let inner = self.inner.lock();
let Some(send_channel) = inner.send_channel.as_ref().cloned() else {
bail!("send channel is closed");
};
send_channel
};
send_channel
.try_send((Span::current(), msg))
.map_err(|e| eyre!("failed to enqueue direct RPC message: {}", e))?;
Ok(())
}
#[instrument(level = "trace", target = "rpc", skip_all)]
fn enqueue_safety_routed_message(
&self,
direct: RPCMessageHeaderDetailDirect,
remote_safety_route: PublicKey,
sequencing: Sequencing,
body: Vec<u8>,
) -> EyreResult<()> {
let _guard = self
.startup_context
.startup_lock
.enter()
.wrap_err("not started up")?;
let header = MessageHeader {
detail: RPCMessageHeaderDetail::SafetyRouted(RPCMessageHeaderDetailSafetyRouted {
direct,
remote_safety_route,
sequencing,
}),
timestamp: Timestamp::now(),
body_len: (body.len() as u64).into(),
};
let msg = MessageEncoded {
header,
data: MessageData { contents: body },
};
let send_channel = {
let inner = self.inner.lock();
let Some(send_channel) = inner.send_channel.as_ref().cloned() else {
bail!("send channel is closed");
};
send_channel
};
send_channel
.try_send((Span::current(), msg))
.map_err(|e| eyre!("failed to enqueue safety routed RPC message: {}", e))?;
Ok(())
}
#[instrument(level = "trace", target = "rpc", skip_all)]
fn enqueue_private_routed_message(
&self,
direct: RPCMessageHeaderDetailDirect,
remote_safety_route: PublicKey,
private_route: PublicKey,
safety_spec: SafetySpec,
body: Vec<u8>,
) -> EyreResult<()> {
let _guard = self
.startup_context
.startup_lock
.enter()
.wrap_err("not started up")?;
let header = MessageHeader {
detail: RPCMessageHeaderDetail::PrivateRouted(RPCMessageHeaderDetailPrivateRouted {
direct,
remote_safety_route,
private_route,
safety_spec,
}),
timestamp: Timestamp::now(),
body_len: (body.len() as u64).into(),
};
let msg = MessageEncoded {
header,
data: MessageData { contents: body },
};
let send_channel = {
let inner = self.inner.lock();
let Some(send_channel) = inner.send_channel.as_ref().cloned() else {
bail!("send channel is closed");
};
send_channel
};
send_channel
.try_send((Span::current(), msg))
.map_err(|e| eyre!("failed to enqueue private routed RPC message: {}", e))?;
Ok(())
}
}

View File

@ -8,7 +8,7 @@ where
{
waiter: OperationWaiter<T, C>,
op_id: OperationId,
result_receiver: Option<flume::Receiver<(Span, T)>>,
result_receiver: flume::Receiver<(Span, T)>,
}
impl<T, C> OperationWaitHandle<T, C>
@ -27,9 +27,7 @@ where
C: Unpin + Clone,
{
fn drop(&mut self) {
if self.result_receiver.is_some() {
self.waiter.cancel_op_waiter(self.op_id);
}
self.waiter.cancel_op_waiter(self.op_id);
}
}
@ -106,7 +104,7 @@ where
OperationWaitHandle {
waiter: self.clone(),
op_id,
result_receiver: Some(result_receiver),
result_receiver,
}
}
@ -125,65 +123,69 @@ where
/// Get operation context
pub fn get_op_context(&self, op_id: OperationId) -> Result<C, RPCError> {
let inner = self.inner.lock();
let Some(waiting_op) = inner.waiting_op_table.get(&op_id) else {
return Err(RPCError::ignore(format!(
"Missing operation id getting op context: id={}",
op_id
)));
let res = {
let Some(waiting_op) = inner.waiting_op_table.get(&op_id) else {
return Err(RPCError::ignore(format!(
"Missing operation id getting op context: id={}",
op_id
)));
};
Ok(waiting_op.context.clone())
};
Ok(waiting_op.context.clone())
drop(inner);
res
}
/// Remove wait for op
#[instrument(level = "trace", target = "rpc", skip_all)]
fn cancel_op_waiter(&self, op_id: OperationId) {
let mut inner = self.inner.lock();
inner.waiting_op_table.remove(&op_id);
{
let waiting_op = inner.waiting_op_table.remove(&op_id);
drop(waiting_op);
}
drop(inner);
}
/// Complete the waiting op
#[instrument(level = "trace", target = "rpc", skip_all)]
pub fn complete_op_waiter(&self, op_id: OperationId, message: T) -> Result<(), RPCError> {
let waiting_op = {
let mut inner = self.inner.lock();
inner
.waiting_op_table
.remove(&op_id)
.ok_or_else(RPCError::else_ignore(format!(
"Unmatched operation id: {}",
op_id
)))?
let mut inner = self.inner.lock();
let res = {
let waiting_op =
inner
.waiting_op_table
.remove(&op_id)
.ok_or_else(RPCError::else_ignore(format!(
"Unmatched operation id: {}",
op_id
)))?;
waiting_op
.result_sender
.send((Span::current(), message))
.map_err(RPCError::ignore)
};
waiting_op
.result_sender
.send((Span::current(), message))
.map_err(RPCError::ignore)
drop(inner);
res
}
/// Wait for operation to complete
#[instrument(level = "trace", target = "rpc", skip_all)]
pub async fn wait_for_op(
&self,
mut handle: OperationWaitHandle<T, C>,
handle: OperationWaitHandle<T, C>,
timeout_us: TimestampDuration,
) -> Result<TimeoutOr<(T, TimestampDuration)>, RPCError> {
let timeout_ms = us_to_ms(timeout_us.as_u64()).map_err(RPCError::internal)?;
// Take the receiver
// After this, we must manually cancel since the cancel on handle drop is disabled
let result_receiver = handle.result_receiver.take().unwrap();
let result_fut = result_receiver.recv_async().in_current_span();
let result_fut = handle.result_receiver.recv_async().in_current_span();
// wait for the eventual value
let start_ts = Timestamp::now();
let res = timeout(timeout_ms, result_fut).await.into_timeout_or();
match res {
TimeoutOr::Timeout => {
self.cancel_op_waiter(handle.op_id);
Ok(TimeoutOr::Timeout)
}
TimeoutOr::Timeout => Ok(TimeoutOr::Timeout),
TimeoutOr::Value(Ok((_span_id, ret))) => {
let end_ts = Timestamp::now();
@ -192,7 +194,10 @@ where
Ok(TimeoutOr::Value((ret, end_ts.saturating_sub(start_ts))))
}
TimeoutOr::Value(Err(e)) => Err(RPCError::ignore(e)),
TimeoutOr::Value(Err(e)) => {
// The sender was dropped, meaning the operation was cancelled elsewhere
Err(RPCError::ignore(e))
}
}
}
}
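
The refactor above leans entirely on Drop for cleanup: result_receiver is no longer an Option that gets taken, so every exit path from wait_for_op, including the timeout arm, now cancels through the handle's Drop impl instead of an explicit cancel_op_waiter call. A minimal standalone sketch of that pattern, with illustrative types:

use std::collections::HashMap;
use std::sync::{Arc, Mutex};

// Illustrative stand-ins for the waiting-op table and its handle.
struct WaitHandle {
    table: Arc<Mutex<HashMap<u64, String>>>,
    op_id: u64,
}

impl Drop for WaitHandle {
    fn drop(&mut self) {
        // Removal is idempotent, so dropping after normal completion,
        // after a timeout, or on an early error path all behave the same.
        self.table.lock().unwrap().remove(&self.op_id);
    }
}

fn main() {
    let table = Arc::new(Mutex::new(HashMap::from([(1u64, "op".to_string())])));
    {
        let _handle = WaitHandle { table: table.clone(), op_id: 1 };
    } // handle dropped here, entry removed
    assert!(table.lock().unwrap().is_empty());
}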

View File

@ -0,0 +1,247 @@
use futures_util::StreamExt as _;
use stop_token::future::FutureExt as _;
use super::*;
#[derive(Debug)]
pub(super) enum RPCWorkerRequestKind {
Message { message_encoded: MessageEncoded },
}
#[derive(Debug)]
pub(super) struct RPCWorkerRequest {
enqueued_ts: Timestamp,
span: Span,
kind: RPCWorkerRequestKind,
}
impl RPCProcessor {
pub(super) fn startup_rpc_workers(&self) -> EyreResult<()> {
let mut inner = self.inner.lock();
// RPC workers
let channel = flume::bounded(self.queue_size as usize);
inner.rpc_send_channel = Some(channel.0.clone());
inner.rpc_stop_source = Some(StopSource::new());
// spin up N workers
veilid_log!(self debug "Starting {} RPC workers", self.concurrency);
for task_n in 0..self.concurrency {
let registry = self.registry();
let receiver = channel.1.clone();
let stop_token = inner.rpc_stop_source.as_ref().unwrap().token();
let jh = spawn(&format!("relay worker {}", task_n), async move {
let this = registry.rpc_processor();
Box::pin(this.rpc_worker(stop_token, receiver)).await
});
inner.rpc_worker_join_handles.push(jh);
}
Ok(())
}
pub(super) async fn shutdown_rpc_workers(&self) {
// Stop the rpc workers
let mut unord = FuturesUnordered::new();
{
let mut inner = self.inner.lock();
// take the join handles out
for h in inner.rpc_worker_join_handles.drain(..) {
unord.push(h);
}
// drop the stop
drop(inner.rpc_stop_source.take());
}
veilid_log!(self debug "Stopping {} RPC workers", unord.len());
// Wait for them to complete
while unord.next().await.is_some() {}
}
async fn rpc_worker(&self, stop_token: StopToken, receiver: flume::Receiver<RPCWorkerRequest>) {
while let Ok(Ok(request)) = receiver.recv_async().timeout_at(stop_token.clone()).await {
let rpc_request_span = tracing::trace_span!("rpc request");
rpc_request_span.follows_from(request.span);
// Measure dequeue time
let dequeue_ts = Timestamp::now();
let dequeue_latency = dequeue_ts.saturating_sub(request.enqueued_ts);
// Process request kind
match request.kind {
// Process RPC Message
RPCWorkerRequestKind::Message { message_encoded } => {
network_result_value_or_log!(self target:"network_result", match self
.process_rpc_message(message_encoded).instrument(rpc_request_span)
.await
{
Err(e) => {
veilid_log!(self error "couldn't process rpc message: {}", e);
continue;
}
Ok(v) => {
v
}
} => [ format!(": msg.header={:?}", message_encoded.header) ] {});
}
}
// Measure process time
let process_ts = Timestamp::now();
let process_latency = process_ts.saturating_sub(dequeue_ts);
// Accounting
let mut inner = self.inner.lock();
inner.rpc_worker_dequeue_latency = inner
.rpc_worker_dequeue_latency_accounting
.record_latency(dequeue_latency);
inner.rpc_worker_process_latency = inner
.rpc_worker_process_latency_accounting
.record_latency(process_latency);
}
}
#[instrument(level = "trace", target = "rpc", skip_all)]
pub fn enqueue_direct_message(
&self,
envelope: Envelope,
sender_noderef: FilteredNodeRef,
flow: Flow,
routing_domain: RoutingDomain,
body: Vec<u8>,
) -> EyreResult<()> {
let _guard = self
.startup_context
.startup_lock
.enter()
.wrap_err("not started up")?;
if sender_noderef.routing_domain_set() != routing_domain {
bail!("routing domain should match peer noderef filter");
}
let header = MessageHeader {
detail: RPCMessageHeaderDetail::Direct(RPCMessageHeaderDetailDirect {
envelope,
sender_noderef,
flow,
routing_domain,
}),
timestamp: Timestamp::now(),
body_len: ByteCount::new(body.len() as u64),
};
let message_encoded = MessageEncoded {
header,
data: MessageData { contents: body },
};
let send_channel = {
let inner = self.inner.lock();
let Some(send_channel) = inner.rpc_send_channel.as_ref().cloned() else {
bail!("send channel is closed");
};
send_channel
};
send_channel
.try_send(RPCWorkerRequest {
enqueued_ts: Timestamp::now(),
span: Span::current(),
kind: RPCWorkerRequestKind::Message { message_encoded },
})
.map_err(|e| eyre!("failed to enqueue direct RPC message: {}", e))?;
Ok(())
}
#[instrument(level = "trace", target = "rpc", skip_all)]
pub(super) fn enqueue_safety_routed_message(
&self,
direct: RPCMessageHeaderDetailDirect,
remote_safety_route: PublicKey,
sequencing: Sequencing,
body: Vec<u8>,
) -> EyreResult<()> {
let _guard = self
.startup_context
.startup_lock
.enter()
.wrap_err("not started up")?;
let header = MessageHeader {
detail: RPCMessageHeaderDetail::SafetyRouted(RPCMessageHeaderDetailSafetyRouted {
direct,
remote_safety_route,
sequencing,
}),
timestamp: Timestamp::now(),
body_len: (body.len() as u64).into(),
};
let message_encoded = MessageEncoded {
header,
data: MessageData { contents: body },
};
let send_channel = {
let inner = self.inner.lock();
let Some(send_channel) = inner.rpc_send_channel.as_ref().cloned() else {
bail!("send channel is closed");
};
send_channel
};
send_channel
.try_send(RPCWorkerRequest {
enqueued_ts: Timestamp::now(),
span: Span::current(),
kind: RPCWorkerRequestKind::Message { message_encoded },
})
.map_err(|e| eyre!("failed to enqueue safety routed RPC message: {}", e))?;
Ok(())
}
#[instrument(level = "trace", target = "rpc", skip_all)]
pub(super) fn enqueue_private_routed_message(
&self,
direct: RPCMessageHeaderDetailDirect,
remote_safety_route: PublicKey,
private_route: PublicKey,
safety_spec: SafetySpec,
body: Vec<u8>,
) -> EyreResult<()> {
let _guard = self
.startup_context
.startup_lock
.enter()
.wrap_err("not started up")?;
let header = MessageHeader {
detail: RPCMessageHeaderDetail::PrivateRouted(RPCMessageHeaderDetailPrivateRouted {
direct,
remote_safety_route,
private_route,
safety_spec,
}),
timestamp: Timestamp::now(),
body_len: (body.len() as u64).into(),
};
let message_encoded = MessageEncoded {
header,
data: MessageData { contents: body },
};
let send_channel = {
let inner = self.inner.lock();
let Some(send_channel) = inner.rpc_send_channel.as_ref().cloned() else {
bail!("send channel is closed");
};
send_channel
};
send_channel
.try_send(RPCWorkerRequest {
enqueued_ts: Timestamp::now(),
span: Span::current(),
kind: RPCWorkerRequestKind::Message { message_encoded },
})
.map_err(|e| eyre!("failed to enqueue private routed RPC message: {}", e))?;
Ok(())
}
}
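
The enqueue/dequeue timestamps above follow a general queue-instrumentation pattern: stamp the request when it enters the channel, measure dequeue latency when a worker picks it up, and process latency when the work finishes. A self-contained sketch of the same idea with std primitives (flume and Veilid's Timestamp are replaced here by assumption):

use std::sync::mpsc;
use std::time::Instant;

struct Request {
    enqueued_at: Instant,
    payload: String,
}

fn main() {
    let (tx, rx) = mpsc::channel();
    tx.send(Request { enqueued_at: Instant::now(), payload: "ping".into() }).unwrap();

    let req = rx.recv().unwrap();
    let dequeue_latency = req.enqueued_at.elapsed(); // time spent queued

    let start = Instant::now();
    // ... process req.payload here ...
    let process_latency = start.elapsed(); // time spent working

    println!("dequeue={:?} process={:?} ({})", dequeue_latency, process_latency, req.payload);
}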

View File

@ -0,0 +1,153 @@
use super::*;
// Latency entry is per round-trip packet (ping or data)
// - Size is number of entries
const ROLLING_LATENCIES_SIZE: usize = 50;
// Transfers entries are in bytes total for the interval
// - Size is number of entries
// - Interval is number of seconds in each entry
const ROLLING_TRANSFERS_SIZE: usize = 10;
pub const ROLLING_TRANSFERS_INTERVAL_SECS: u32 = 1;
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct TransferCount {
down: ByteCount,
up: ByteCount,
}
#[derive(Debug, Clone, Default)]
pub struct TransferStatsAccounting {
rolling_transfers: VecDeque<TransferCount>,
current_transfer: TransferCount,
}
impl TransferStatsAccounting {
pub fn new() -> Self {
Self {
rolling_transfers: VecDeque::new(),
current_transfer: TransferCount::default(),
}
}
pub fn add_down(&mut self, bytes: ByteCount) {
self.current_transfer.down += bytes;
}
pub fn add_up(&mut self, bytes: ByteCount) {
self.current_transfer.up += bytes;
}
pub fn roll_transfers(
&mut self,
last_ts: Timestamp,
cur_ts: Timestamp,
transfer_stats: &mut TransferStatsDownUp,
) {
let dur_ms = cur_ts.saturating_sub(last_ts) / 1000u64;
while self.rolling_transfers.len() >= ROLLING_TRANSFERS_SIZE {
self.rolling_transfers.pop_front();
}
self.rolling_transfers.push_back(self.current_transfer);
transfer_stats.down.total += self.current_transfer.down;
transfer_stats.up.total += self.current_transfer.up;
self.current_transfer = TransferCount::default();
transfer_stats.down.maximum = 0.into();
transfer_stats.up.maximum = 0.into();
transfer_stats.down.minimum = u64::MAX.into();
transfer_stats.up.minimum = u64::MAX.into();
transfer_stats.down.average = 0.into();
transfer_stats.up.average = 0.into();
for xfer in &self.rolling_transfers {
let bpsd = xfer.down * 1000u64 / dur_ms;
let bpsu = xfer.up * 1000u64 / dur_ms;
transfer_stats.down.maximum.max_assign(bpsd);
transfer_stats.up.maximum.max_assign(bpsu);
transfer_stats.down.minimum.min_assign(bpsd);
transfer_stats.up.minimum.min_assign(bpsu);
transfer_stats.down.average += bpsd;
transfer_stats.up.average += bpsu;
}
let len = self.rolling_transfers.len() as u64;
if len > 0 {
transfer_stats.down.average /= len;
transfer_stats.up.average /= len;
}
}
}
#[derive(Debug, Clone, Default)]
pub struct LatencyStatsAccounting {
rolling_latencies: VecDeque<TimestampDuration>,
}
impl LatencyStatsAccounting {
pub fn new() -> Self {
Self {
rolling_latencies: VecDeque::new(),
}
}
fn get_tm_n(sorted_latencies: &[TimestampDuration], n: usize) -> Option<TimestampDuration> {
let tmcount = sorted_latencies.len() * n / 100;
if tmcount == 0 {
None
} else {
let mut tm = TimestampDuration::new(0);
for l in &sorted_latencies[..tmcount] {
tm += *l;
}
tm /= tmcount as u64;
Some(tm)
}
}
fn get_p_n(sorted_latencies: &[TimestampDuration], n: usize) -> TimestampDuration {
let pindex = (sorted_latencies.len() * n / 100).saturating_sub(1);
sorted_latencies[pindex]
}
pub fn record_latency(&mut self, latency: TimestampDuration) -> LatencyStats {
while self.rolling_latencies.len() >= ROLLING_LATENCIES_SIZE {
self.rolling_latencies.pop_front();
}
self.rolling_latencies.push_back(latency);
// Calculate latency stats
let mut fastest = TimestampDuration::new(u64::MAX);
let mut slowest = TimestampDuration::new(0u64);
let mut average = TimestampDuration::new(0u64);
for rl in &self.rolling_latencies {
fastest.min_assign(*rl);
slowest.max_assign(*rl);
average += *rl;
}
let len = self.rolling_latencies.len() as u64;
if len > 0 {
average /= len;
}
let mut sorted_latencies: Vec<_> = self.rolling_latencies.iter().copied().collect();
sorted_latencies.sort();
let tm90 = Self::get_tm_n(&sorted_latencies, 90).unwrap_or(average);
let tm75 = Self::get_tm_n(&sorted_latencies, 75).unwrap_or(average);
let p90 = Self::get_p_n(&sorted_latencies, 90);
let p75 = Self::get_p_n(&sorted_latencies, 75);
LatencyStats {
fastest,
average,
slowest,
tm90,
tm75,
p90,
p75,
}
}
}
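
To pin down what these statistics mean: latencies are sorted ascending, tm90 is the mean of the fastest 90% of samples (discarding the slowest outliers), and p90 is the sample sitting at the 90th-percentile index. A small worked example under those definitions:

fn main() {
    // Ten sorted round-trip latencies, in milliseconds
    let sorted = [10u64, 11, 12, 12, 13, 14, 15, 16, 20, 90];

    // tm90: mean of the fastest 90% (9 of 10 entries); the 90ms outlier is dropped
    let tmcount = sorted.len() * 90 / 100; // 9
    let tm90 = sorted[..tmcount].iter().sum::<u64>() / tmcount as u64;
    assert_eq!(tm90, 13); // (10+11+12+12+13+14+15+16+20) / 9 = 123 / 9 = 13 (integer division)

    // p90: the entry at the 90th-percentile index
    let pindex = (sorted.len() * 90 / 100).saturating_sub(1); // 8
    assert_eq!(sorted[pindex], 20);
}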

View File

@ -59,6 +59,8 @@ struct StorageManagerInner {
pub remote_record_store: Option<RecordStore<RemoteRecordDetail>>,
/// Record subkeys that have not been pushed to the network because they were written to offline
pub offline_subkey_writes: HashMap<TypedKey, tasks::offline_subkey_writes::OfflineSubkeyWrite>,
/// Record subkeys that are currently being written to in the foreground
pub active_subkey_writes: HashMap<TypedKey, ValueSubkeyRangeSet>,
/// Storage manager metadata that is persistent, including copy of offline subkey writes
pub metadata_db: Option<TableDB>,
/// Background processing task (not part of attachment manager tick tree so it happens when detached too)
@ -73,6 +75,7 @@ impl fmt::Debug for StorageManagerInner {
.field("local_record_store", &self.local_record_store)
.field("remote_record_store", &self.remote_record_store)
.field("offline_subkey_writes", &self.offline_subkey_writes)
.field("active_subkey_writes", &self.active_subkey_writes)
//.field("metadata_db", &self.metadata_db)
//.field("tick_future", &self.tick_future)
.finish()
@ -736,7 +739,21 @@ impl StorageManager {
)
.await?;
if !self.dht_is_online() {
// Note that we are writing this subkey actively
// If it appears we are already doing this, then put it to the offline queue
let already_writing = {
let asw = inner.active_subkey_writes.entry(key).or_default();
if asw.contains(subkey) {
veilid_log!(self debug "Already writing to this subkey: {}:{}", key, subkey);
true
} else {
// Add to our list of active subkey writes
asw.insert(subkey);
false
}
};
if already_writing || !self.dht_is_online() {
veilid_log!(self debug "Writing subkey offline: {}:{} len={}", key, subkey, signed_value_data.value_data().data().len() );
// Add to offline writes to flush
Self::add_offline_subkey_write_inner(&mut inner, key, subkey, safety_selection);
@ -764,41 +781,68 @@ impl StorageManager {
// Failed to write, try again later
let mut inner = self.inner.lock().await;
Self::add_offline_subkey_write_inner(&mut inner, key, subkey, safety_selection);
// Remove from active subkey writes
let asw = inner.active_subkey_writes.get_mut(&key).unwrap();
if !asw.remove(subkey) {
panic!("missing active subkey write: {}:{}", key, subkey);
}
if asw.is_empty() {
inner.active_subkey_writes.remove(&key);
}
return Err(e);
}
};
// Wait for the first result
let Ok(result) = res_rx.recv_async().await else {
apibail_internal!("failed to receive results");
let process = || async {
// Wait for the first result
let Ok(result) = res_rx.recv_async().await else {
apibail_internal!("failed to receive results");
};
let result = result?;
let partial = result.fanout_result.kind.is_partial();
// Process the returned result
let out = self
.process_outbound_set_value_result(
key,
subkey,
signed_value_data.value_data().clone(),
safety_selection,
result,
)
.await?;
// If there's more to process, do it in the background
if partial {
self.process_deferred_outbound_set_value_result(
res_rx,
key,
subkey,
out.clone()
.unwrap_or_else(|| signed_value_data.value_data().clone()),
safety_selection,
);
}
Ok(out)
};
let result = result?;
let partial = result.fanout_result.kind.is_partial();
// Process the returned result
let out = self
.process_outbound_set_value_result(
key,
subkey,
signed_value_data.value_data().clone(),
safety_selection,
result,
)
.await?;
let out = process().await;
// If there's more to process, do it in the background
if partial {
self.process_deferred_outbound_set_value_result(
res_rx,
key,
subkey,
out.clone()
.unwrap_or_else(|| signed_value_data.value_data().clone()),
safety_selection,
);
// Remove active subkey write
let mut inner = self.inner.lock().await;
// Remove from active subkey writes
let asw = inner.active_subkey_writes.get_mut(&key).unwrap();
if !asw.remove(subkey) {
panic!("missing active subkey write: {}:{}", key, subkey);
}
if asw.is_empty() {
inner.active_subkey_writes.remove(&key);
}
Ok(out)
out
}
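
Note that the active_subkey_writes bookkeeping must be undone on every exit path, which is why the removal block appears both in the early-error arm and after process() completes. A hedged sketch of how the same invariant could be held by an RAII guard instead (illustrative types, not the actual StorageManager API):

use std::collections::{HashMap, HashSet};
use std::sync::{Arc, Mutex};

type Key = String;
type Subkey = u32;

struct ActiveWriteGuard {
    table: Arc<Mutex<HashMap<Key, HashSet<Subkey>>>>,
    key: Key,
    subkey: Subkey,
}

impl Drop for ActiveWriteGuard {
    fn drop(&mut self) {
        // Remove the subkey and prune the record entry when it empties,
        // mirroring the manual cleanup on both exit paths above.
        let mut table = self.table.lock().unwrap();
        if let Some(set) = table.get_mut(&self.key) {
            set.remove(&self.subkey);
            if set.is_empty() {
                table.remove(&self.key);
            }
        }
    }
}

fn main() {
    let table: Arc<Mutex<HashMap<Key, HashSet<Subkey>>>> = Arc::new(Mutex::new(HashMap::new()));
    table.lock().unwrap().entry("rec".into()).or_default().insert(3);
    {
        let _guard = ActiveWriteGuard { table: table.clone(), key: "rec".into(), subkey: 3 };
        // ... perform the outbound set_value here ...
    } // guard drops: subkey removed, empty entry pruned
    assert!(table.lock().unwrap().is_empty());
}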
/// Create, update or cancel an outbound watch to a DHT value
@ -1019,11 +1063,18 @@ impl StorageManager {
);
// Get the offline subkeys for this record still only returning the ones we're inspecting
// Merge in the currently offline in-flight records and the actively written records as well
let active_subkey_writes = inner
.active_subkey_writes
.get(&key)
.cloned()
.unwrap_or_default();
let offline_subkey_writes = inner
.offline_subkey_writes
.get(&key)
.map(|o| o.subkeys.union(&o.subkeys_in_flight))
.unwrap_or_default()
.union(&active_subkey_writes)
.intersect(&subkeys);
// If this is the maximum scope we're interested in, return the report
@ -1120,7 +1171,7 @@ impl StorageManager {
let dest = rpc_processor
.resolve_target_to_destination(
vc.target,
SafetySelection::Unsafe(Sequencing::NoPreference),
SafetySelection::Unsafe(Sequencing::PreferOrdered),
)
.await
.map_err(VeilidAPIError::from)?;

View File

@ -68,9 +68,9 @@ impl InspectCache {
};
if idx < entry.1.seqs.len() {
entry.1.seqs[idx] = seq;
} else if idx > entry.1.seqs.len() {
} else {
panic!(
"representational error in l2 inspect cache: {} > {}",
"representational error in l2 inspect cache: {} >= {}",
idx,
entry.1.seqs.len()
)

View File

@ -1,5 +1,6 @@
use crate::tests::test_veilid_config::*;
use crate::*;
use futures_util::StreamExt as _;
async fn startup() -> VeilidAPI {
trace!("test_table_store: starting");
@ -266,11 +267,55 @@ pub async fn test_protect_unprotect(vcrypto: &AsyncCryptoSystemGuard<'_>, ts: &T
}
}
pub async fn test_store_load_json_many(ts: &TableStore) {
trace!("test_json");
let _ = ts.delete("test").await;
let db = ts.open("test", 3).await.expect("should have opened");
let rows = 16;
let valuesize = 32768;
let parallel = 10;
let value = vec!["ABCD".to_string(); valuesize];
let mut unord = FuturesUnordered::new();
let mut r = 0;
let start_ts = Timestamp::now();
loop {
while r < rows && unord.len() < parallel {
let key = format!("key_{}", r);
r += 1;
unord.push(Box::pin(async {
let key = key;
db.store_json(0, key.as_bytes(), &value)
.await
.expect("should store");
let value2 = db
.load_json::<Vec<String>>(0, key.as_bytes())
.await
.expect("should load")
.expect("should exist");
assert_eq!(value, value2);
}));
}
if unord.next().await.is_none() {
break;
}
}
let end_ts = Timestamp::now();
trace!("test_store_load_json_many duration={}", (end_ts - start_ts));
}
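
The test loop above caps in-flight work at `parallel` by refilling FuturesUnordered only while it holds fewer than that many futures, then draining one completion per iteration. A minimal standalone sketch of this throttling pattern (assuming the futures-util crate):

use futures_util::stream::{FuturesUnordered, StreamExt};

async fn run_capped(total: usize, parallel: usize) -> usize {
    let mut unord = FuturesUnordered::new();
    let mut next = 0;
    let mut done = 0;
    loop {
        // Refill up to the concurrency cap
        while next < total && unord.len() < parallel {
            let n = next;
            next += 1;
            unord.push(async move { n });
        }
        // Drain one completion; stop when nothing is in flight
        match unord.next().await {
            Some(_) => done += 1,
            None => break,
        }
    }
    done
}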
pub async fn test_all() {
let api = startup().await;
let crypto = api.crypto().unwrap();
let ts = api.table_store().unwrap();
test_store_load_json_many(&ts).await;
for ck in VALID_CRYPTO_KINDS {
let vcrypto = crypto.get_async(ck).unwrap();
test_protect_unprotect(&vcrypto, &ts).await;

View File

@ -1,5 +1,7 @@
use super::*;
impl_veilid_log_facility!("veilid_api");
/////////////////////////////////////////////////////////////////////////////////////////////////////
pub(super) struct VeilidAPIInner {
@ -41,10 +43,9 @@ pub struct VeilidAPI {
}
impl VeilidAPI {
#[instrument(target = "veilid_api", level = "debug", skip_all)]
#[instrument(target = "veilid_api", level = "debug", fields(__VEILID_LOG_KEY = context.log_key()), skip_all)]
pub(crate) fn new(context: VeilidCoreContext) -> Self {
event!(target: "veilid_api", Level::DEBUG,
"VeilidAPI::new()");
veilid_log!(context debug "VeilidAPI::new()");
Self {
inner: Arc::new(Mutex::new(VeilidAPIInner {
context: Some(context),
@ -59,10 +60,9 @@ impl VeilidAPI {
}
/// Shut down Veilid and terminate the API.
#[instrument(target = "veilid_api", level = "debug", skip_all)]
#[instrument(target = "veilid_api", level = "debug", fields(__VEILID_LOG_KEY = self.log_key()), skip_all)]
pub async fn shutdown(self) {
event!(target: "veilid_api", Level::DEBUG,
"VeilidAPI::shutdown()");
veilid_log!(self debug "VeilidAPI::shutdown()");
let context = { self.inner.lock().context.take() };
if let Some(context) = context {
api_shutdown(context).await;
@ -152,6 +152,15 @@ impl VeilidAPI {
callback(&mut inner.debug_cache)
}
#[must_use]
pub(crate) fn log_key(&self) -> &str {
let inner = self.inner.lock();
let Some(context) = &inner.context else {
return "";
};
context.log_key()
}
////////////////////////////////////////////////////////////////
// Attach/Detach
@ -174,9 +183,9 @@ impl VeilidAPI {
}
/// Connect to the network.
#[instrument(target = "veilid_api", level = "debug", skip_all, ret, err)]
#[instrument(target = "veilid_api", level = "debug", fields(__VEILID_LOG_KEY = self.log_key()), skip_all, ret, err)]
pub async fn attach(&self) -> VeilidAPIResult<()> {
event!(target: "veilid_api", Level::DEBUG,
veilid_log!(self debug
"VeilidAPI::attach()");
let attachment_manager = self.core_context()?.attachment_manager();
@ -187,9 +196,9 @@ impl VeilidAPI {
}
/// Disconnect from the network.
#[instrument(target = "veilid_api", level = "debug", skip_all, ret, err)]
#[instrument(target = "veilid_api", level = "debug", fields(__VEILID_LOG_KEY = self.log_key()), skip_all, ret, err)]
pub async fn detach(&self) -> VeilidAPIResult<()> {
event!(target: "veilid_api", Level::DEBUG,
veilid_log!(self debug
"VeilidAPI::detach()");
let attachment_manager = self.core_context()?.attachment_manager();
@ -203,9 +212,9 @@ impl VeilidAPI {
// Routing Context
/// Get a new `RoutingContext` object to use to send messages over the Veilid network with default safety, sequencing, and stability parameters.
#[instrument(target = "veilid_api", level = "debug", skip_all, err, ret)]
#[instrument(target = "veilid_api", level = "debug", fields(__VEILID_LOG_KEY = self.log_key()), skip_all, err, ret)]
pub fn routing_context(&self) -> VeilidAPIResult<RoutingContext> {
event!(target: "veilid_api", Level::DEBUG,
veilid_log!(self debug
"VeilidAPI::routing_context()");
RoutingContext::try_new(self.clone())
@ -218,11 +227,11 @@ impl VeilidAPI {
/// `VLD0:XmnGyJrjMJBRC5ayJZRPXWTBspdX36-pbLb98H3UMeE` but if the prefix is left off
/// `XmnGyJrjMJBRC5ayJZRPXWTBspdX36-pbLb98H3UMeE` will be parsed with the 'best' cryptosystem
/// available (at the time of this writing this is `VLD0`).
#[instrument(target = "veilid_api", level = "debug", skip(self), fields(s=s.to_string()), ret, err)]
#[instrument(target = "veilid_api", level = "debug", skip(self), fields(__VEILID_LOG_KEY = self.log_key(), s=s.to_string()), ret, err)]
pub fn parse_as_target<S: ToString>(&self, s: S) -> VeilidAPIResult<Target> {
let s = s.to_string();
event!(target: "veilid_api", Level::DEBUG,
veilid_log!(self debug
"VeilidAPI::parse_as_target(s: {:?})", s);
// Is this a route id?
@ -272,14 +281,14 @@ impl VeilidAPI {
///
/// Returns a route id and 'blob' that can be published over some means (DHT or otherwise) to be
/// imported by another Veilid node.
#[instrument(target = "veilid_api", level = "debug", skip(self), ret)]
#[instrument(target = "veilid_api", level = "debug", fields(__VEILID_LOG_KEY = self.log_key()), skip(self), ret)]
pub async fn new_custom_private_route(
&self,
crypto_kinds: &[CryptoKind],
stability: Stability,
sequencing: Sequencing,
) -> VeilidAPIResult<(RouteId, Vec<u8>)> {
event!(target: "veilid_api", Level::DEBUG,
veilid_log!(self debug
"VeilidAPI::new_custom_private_route(crypto_kinds: {:?}, stability: {:?}, sequencing: {:?})",
crypto_kinds,
stability,
@ -336,9 +345,9 @@ impl VeilidAPI {
/// Import a private route blob as a remote private route.
///
/// Returns a route id that can be used to send private messages to the node creating this route.
#[instrument(target = "veilid_api", level = "debug", skip(self), ret, err)]
#[instrument(target = "veilid_api", level = "debug", fields(__VEILID_LOG_KEY = self.log_key()), skip(self), ret, err)]
pub fn import_remote_private_route(&self, blob: Vec<u8>) -> VeilidAPIResult<RouteId> {
event!(target: "veilid_api", Level::DEBUG,
veilid_log!(self debug
"VeilidAPI::import_remote_private_route(blob: {:?})", blob);
let routing_table = self.core_context()?.routing_table();
let rss = routing_table.route_spec_store();
@ -349,9 +358,9 @@ impl VeilidAPI {
///
/// This will deactivate the route and free its resources and it can no longer be sent to
/// or received from.
#[instrument(target = "veilid_api", level = "debug", skip(self), ret, err)]
#[instrument(target = "veilid_api", level = "debug", fields(__VEILID_LOG_KEY = self.log_key()), skip(self), ret, err)]
pub fn release_private_route(&self, route_id: RouteId) -> VeilidAPIResult<()> {
event!(target: "veilid_api", Level::DEBUG,
veilid_log!(self debug
"VeilidAPI::release_private_route(route_id: {:?})", route_id);
let routing_table = self.core_context()?.routing_table();
let rss = routing_table.route_spec_store();
@ -368,13 +377,13 @@ impl VeilidAPI {
///
/// * `call_id` - specifies which call to reply to, and it comes from a [VeilidUpdate::AppCall], specifically the [VeilidAppCall::id()] value.
/// * `message` - is an answer blob to be returned by the remote node's [RoutingContext::app_call()] function, and may be up to 32768 bytes.
#[instrument(target = "veilid_api", level = "debug", skip(self), ret, err)]
#[instrument(target = "veilid_api", level = "debug", fields(__VEILID_LOG_KEY = self.log_key()), skip(self), ret, err)]
pub async fn app_call_reply(
&self,
call_id: OperationId,
message: Vec<u8>,
) -> VeilidAPIResult<()> {
event!(target: "veilid_api", Level::DEBUG,
veilid_log!(self debug
"VeilidAPI::app_call_reply(call_id: {:?}, message: {:?})", call_id, message);
let rpc_processor = self.core_context()?.rpc_processor();
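A hedged Python sketch of replying from the update callback; the VeilidUpdate field names used here (kind, detail.call_id, detail.message) are assumptions based on the bindings' state types:

import veilid

async def handle_update(api: veilid.VeilidAPI, update: veilid.VeilidUpdate):
    if update.kind == veilid.VeilidUpdateKind.APP_CALL:
        # Answer blob of up to 32768 bytes, returned to the caller's app_call().
        await api.app_call_reply(update.detail.call_id, b"ack:" + update.detail.message)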
@ -387,7 +396,7 @@ impl VeilidAPI {
// Tunnel Building
#[cfg(feature = "unstable-tunnels")]
#[instrument(target = "veilid_api", level = "debug", skip(self), ret, err)]
#[instrument(target = "veilid_api", level = "debug", fields(__VEILID_LOG_KEY = self.log_key()), skip(self), ret, err)]
pub async fn start_tunnel(
&self,
_endpoint_mode: TunnelMode,
@ -397,7 +406,7 @@ impl VeilidAPI {
}
#[cfg(feature = "unstable-tunnels")]
#[instrument(target = "veilid_api", level = "debug", skip(self), ret, err)]
#[instrument(target = "veilid_api", level = "debug", fields(__VEILID_LOG_KEY = self.log_key()), skip(self), ret, err)]
pub async fn complete_tunnel(
&self,
_endpoint_mode: TunnelMode,
@ -408,7 +417,7 @@ impl VeilidAPI {
}
#[cfg(feature = "unstable-tunnels")]
#[instrument(target = "veilid_api", level = "debug", skip(self), ret, err)]
#[instrument(target = "veilid_api", level = "debug", fields(__VEILID_LOG_KEY = self.log_key()), skip(self), ret, err)]
pub async fn cancel_tunnel(&self, _tunnel_id: TunnelId) -> VeilidAPIResult<bool> {
panic!("unimplemented");
}

View File

@ -761,7 +761,9 @@ impl VeilidAPI {
async fn debug_nodeinfo(&self, _args: String) -> VeilidAPIResult<String> {
// Dump routing table entry
let registry = self.core_context()?.registry();
let nodeinfo = registry.routing_table().debug_info_nodeinfo();
let nodeinfo_rtab = registry.routing_table().debug_info_nodeinfo();
let nodeinfo_net = registry.network_manager().debug_info_nodeinfo();
let nodeinfo_rpc = registry.rpc_processor().debug_info_nodeinfo();
// Dump core state
let state = self.get_state().await?;
@ -790,7 +792,10 @@ impl VeilidAPI {
"Connection manager unavailable when detached".to_owned()
};
Ok(format!("{}\n{}\n{}\n", nodeinfo, peertable, connman))
Ok(format!(
"{}\n{}\n{}\n{}\n{}\n",
nodeinfo_rtab, nodeinfo_net, nodeinfo_rpc, peertable, connman
))
}
fn debug_nodeid(&self, _args: String) -> VeilidAPIResult<String> {
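The combined dump is reachable from the Python bindings through the debug interface; that the "nodeinfo" command maps to this handler is an assumption from the function name:

import veilid

async def dump_nodeinfo(api: veilid.VeilidAPI) -> str:
    # Routing table, network manager, and RPC node info plus the peer and
    # connection tables, newline-separated as formatted above.
    return await api.debug("nodeinfo")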

View File

@ -1,5 +1,7 @@
use super::*;
impl_veilid_log_facility!("veilid_api");
///////////////////////////////////////////////////////////////////////////////////////
/// Valid destinations for a message sent over a routing context.
@ -62,6 +64,11 @@ impl RoutingContext {
})
}
#[must_use]
pub(crate) fn log_key(&self) -> &str {
self.api.log_key()
}
/// Turn on sender privacy, enabling the use of safety routes. This is the default and
/// calling this function is only necessary if you have previously disabled safety or used other parameters.
///
@ -72,9 +79,9 @@ impl RoutingContext {
/// * Sequencing default is to prefer ordered before unordered message delivery.
///
/// To customize the safety selection in use, use [RoutingContext::with_safety()].
#[instrument(target = "veilid_api", level = "debug", ret, err)]
#[instrument(target = "veilid_api", level = "debug", fields(__VEILID_LOG_KEY = self.log_key()), ret, err)]
pub fn with_default_safety(self) -> VeilidAPIResult<Self> {
event!(target: "veilid_api", Level::DEBUG,
veilid_log!(self debug
"RoutingContext::with_default_safety(self: {:?})", self);
let config = self.api.config()?;
@ -89,9 +96,9 @@ impl RoutingContext {
}
/// Use a custom [SafetySelection]. Can be used to disable safety via [SafetySelection::Unsafe].
#[instrument(target = "veilid_api", level = "debug", ret, err)]
#[instrument(target = "veilid_api", level = "debug", fields(__VEILID_LOG_KEY = self.log_key()), ret, err)]
pub fn with_safety(self, safety_selection: SafetySelection) -> VeilidAPIResult<Self> {
event!(target: "veilid_api", Level::DEBUG,
veilid_log!(self debug
"RoutingContext::with_safety(self: {:?}, safety_selection: {:?})", self, safety_selection);
Ok(Self {
@ -101,9 +108,9 @@ impl RoutingContext {
}
/// Use a specified [Sequencing] preference, with or without privacy.
#[instrument(target = "veilid_api", level = "debug", ret)]
#[instrument(target = "veilid_api", level = "debug", fields(__VEILID_LOG_KEY = self.log_key()), ret)]
pub fn with_sequencing(self, sequencing: Sequencing) -> Self {
event!(target: "veilid_api", Level::DEBUG,
veilid_log!(self debug
"RoutingContext::with_sequencing(self: {:?}, sequencing: {:?})", self, sequencing);
Self {
@ -140,9 +147,9 @@ impl RoutingContext {
self.api.clone()
}
#[instrument(target = "veilid_api", level = "debug", ret, err)]
#[instrument(target = "veilid_api", level = "debug", fields(__VEILID_LOG_KEY = self.log_key()), ret, err)]
async fn get_destination(&self, target: Target) -> VeilidAPIResult<rpc_processor::Destination> {
event!(target: "veilid_api", Level::DEBUG,
veilid_log!(self debug
"RoutingContext::get_destination(self: {:?}, target: {:?})", self, target);
let rpc_processor = self.api.core_context()?.rpc_processor();
@ -165,9 +172,9 @@ impl RoutingContext {
/// * `message` - an arbitrary message blob of up to 32768 bytes.
///
/// Returns an answer blob of up to 32768 bytes.
#[instrument(target = "veilid_api", level = "debug", ret, err)]
#[instrument(target = "veilid_api", level = "debug", fields(__VEILID_LOG_KEY = self.log_key()), ret, err)]
pub async fn app_call(&self, target: Target, message: Vec<u8>) -> VeilidAPIResult<Vec<u8>> {
event!(target: "veilid_api", Level::DEBUG,
veilid_log!(self debug
"RoutingContext::app_call(self: {:?}, target: {:?}, message: {:?})", self, target, message);
let rpc_processor = self.api.core_context()?.rpc_processor();
@ -199,9 +206,9 @@ impl RoutingContext {
///
/// * `target` - can be either a direct node id or a private route.
/// * `message` - an arbitrary message blob of up to 32768 bytes.
#[instrument(target = "veilid_api", level = "debug", ret, err)]
#[instrument(target = "veilid_api", level = "debug", fields(__VEILID_LOG_KEY = self.log_key()), ret, err)]
pub async fn app_message(&self, target: Target, message: Vec<u8>) -> VeilidAPIResult<()> {
event!(target: "veilid_api", Level::DEBUG,
veilid_log!(self debug
"RoutingContext::app_message(self: {:?}, target: {:?}, message: {:?})", self, target, message);
let rpc_processor = self.api.core_context()?.rpc_processor();
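A short Python sketch contrasting the two send paths, where target is either a node TypedKey or an imported RouteId:

import veilid

async def ping(rc: veilid.RoutingContext, target) -> bytes:
    await rc.app_message(target, b"fire-and-forget")  # one-way, no reply
    return await rc.app_call(target, b"request")      # waits for the answer blob (up to 32768 bytes)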
@ -230,14 +237,14 @@ impl RoutingContext {
/// DHT Records
/// Deterministically builds the record key for a given schema and owner public key
#[instrument(target = "veilid_api", level = "debug", ret, err)]
#[instrument(target = "veilid_api", level = "debug", fields(__VEILID_LOG_KEY = self.log_key()), ret, err)]
pub fn get_dht_record_key(
&self,
schema: DHTSchema,
owner_key: &PublicKey,
kind: Option<CryptoKind>,
) -> VeilidAPIResult<TypedKey> {
event!(target: "veilid_api", Level::DEBUG,
veilid_log!(self debug
"RoutingContext::get_dht_record_key(self: {:?}, schema: {:?}, owner_key: {:?}, kind: {:?})", self, schema, owner_key, kind);
schema.validate()?;
@ -256,14 +263,14 @@ impl RoutingContext {
/// Returns the newly allocated DHT record's key if successful.
///
/// Note: if you pass in an owner keypair this call is deterministic! This means that if you try to create a new record for a given owner and schema that already exists it *will* fail.
#[instrument(target = "veilid_api", level = "debug", ret, err)]
#[instrument(target = "veilid_api", level = "debug", fields(__VEILID_LOG_KEY = self.log_key()), ret, err)]
pub async fn create_dht_record(
&self,
schema: DHTSchema,
owner: Option<KeyPair>,
kind: Option<CryptoKind>,
) -> VeilidAPIResult<DHTRecordDescriptor> {
event!(target: "veilid_api", Level::DEBUG,
veilid_log!(self debug
"RoutingContext::create_dht_record(self: {:?}, schema: {:?}, owner: {:?}, kind: {:?})", self, schema, owner, kind);
schema.validate()?;
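A minimal Python sketch mirroring the test code later in this diff:

import veilid

async def make_record(rc: veilid.RoutingContext) -> veilid.DHTRecordDescriptor:
    # No owner keypair: a fresh random record every time. Passing owner=
    # makes the call deterministic, so re-creating an existing record fails.
    desc = await rc.create_dht_record(veilid.DHTSchema.dflt(2))
    print(f"key={desc.key} owner={desc.owner}")
    return desc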
@ -291,13 +298,13 @@ impl RoutingContext {
/// safety selection.
///
/// Returns the DHT record descriptor for the opened record if successful.
#[instrument(target = "veilid_api", level = "debug", ret, err)]
#[instrument(target = "veilid_api", level = "debug", fields(__VEILID_LOG_KEY = self.log_key()), ret, err)]
pub async fn open_dht_record(
&self,
key: TypedKey,
default_writer: Option<KeyPair>,
) -> VeilidAPIResult<DHTRecordDescriptor> {
event!(target: "veilid_api", Level::DEBUG,
veilid_log!(self debug
"RoutingContext::open_dht_record(self: {:?}, key: {:?}, default_writer: {:?})", self, key, default_writer);
Crypto::validate_crypto_kind(key.kind)?;
@ -311,9 +318,9 @@ impl RoutingContext {
/// Closes a DHT record at a specific key that was opened with create_dht_record or open_dht_record.
///
/// Closing a record allows you to re-open it with a different routing context.
#[instrument(target = "veilid_api", level = "debug", ret, err)]
#[instrument(target = "veilid_api", level = "debug", fields(__VEILID_LOG_KEY = self.log_key()), ret, err)]
pub async fn close_dht_record(&self, key: TypedKey) -> VeilidAPIResult<()> {
event!(target: "veilid_api", Level::DEBUG,
veilid_log!(self debug
"RoutingContext::close_dht_record(self: {:?}, key: {:?})", self, key);
Crypto::validate_crypto_kind(key.kind)?;
@ -327,9 +334,9 @@ impl RoutingContext {
/// If the record is opened, it must be closed before it is deleted.
/// Deleting a record does not delete it from the network, but removes the record's local
/// storage and prevents this node from refreshing its value on the network.
#[instrument(target = "veilid_api", level = "debug", ret, err)]
#[instrument(target = "veilid_api", level = "debug", fields(__VEILID_LOG_KEY = self.log_key()), ret, err)]
pub async fn delete_dht_record(&self, key: TypedKey) -> VeilidAPIResult<()> {
event!(target: "veilid_api", Level::DEBUG,
veilid_log!(self debug
"RoutingContext::delete_dht_record(self: {:?}, key: {:?})", self, key);
Crypto::validate_crypto_kind(key.kind)?;
@ -344,14 +351,14 @@ impl RoutingContext {
///
/// Returns `None` if the value subkey has not yet been set.
/// Returns `Some(data)` if the value subkey has valid data.
#[instrument(target = "veilid_api", level = "debug", ret, err)]
#[instrument(target = "veilid_api", level = "debug", fields(__VEILID_LOG_KEY = self.log_key()), ret, err)]
pub async fn get_dht_value(
&self,
key: TypedKey,
subkey: ValueSubkey,
force_refresh: bool,
) -> VeilidAPIResult<Option<ValueData>> {
event!(target: "veilid_api", Level::DEBUG,
veilid_log!(self debug
"RoutingContext::get_dht_value(self: {:?}, key: {:?}, subkey: {:?}, force_refresh: {:?})", self, key, subkey, force_refresh);
Crypto::validate_crypto_kind(key.kind)?;
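In the Python bindings the Option maps to None or a ValueData, as in this sketch:

import veilid

async def read_subkey(rc: veilid.RoutingContext, key: veilid.TypedKey) -> bytes | None:
    # force_refresh=True asks the network instead of trusting the local cache.
    vd = await rc.get_dht_value(key, veilid.ValueSubkey(0), force_refresh=True)
    return None if vd is None else vd.data  # None: subkey never set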
@ -367,7 +374,7 @@ impl RoutingContext {
///
/// Returns `None` if the value was successfully put.
/// Returns `Some(data)` if the value put was older than the one available on the network.
#[instrument(target = "veilid_api", level = "debug", skip(data), fields(data = print_data(&data, Some(64))), ret, err)]
#[instrument(target = "veilid_api", level = "debug", skip(data), fields(__VEILID_LOG_KEY = self.log_key(), data = print_data(&data, Some(64))), ret, err)]
pub async fn set_dht_value(
&self,
key: TypedKey,
@ -375,7 +382,7 @@ impl RoutingContext {
data: Vec<u8>,
writer: Option<KeyPair>,
) -> VeilidAPIResult<Option<ValueData>> {
event!(target: "veilid_api", Level::DEBUG,
veilid_log!(self debug
"RoutingContext::set_dht_value(self: {:?}, key: {:?}, subkey: {:?}, data: len={}, writer: {:?})", self, key, subkey, data.len(), writer);
Crypto::validate_crypto_kind(key.kind)?;
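The corresponding Python sketch for the write side:

import veilid

async def write_subkey(rc: veilid.RoutingContext, key: veilid.TypedKey, data: bytes):
    newer = await rc.set_dht_value(key, veilid.ValueSubkey(0), data)
    if newer is not None:
        # Our value was older than the network's; `newer` holds the winner.
        print("lost the race:", newer.data)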
@ -404,7 +411,7 @@ impl RoutingContext {
/// * If a member (either the owner or a SMPL schema member) has opened the key for writing (even if no writing is performed) then the watch will be signed and guaranteed, up to network.dht.member_watch_limit watches per writer.
///
/// Members can be specified via the SMPL schema and do not need to allocate writable subkeys in order to offer a member watch capability.
#[instrument(target = "veilid_api", level = "debug", ret, err)]
#[instrument(target = "veilid_api", level = "debug", fields(__VEILID_LOG_KEY = self.log_key()), ret, err)]
pub async fn watch_dht_values(
&self,
key: TypedKey,
@ -412,7 +419,7 @@ impl RoutingContext {
expiration: Timestamp,
count: u32,
) -> VeilidAPIResult<Timestamp> {
event!(target: "veilid_api", Level::DEBUG,
veilid_log!(self debug
"RoutingContext::watch_dht_values(self: {:?}, key: {:?}, subkeys: {:?}, expiration: {}, count: {})", self, key, subkeys, expiration, count);
Crypto::validate_crypto_kind(key.kind)?;
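A hedged Python sketch of a watch over the first two subkeys; the Timestamp and ValueSubkey wrappers match the type asserts added to the bindings later in this diff:

import veilid

async def watch_first_two(rc: veilid.RoutingContext, key: veilid.TypedKey):
    subkeys = [(veilid.ValueSubkey(0), veilid.ValueSubkey(1))]
    # expiration 0 / count 0xFFFFFFFF request an effectively unbounded watch;
    # the network answers with the expiration it actually granted.
    granted = await rc.watch_dht_values(key, subkeys, veilid.Timestamp(0), 0xFFFFFFFF)
    print(f"watch granted until {granted}")
    remaining = await rc.cancel_dht_watch(key, subkeys)  # True while any watch remains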
@ -430,13 +437,13 @@ impl RoutingContext {
///
/// Returns Ok(true) if there is any remaining watch for this record.
/// Returns Ok(false) if the entire watch has been cancelled.
#[instrument(target = "veilid_api", level = "debug", ret, err)]
#[instrument(target = "veilid_api", level = "debug", fields(__VEILID_LOG_KEY = self.log_key()), ret, err)]
pub async fn cancel_dht_watch(
&self,
key: TypedKey,
subkeys: ValueSubkeyRangeSet,
) -> VeilidAPIResult<bool> {
event!(target: "veilid_api", Level::DEBUG,
veilid_log!(self debug
"RoutingContext::cancel_dht_watch(self: {:?}, key: {:?}, subkeys: {:?}", self, key, subkeys);
Crypto::validate_crypto_kind(key.kind)?;
@ -484,14 +491,14 @@ impl RoutingContext {
/// Useful for determining which subkeys would change with a SetValue operation.
///
/// Returns a DHTRecordReport with the returned subkey ranges that overlapped the schema, and sequence numbers for each of the subkeys in the range.
#[instrument(target = "veilid_api", level = "debug", ret, err)]
#[instrument(target = "veilid_api", level = "debug", fields(__VEILID_LOG_KEY = self.log_key()), ret, err)]
pub async fn inspect_dht_record(
&self,
key: TypedKey,
subkeys: ValueSubkeyRangeSet,
scope: DHTReportScope,
) -> VeilidAPIResult<DHTRecordReport> {
event!(target: "veilid_api", Level::DEBUG,
veilid_log!(self debug
"RoutingContext::inspect_dht_record(self: {:?}, key: {:?}, subkeys: {:?}, scope: {:?})", self, key, subkeys, scope);
Crypto::validate_crypto_kind(key.kind)?;
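In Python the report fields look like this sketch, mirroring test_inspect_dht_record later in this diff:

import veilid

async def report(rc: veilid.RoutingContext, key: veilid.TypedKey):
    rr = await rc.inspect_dht_record(key, [], veilid.DHTReportScope.SYNC_GET)
    # subkeys/offline_subkeys are (start, end) ranges; seqs are per subkey,
    # with veilid.ValueSeqNum.NONE marking subkeys that were never set.
    pending = sum(b - a + 1 for (a, b) in rr.offline_subkeys)
    print(rr.subkeys, pending, rr.local_seqs, rr.network_seqs)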
@ -504,13 +511,13 @@ impl RoutingContext {
/// Block Store
#[cfg(feature = "unstable-blockstore")]
#[instrument(target = "veilid_api", level = "debug", ret, err)]
#[instrument(target = "veilid_api", level = "debug", fields(__VEILID_LOG_KEY = self.log_key()), ret, err)]
pub async fn find_block(&self, _block_id: PublicKey) -> VeilidAPIResult<Vec<u8>> {
panic!("unimplemented");
}
#[cfg(feature = "unstable-blockstore")]
#[instrument(target = "veilid_api", level = "debug", ret, err)]
#[instrument(target = "veilid_api", level = "debug", fields(__VEILID_LOG_KEY = self.log_key()), ret, err)]
pub async fn supply_block(&self, _block_id: PublicKey) -> VeilidAPIResult<bool> {
panic!("unimplemented");
}

View File

@ -10,10 +10,6 @@ use wasm_bindgen_test::*;
wasm_bindgen_test_configure!(run_in_browser);
extern crate wee_alloc;
#[global_allocator]
static ALLOC: wee_alloc::WeeAlloc = wee_alloc::WeeAlloc::INIT;
static SETUP_ONCE: Once = Once::new();
pub fn setup() -> () {
SETUP_ONCE.call_once(|| {
@ -22,8 +18,8 @@ pub fn setup() -> () {
let config = veilid_tracing_wasm::WASMLayerConfig::new()
.with_report_logs_in_timings(false)
.with_max_level(Level::TRACE)
.with_console_config(tracing_wasm::ConsoleConfig::ReportWithoutConsoleColor);
tracing_wasm::set_as_global_default_with_config(config);
.with_console_config(veilid_tracing_wasm::ConsoleConfig::ReportWithoutConsoleColor);
veilid_tracing_wasm::set_as_global_default_with_config(config);
});
}

View File

@ -13,10 +13,10 @@ packages:
dependency: transitive
description:
name: async
sha256: "947bfcf187f74dbc5e146c9eb9c0f10c9f8b30743e341481c1e2ed3ecc18c20c"
sha256: d2872f9c19731c2e5f10444b14686eb7cc85c76274bd6c16e1816bff9a3bab63
url: "https://pub.dev"
source: hosted
version: "2.11.0"
version: "2.12.0"
async_tools:
dependency: transitive
description:
@ -29,10 +29,10 @@ packages:
dependency: transitive
description:
name: boolean_selector
sha256: "6cfb5af12253eaf2b368f07bacc5a80d1301a071c73360d746b7f2e32d762c66"
sha256: "8aab1771e1243a5063b8b0ff68042d67334e3feab9e95b9490f9a6ebf73b42ea"
url: "https://pub.dev"
source: hosted
version: "2.1.1"
version: "2.1.2"
change_case:
dependency: transitive
description:
@ -45,10 +45,10 @@ packages:
dependency: transitive
description:
name: characters
sha256: "04a925763edad70e8443c99234dc3328f442e811f1d8fd1a72f1c8ad0f69a605"
sha256: f71061c654a3380576a52b451dd5532377954cf9dbd272a78fc8479606670803
url: "https://pub.dev"
source: hosted
version: "1.3.0"
version: "1.4.0"
charcode:
dependency: transitive
description:
@ -61,18 +61,18 @@ packages:
dependency: transitive
description:
name: clock
sha256: cb6d7f03e1de671e34607e909a7213e31d7752be4fb66a86d29fe1eb14bfb5cf
sha256: fddb70d9b5277016c77a80201021d40a2247104d9f4aa7bab7157b7e3f05b84b
url: "https://pub.dev"
source: hosted
version: "1.1.1"
version: "1.1.2"
collection:
dependency: transitive
description:
name: collection
sha256: a1ace0a119f20aabc852d165077c036cd864315bd99b7eaa10a60100341941bf
sha256: "2f5709ae4d3d59dd8f7cd309b4e023046b57d8a6c82130785d2b0e5868084e76"
url: "https://pub.dev"
source: hosted
version: "1.19.0"
version: "1.19.1"
convert:
dependency: transitive
description:
@ -101,10 +101,10 @@ packages:
dependency: transitive
description:
name: fake_async
sha256: "511392330127add0b769b75a987850d136345d9227c6b94c96a04cf4a391bf78"
sha256: "6a95e56b2449df2273fd8c45a662d6947ce1ebb7aafe80e550a3f68297f3cacc"
url: "https://pub.dev"
source: hosted
version: "1.3.1"
version: "1.3.2"
ffi:
dependency: transitive
description:
@ -117,10 +117,10 @@ packages:
dependency: transitive
description:
name: file
sha256: "5fc22d7c25582e38ad9a8515372cd9a93834027aacf1801cf01164dac0ffa08c"
sha256: a3b4f84adafef897088c160faf7dfffb7696046cb13ae90b508c2cbc95d3b8d4
url: "https://pub.dev"
source: hosted
version: "7.0.0"
version: "7.0.1"
fixnum:
dependency: transitive
description:
@ -195,18 +195,18 @@ packages:
dependency: transitive
description:
name: leak_tracker
sha256: "7bb2830ebd849694d1ec25bf1f44582d6ac531a57a365a803a6034ff751d2d06"
sha256: c35baad643ba394b40aac41080300150a4f08fd0fd6a10378f8f7c6bc161acec
url: "https://pub.dev"
source: hosted
version: "10.0.7"
version: "10.0.8"
leak_tracker_flutter_testing:
dependency: transitive
description:
name: leak_tracker_flutter_testing
sha256: "9491a714cca3667b60b5c420da8217e6de0d1ba7a5ec322fab01758f6998f379"
sha256: f8b613e7e6a13ec79cfdc0e97638fddb3ab848452eff057653abd3edba760573
url: "https://pub.dev"
source: hosted
version: "3.0.8"
version: "3.0.9"
leak_tracker_testing:
dependency: transitive
description:
@ -243,10 +243,10 @@ packages:
dependency: transitive
description:
name: matcher
sha256: d2323aa2060500f906aa31a895b4030b6da3ebdcc5619d14ce1aada65cd161cb
sha256: dc58c723c3c24bf8d3e2d3ad3f2f9d7bd9cf43ec6feaa64181775e60190153f2
url: "https://pub.dev"
source: hosted
version: "0.12.16+1"
version: "0.12.17"
material_color_utilities:
dependency: transitive
description:
@ -259,18 +259,18 @@ packages:
dependency: transitive
description:
name: meta
sha256: bdb68674043280c3428e9ec998512fb681678676b3c54e773629ffe74419f8c7
sha256: e3641ec5d63ebf0d9b41bd43201a66e3fc79a65db5f61fc181f04cd27aab950c
url: "https://pub.dev"
source: hosted
version: "1.15.0"
version: "1.16.0"
path:
dependency: "direct main"
description:
name: path
sha256: "087ce49c3f0dc39180befefc60fdb4acd8f8620e5682fe2476afd0b3688bb4af"
sha256: "75cca69d1490965be98c73ceaea117e8a04dd21217b37b292c9ddbec0d955bc5"
url: "https://pub.dev"
source: hosted
version: "1.9.0"
version: "1.9.1"
path_provider:
dependency: "direct main"
description:
@ -323,10 +323,10 @@ packages:
dependency: transitive
description:
name: platform
sha256: "9b71283fc13df574056616011fb138fd3b793ea47cc509c189a6c3fa5f8a1a65"
sha256: "5d6b1b0036a5f331ebc77c850ebc8506cbc1e9416c27e59b439f917a902a4984"
url: "https://pub.dev"
source: hosted
version: "3.1.5"
version: "3.1.6"
plugin_platform_interface:
dependency: transitive
description:
@ -339,10 +339,10 @@ packages:
dependency: transitive
description:
name: process
sha256: "21e54fd2faf1b5bdd5102afd25012184a6793927648ea81eea80552ac9405b32"
sha256: "107d8be718f120bbba9dcd1e95e3bd325b1b4a4f07db64154635ba03f2567a0d"
url: "https://pub.dev"
source: hosted
version: "5.0.2"
version: "5.0.3"
quiver:
dependency: transitive
description:
@ -360,34 +360,34 @@ packages:
dependency: transitive
description:
name: source_span
sha256: "53e943d4206a5e30df338fd4c6e7a077e02254531b138a15aec3bd143c1a8b3c"
sha256: "254ee5351d6cb365c859e20ee823c3bb479bf4a293c22d17a9f1bf144ce86f7c"
url: "https://pub.dev"
source: hosted
version: "1.10.0"
version: "1.10.1"
stack_trace:
dependency: transitive
description:
name: stack_trace
sha256: "9f47fd3630d76be3ab26f0ee06d213679aa425996925ff3feffdec504931c377"
sha256: "8b27215b45d22309b5cddda1aa2b19bdfec9df0e765f2de506401c071d38d1b1"
url: "https://pub.dev"
source: hosted
version: "1.12.0"
version: "1.12.1"
stream_channel:
dependency: transitive
description:
name: stream_channel
sha256: ba2aa5d8cc609d96bbb2899c28934f9e1af5cddbd60a827822ea467161eb54e7
sha256: "969e04c80b8bcdf826f8f16579c7b14d780458bd97f56d107d3950fdbeef059d"
url: "https://pub.dev"
source: hosted
version: "2.1.2"
version: "2.1.4"
string_scanner:
dependency: transitive
description:
name: string_scanner
sha256: "688af5ed3402a4bde5b3a6c15fd768dbf2621a614950b17f04626c431ab3c4c3"
sha256: "921cd31725b72fe181906c6a94d987c78e3b98c2e205b397ea399d4054872b43"
url: "https://pub.dev"
source: hosted
version: "1.3.0"
version: "1.4.1"
sync_http:
dependency: transitive
description:
@ -416,18 +416,18 @@ packages:
dependency: transitive
description:
name: term_glyph
sha256: a29248a84fbb7c79282b40b8c72a1209db169a2e0542bce341da992fe1bc7e84
sha256: "7f554798625ea768a7518313e58f83891c7f5024f88e46e7182a4558850a4b8e"
url: "https://pub.dev"
source: hosted
version: "1.2.1"
version: "1.2.2"
test_api:
dependency: transitive
description:
name: test_api
sha256: "664d3a9a64782fcdeb83ce9c6b39e78fd2971d4e37827b9b06c3aa1edc5e760c"
sha256: fb31f383e2ee25fbbfe06b40fe21e1e458d14080e3c67e7ba0acfde4df4e0bbd
url: "https://pub.dev"
source: hosted
version: "0.7.3"
version: "0.7.4"
typed_data:
dependency: transitive
description:
@ -450,7 +450,7 @@ packages:
path: ".."
relative: true
source: path
version: "0.4.1"
version: "0.4.3"
veilid_test:
dependency: "direct dev"
description:
@ -462,10 +462,10 @@ packages:
dependency: transitive
description:
name: vm_service
sha256: f6be3ed8bd01289b34d679c2b62226f63c0e69f9fd2e50a6b3c1c729a961041b
sha256: "0968250880a6c5fe7edc067ed0a13d4bae1577fe2771dcf3010d52c4a9d3ca14"
url: "https://pub.dev"
source: hosted
version: "14.3.0"
version: "14.3.1"
webdriver:
dependency: transitive
description:
@ -499,5 +499,5 @@ packages:
source: hosted
version: "0.0.6"
sdks:
dart: ">=3.5.0 <4.0.0"
dart: ">=3.7.0-0 <4.0.0"
flutter: ">=3.24.0"

View File

@ -35,17 +35,17 @@ debug-load = ["dep:ctor", "dep:libc-print", "dep:android_log-sys", "dep:oslog"]
[dependencies]
veilid-core = { path = "../../veilid-core", default-features = false }
tracing = { version = "0.1.40", features = ["log", "attributes"] }
tracing-subscriber = "0.3.18"
tracing = { version = "0.1.41", features = ["log", "attributes"] }
tracing-subscriber = "0.3.19"
parking_lot = "0.12.3"
backtrace = "0.3.71"
serde_json = "1.0.120"
serde = "1.0.204"
futures-util = { version = "0.3.30", default-features = false, features = [
backtrace = "^0.3.71"
serde_json = "1.0.140"
serde = "1.0.218"
futures-util = { version = "0.3.31", default-features = false, features = [
"alloc",
] }
cfg-if = "1.0.0"
data-encoding = { version = "2.6.0" }
data-encoding = { version = "2.8.0" }
tracing-flame = "0.2.0"
# Dependencies for native builds only
@ -55,15 +55,15 @@ tracing-opentelemetry = "0.21"
opentelemetry = { version = "0.20" }
opentelemetry-otlp = { version = "0.13" }
opentelemetry-semantic-conventions = "0.12"
async-std = { version = "1.12.0", features = ["unstable"], optional = true }
tokio = { version = "1.38.1", features = ["full"], optional = true }
tokio-stream = { version = "0.1.15", features = ["net"], optional = true }
tokio-util = { version = "0.7.11", features = ["compat"], optional = true }
allo-isolate = "0.1.25"
async-std = { version = "1.13.0", features = ["unstable"], optional = true }
tokio = { version = "1.43.0", features = ["full"], optional = true }
tokio-stream = { version = "0.1.17", features = ["net"], optional = true }
tokio-util = { version = "0.7.13", features = ["compat"], optional = true }
allo-isolate = "0.1.26"
ffi-support = "0.4.4"
lazy_static = "1.5.0"
hostname = "0.3.1"
ctor = { version = "0.2.8", optional = true }
ctor = { version = "0.2.9", optional = true }
libc-print = { version = "0.1.23", optional = true }
@ -74,7 +74,7 @@ libc-print = { version = "0.1.23", optional = true }
[target.'cfg(target_os = "android")'.dependencies]
jni = "0.21.1"
paranoid-android = "0.2.2"
android_log-sys = { version = "0.3.1", optional = true }
android_log-sys = { version = "0.3.2", optional = true }
# Dependencies for iOS builds only
[target.'cfg(target_os = "ios")'.dependencies]

View File

@ -1,6 +1,6 @@
# Routing context veilid tests
from typing import Awaitable, Callable
from typing import Any, Awaitable, Callable, Optional
import pytest
import asyncio
import time
@ -374,13 +374,13 @@ async def test_inspect_dht_record(api_connection: veilid.VeilidAPI):
rr = await rc.inspect_dht_record(rec.key, [], veilid.DHTReportScope.LOCAL)
print("rr: {}", rr.__dict__)
assert rr.subkeys == [[0,1]]
assert rr.subkeys == [(0,1)]
assert rr.local_seqs == [0, 0xFFFFFFFF]
assert rr.network_seqs == []
rr2 = await rc.inspect_dht_record(rec.key, [], veilid.DHTReportScope.SYNC_GET)
print("rr2: {}", rr2.__dict__)
assert rr2.subkeys == [[0,1]]
assert rr2.subkeys == [(0,1)]
assert rr2.local_seqs == [0, 0xFFFFFFFF]
assert rr2.network_seqs == [0, 0xFFFFFFFF]
@ -390,42 +390,28 @@ async def test_inspect_dht_record(api_connection: veilid.VeilidAPI):
async def _run_test_schema_limit(api_connection: veilid.VeilidAPI, open_record: Callable[[veilid.RoutingContext, int], Awaitable[tuple[veilid.TypedKey, veilid.PublicKey, veilid.SecretKey]]], count: int, test_data: bytes, ):
async def _run_test_schema_limit(api_connection: veilid.VeilidAPI, open_record: Callable[[veilid.RoutingContext, int], Awaitable[tuple[veilid.DHTRecordDescriptor, Optional[veilid.KeyPair]]]], count: int, test_data: bytes):
rc = await api_connection.new_routing_context()
async with rc:
(key, owner, secret) = await open_record(rc, count)
print(f'{key} {owner}:{secret}')
(desc, writer) = await open_record(rc, count)
print(f'{desc.key} {writer}')
# write dht records on server 0
records = []
print(f'writing {count} subkeys')
for n in range(count):
await rc.set_dht_value(key, ValueSubkey(n), test_data)
await rc.set_dht_value(desc.key, ValueSubkey(n), test_data)
print(f' {n}')
print('syncing records to the network')
await sync(rc, [desc])
while True:
donerecords = set()
subkeysleft = 0
rr = await rc.inspect_dht_record(key, [])
left = 0; [left := left + (x[1]-x[0]+1) for x in rr.offline_subkeys]
if left == 0:
break
print(f' {left} subkeys left')
time.sleep(1)
await rc.close_dht_record(key)
await api_connection.debug("record purge local")
await api_connection.debug("record purge remote")
await rc.close_dht_record(desc.key)
# read dht records on server 0
print(f'reading {count} subkeys')
desc1 = await rc.open_dht_record(key)
desc1 = await rc.open_dht_record(desc.key)
for n in range(count):
vd0 = await rc.get_dht_value(key, ValueSubkey(n), force_refresh=True)
vd0 = await rc.get_dht_value(desc1.key, ValueSubkey(n))
assert vd0.data == test_data
print(f' {n}')
@ -433,10 +419,10 @@ async def _run_test_schema_limit(api_connection: veilid.VeilidAPI, open_record:
@pytest.mark.asyncio
async def test_schema_limit_dflt(api_connection: veilid.VeilidAPI):
async def open_record(rc: veilid.RoutingContext, count: int) -> tuple[veilid.TypedKey, veilid.PublicKey, veilid.SecretKey]:
async def open_record(rc: veilid.RoutingContext, count: int) -> tuple[veilid.DHTRecordDescriptor, Optional[veilid.KeyPair]]:
schema = veilid.DHTSchema.dflt(count)
desc = await rc.create_dht_record(schema)
return (desc.key, desc.owner, desc.owner_secret)
return (desc, desc.owner_key_pair())
print("Test with maximum number of subkeys before lower limit hit")
@ -474,7 +460,7 @@ async def test_schema_limit_smpl(api_connection: veilid.VeilidAPI):
desc = await rc.create_dht_record(schema)
await rc.open_dht_record(desc.key, writer_keypair)
return (desc.key, writer_keypair.key(), writer_keypair.secret())
return (desc, writer_keypair)
print("Test with maximum number of subkeys before lower limit hit")
TEST_DATA = b"A" * 32768
@ -545,18 +531,7 @@ async def test_dht_integration_writer_reader():
await rc0.set_dht_value(desc.key, ValueSubkey(0), TEST_DATA)
print('syncing records to the network')
recleft = len(records)
for desc in records:
while True:
rr = await rc0.inspect_dht_record(desc.key, [])
left = 0; [left := left + (x[1]-x[0]+1) for x in rr.offline_subkeys]
if left == 0:
await rc0.close_dht_record(desc.key)
break
print(f' {recleft} records {left} subkeys left')
time.sleep(0.1)
recleft-=1
await sync(rc0, records)
# read dht records on server 1
print(f'reading {COUNT} records')
@ -636,6 +611,96 @@ async def test_dht_write_read_local():
print(f' {n}')
n += 1
@pytest.mark.skipif(os.getenv("STRESS") != "1", reason="stress test takes a long time")
@pytest.mark.asyncio
async def test_dht_write_read_full_subkeys_local():
async def null_update_callback(update: veilid.VeilidUpdate):
pass
try:
api0 = await veilid.api_connector(null_update_callback, 0)
except veilid.VeilidConnectionError:
pytest.skip("Unable to connect to veilid-server 0.")
async with api0:
# purge local and remote record stores to ensure we start fresh
await api0.debug("record purge local")
await api0.debug("record purge remote")
# make routing contexts
rc0 = await api0.new_routing_context()
async with rc0:
# Number of records
COUNT = 8
# Number of subkeys per record
SUBKEY_COUNT = 32
# Nonce to encrypt test data
NONCE = veilid.Nonce.from_bytes(b"A"*24)
# Secret to encrypt test data
SECRET = veilid.SharedSecret.from_bytes(b"A"*32)
# Max subkey size
MAX_SUBKEY_SIZE = min(32768, 1024*1024//SUBKEY_COUNT)  # floor division keeps this an int for the string repeat below
# MAX_SUBKEY_SIZE = 256
# write dht records on server 0
records = []
subkey_data_list = []
schema = veilid.DHTSchema.dflt(SUBKEY_COUNT)
print(f'writing {COUNT} records with full subkeys')
init_futures = set()
for n in range(COUNT):
# Make encrypted data that is consistent and hard to compress
subkey_data = bytes(chr(ord("A")+n)*MAX_SUBKEY_SIZE, 'ascii')
print(f"subkey_data({n}):len={len(subkey_data)}")
cs = await api0.best_crypto_system()
async with cs:
subkey_data = await cs.crypt_no_auth(subkey_data, NONCE, SECRET)
subkey_data_list.append(subkey_data)
desc = await rc0.create_dht_record(schema)
records.append(desc)
for i in range(SUBKEY_COUNT):
init_futures.add(rc0.set_dht_value(desc.key, ValueSubkey(i), subkey_data))
print(f' {n}: {desc.key} {desc.owner}:{desc.owner_secret}')
# Wait for all records to synchronize, with progress bars
await sync_win(rc0, records, SUBKEY_COUNT, init_futures)
for desc0 in records:
await rc0.close_dht_record(desc0.key)
await api0.debug("record purge local")
await api0.debug("record purge remote")
# read dht records on server 0
print(f'reading {COUNT} records')
for n, desc0 in enumerate(records):
desc1 = await rc0.open_dht_record(desc0.key)
for i in range(SUBKEY_COUNT):
vd0 = None
while vd0 is None:
vd0 = await rc0.get_dht_value(desc1.key, ValueSubkey(i), force_refresh=True)
if vd0 is not None:
assert vd0.data == subkey_data_list[n]
break
time.sleep(1)
print(f"retrying record {n} subkey {i}")
await rc0.close_dht_record(desc1.key)
print(f' {n}')
async def sync(rc: veilid.RoutingContext, records: list[veilid.DHTRecordDescriptor]):
print('syncing records to the network')
syncrecords = records.copy()
@ -646,9 +711,121 @@ async def sync(rc: veilid.RoutingContext, records: list[veilid.DHTRecordDescript
rr = await rc.inspect_dht_record(desc.key, [])
left = 0; [left := left + (x[1]-x[0]+1) for x in rr.offline_subkeys]
if left == 0:
donerecords.add(desc)
if veilid.ValueSeqNum.NONE not in rr.local_seqs:
donerecords.add(desc)
else:
subkeysleft += left
syncrecords = [x for x in syncrecords if x not in donerecords]
print(f' {len(syncrecords)} records {subkeysleft} subkeys left')
time.sleep(1)
async def sync_win(
rc: veilid.RoutingContext,
records: list[veilid.DHTRecordDescriptor],
subkey_count: int,
init_futures: set[Awaitable[Any]]
):
import curses
screen = curses.initscr()
curses.start_color()
curses.init_pair(1, curses.COLOR_BLACK, curses.COLOR_BLUE)
curses.init_pair(2, curses.COLOR_BLACK, curses.COLOR_CYAN)
curses.init_pair(3, curses.COLOR_BLACK, curses.COLOR_YELLOW)
curses.init_pair(4, curses.COLOR_BLACK, curses.COLOR_GREEN)
HEIGHT=len(records) + 3
GRAPHWIDTH = subkey_count
WIDTH=GRAPHWIDTH + 4 + 1 + 43 + 2
cur_lines = curses.LINES
cur_cols = curses.COLS
win = curses.newwin(HEIGHT, WIDTH,
max(0, int(cur_lines/2) - int(HEIGHT/2)),
max(0, int(cur_cols/2) - int(WIDTH/2)))
win.clear()
win.border(0,0,0,0)
win.nodelay(True)
# Record inspection and completion state
# Records we are done inspecting and have finished sync
donerecords: set[veilid.TypedKey] = set()
# Records we are currently inspecting that are in the futures set
futurerecords: set[veilid.TypedKey] = set()
# All the futures we are waiting for
futures = set()
# The record report state
recordreports: dict[veilid.TypedKey, veilid.DHTRecordReport] = dict()
# Add initial futures with None key
for fut in init_futures:
async def _do_init_fut(fut):
return (None, await fut)
futures.add(asyncio.create_task(_do_init_fut(fut)))
# Loop until all records are completed
while len(donerecords) != len(records):
# Update the futures with inspects for unfinished records
for n, desc in enumerate(records):
if desc.key in donerecords or desc.key in futurerecords:
continue
async def _do_inspect(key: veilid.TypedKey):
return (key, await rc.inspect_dht_record(key, []))
futures.add(asyncio.create_task(_do_inspect(desc.key)))
futurerecords.add(desc.key)
# Wait for some futures to finish
done, futures = await asyncio.wait(futures, return_when = asyncio.FIRST_COMPLETED)
# Process finished futures into the state
for rr_fut in done:
key: veilid.TypedKey
rr: veilid.DHTRecordReport
key, rr = await rr_fut
if key is not None:
futurerecords.remove(key)
if len(rr.subkeys) == 1 and rr.subkeys[0] == (0, subkey_count-1) and veilid.ValueSeqNum.NONE not in rr.local_seqs and len(rr.offline_subkeys) == 0:
if key in recordreports:
del recordreports[key]
donerecords.add(key)
else:
recordreports[key] = rr
# Re-render the state
if cur_lines != curses.LINES or cur_cols != curses.COLS:
cur_lines = curses.LINES
cur_cols = curses.COLS
win.move(
max(0, int(cur_lines/2) - int(HEIGHT/2)),
max(0, int(cur_cols/2) - int(WIDTH/2)))
win.border(0,0,0,0)
win.addstr(1, 1, "syncing records to the network", curses.color_pair(0))
for n, rr in enumerate(records):
key = rr.key
win.addstr(n+2, GRAPHWIDTH+1, key, curses.color_pair(0))
if key in donerecords:
win.addstr(n+2, 1, " " * subkey_count, curses.color_pair(4))
elif key in recordreports:
rr = recordreports[key]
win.addstr(n+2, 1, " " * subkey_count, curses.color_pair(1))
for (a,b) in rr.subkeys:
for m in range(a, b+1):
if rr.local_seqs[m] != veilid.ValueSeqNum.NONE:
win.addstr(n+2, m+1, " ", curses.color_pair(2))
for (a,b) in rr.offline_subkeys:
win.addstr(n+2, a+1, " " * (b-a+1), curses.color_pair(3))
else:
win.addstr(n+2, 1, " " * subkey_count, curses.color_pair(1))
win.refresh()
time.sleep(.5)
curses.endwin()

View File

@ -6,11 +6,20 @@ from .state import VeilidState
class RoutingContext(ABC):
ref_count: int
def __init__(
self,
):
self.ref_count = 0
async def __aenter__(self) -> Self:
self.ref_count += 1
return self
async def __aexit__(self, *excinfo):
if not self.is_done():
self.ref_count -= 1
if self.ref_count == 0 and not self.is_done():
await self.release()
@abstractmethod
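Sketch of what the new reference count buys, given an api handle from the bindings: nested async with blocks over the same object release it only once, when the outermost block exits.

rc = await api.new_routing_context()
async with rc:          # ref_count -> 1
    async with rc:      # ref_count -> 2
        ...             # inner exit: ref_count -> 1, no release yet
# outer exit: ref_count -> 0 and not is_done(): release() is awaited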
@ -109,13 +118,22 @@ class RoutingContext(ABC):
class TableDbTransaction(ABC):
ref_count: int
def __init__(
self,
):
self.ref_count = 0
async def __aenter__(self) -> Self:
self.ref_count += 1
return self
async def __aexit__(self, *excinfo):
if not self.is_done():
await self.rollback()
self.ref_count -= 1
if self.ref_count == 0 and not self.is_done():
await self.release()
@abstractmethod
def is_done(self) -> bool:
pass
@ -138,11 +156,20 @@ class TableDbTransaction(ABC):
class TableDb(ABC):
ref_count: int
def __init__(
self,
):
self.ref_count = 0
async def __aenter__(self) -> Self:
self.ref_count += 1
return self
async def __aexit__(self, *excinfo):
if not self.is_done():
self.ref_count -= 1
if self.ref_count == 0 and not self.is_done():
await self.release()
@abstractmethod
@ -179,11 +206,20 @@ class TableDb(ABC):
class CryptoSystem(ABC):
ref_count: int
def __init__(
self,
):
self.ref_count = 0
async def __aenter__(self) -> Self:
self.ref_count += 1
return self
async def __aexit__(self, *excinfo):
if not self.is_done():
self.ref_count -= 1
if self.ref_count == 0 and not self.is_done():
await self.release()
@abstractmethod
@ -306,11 +342,20 @@ class CryptoSystem(ABC):
class VeilidAPI(ABC):
ref_count: int
def __init__(
self,
):
self.ref_count = 0
async def __aenter__(self) -> Self:
self.ref_count += 1
return self
async def __aexit__(self, *excinfo):
if not self.is_done():
self.ref_count -= 1
if self.ref_count == 0 and not self.is_done():
await self.release()
@abstractmethod

View File

@ -99,6 +99,8 @@ class _JsonVeilidAPI(VeilidAPI):
update_callback: Callable[[VeilidUpdate], Awaitable],
validate_schema: bool = True,
):
super().__init__()
self.reader = reader
self.writer = writer
self.update_callback = update_callback
@ -308,7 +310,7 @@ class _JsonVeilidAPI(VeilidAPI):
# Validate if we have a validator
if response["op"] != req["op"]:
raise ValueError("Response op does not match request op")
raise ValueError(f"Response op does not match request op: {response['op']} != {req['op']}")
if validate is not None:
validate(req, response)
@ -336,6 +338,12 @@ class _JsonVeilidAPI(VeilidAPI):
async def new_custom_private_route(
self, kinds: list[CryptoKind], stability: Stability, sequencing: Sequencing
) -> tuple[RouteId, bytes]:
assert isinstance(kinds, list)
for k in kinds:
assert isinstance(k, CryptoKind)
assert isinstance(stability, Stability)
assert isinstance(sequencing, Sequencing)
return NewPrivateRouteResult.from_json(
raise_api_result(
await self.send_ndjson_request(
@ -348,6 +356,8 @@ class _JsonVeilidAPI(VeilidAPI):
).to_tuple()
async def import_remote_private_route(self, blob: bytes) -> RouteId:
assert isinstance(blob, bytes)
return RouteId(
raise_api_result(
await self.send_ndjson_request(Operation.IMPORT_REMOTE_PRIVATE_ROUTE, blob=blob)
@ -355,11 +365,16 @@ class _JsonVeilidAPI(VeilidAPI):
)
async def release_private_route(self, route_id: RouteId):
assert isinstance(route_id, RouteId)
raise_api_result(
await self.send_ndjson_request(Operation.RELEASE_PRIVATE_ROUTE, route_id=route_id)
)
async def app_call_reply(self, call_id: OperationId, message: bytes):
assert isinstance(call_id, OperationId)
assert isinstance(message, bytes)
raise_api_result(
await self.send_ndjson_request(
Operation.APP_CALL_REPLY, call_id=call_id, message=message
@ -371,6 +386,9 @@ class _JsonVeilidAPI(VeilidAPI):
return _JsonRoutingContext(self, rc_id)
async def open_table_db(self, name: str, column_count: int) -> TableDb:
assert isinstance(name, str)
assert isinstance(column_count, int)
db_id = raise_api_result(
await self.send_ndjson_request(
Operation.OPEN_TABLE_DB, name=name, column_count=column_count
@ -379,11 +397,15 @@ class _JsonVeilidAPI(VeilidAPI):
return _JsonTableDb(self, db_id)
async def delete_table_db(self, name: str) -> bool:
assert isinstance(name, str)
return raise_api_result(
await self.send_ndjson_request(Operation.DELETE_TABLE_DB, name=name)
)
async def get_crypto_system(self, kind: CryptoKind) -> CryptoSystem:
assert isinstance(kind, CryptoKind)
cs_id = raise_api_result(
await self.send_ndjson_request(Operation.GET_CRYPTO_SYSTEM, kind=kind)
)
@ -396,6 +418,13 @@ class _JsonVeilidAPI(VeilidAPI):
async def verify_signatures(
self, node_ids: list[TypedKey], data: bytes, signatures: list[TypedSignature]
) -> Optional[list[TypedKey]]:
assert isinstance(node_ids, list)
for ni in node_ids:
assert isinstance(ni, TypedKey)
assert isinstance(data, bytes)
for sig in signatures:
assert isinstance(sig, TypedSignature)
out = raise_api_result(
await self.send_ndjson_request(
Operation.VERIFY_SIGNATURES,
@ -416,6 +445,11 @@ class _JsonVeilidAPI(VeilidAPI):
async def generate_signatures(
self, data: bytes, key_pairs: list[TypedKeyPair]
) -> list[TypedSignature]:
assert isinstance(data, bytes)
assert isinstance(key_pairs, list)
for kp in key_pairs:
assert isinstance(kp, TypedKeyPair)
return list(
map(
lambda x: TypedSignature(x),
@ -428,6 +462,8 @@ class _JsonVeilidAPI(VeilidAPI):
)
async def generate_key_pair(self, kind: CryptoKind) -> list[TypedKeyPair]:
assert isinstance(kind, CryptoKind)
return list(
map(
lambda x: TypedKeyPair(x),
@ -441,6 +477,7 @@ class _JsonVeilidAPI(VeilidAPI):
return Timestamp(raise_api_result(await self.send_ndjson_request(Operation.NOW)))
async def debug(self, command: str) -> str:
assert isinstance(command, str)
return raise_api_result(await self.send_ndjson_request(Operation.DEBUG, command=command))
async def veilid_version_string(self) -> str:
@ -459,7 +496,7 @@ class _JsonVeilidAPI(VeilidAPI):
def validate_rc_op(request: dict, response: dict):
if response["rc_op"] != request["rc_op"]:
raise ValueError("Response rc_op does not match request rc_op")
raise ValueError(f"Response rc_op does not match request rc_op: {response["rc_op"]} != {request["rc_op"]}")
class _JsonRoutingContext(RoutingContext):
@ -468,6 +505,8 @@ class _JsonRoutingContext(RoutingContext):
done: bool
def __init__(self, api: _JsonVeilidAPI, rc_id: int):
super().__init__()
self.api = api
self.rc_id = rc_id
self.done = False
@ -497,6 +536,8 @@ class _JsonRoutingContext(RoutingContext):
self.done = True
async def with_default_safety(self, release=True) -> Self:
assert isinstance(release, bool)
new_rc_id = raise_api_result(
await self.api.send_ndjson_request(
Operation.ROUTING_CONTEXT,
@ -510,6 +551,9 @@ class _JsonRoutingContext(RoutingContext):
return self.__class__(self.api, new_rc_id)
async def with_safety(self, safety_selection: SafetySelection, release=True) -> Self:
assert isinstance(safety_selection, SafetySelection)
assert isinstance(release, bool)
new_rc_id = raise_api_result(
await self.api.send_ndjson_request(
Operation.ROUTING_CONTEXT,
@ -524,6 +568,9 @@ class _JsonRoutingContext(RoutingContext):
return self.__class__(self.api, new_rc_id)
async def with_sequencing(self, sequencing: Sequencing, release=True) -> Self:
assert isinstance(sequencing, Sequencing)
assert isinstance(release, bool)
new_rc_id = raise_api_result(
await self.api.send_ndjson_request(
Operation.ROUTING_CONTEXT,
@ -551,6 +598,9 @@ class _JsonRoutingContext(RoutingContext):
)
)
async def app_call(self, target: TypedKey | RouteId, message: bytes) -> bytes:
assert isinstance(target, TypedKey) or isinstance(target, RouteId)
assert isinstance(message, bytes)
return urlsafe_b64decode_no_pad(
raise_api_result(
await self.api.send_ndjson_request(
@ -565,6 +615,9 @@ class _JsonRoutingContext(RoutingContext):
)
async def app_message(self, target: TypedKey | RouteId, message: bytes):
assert isinstance(target, TypedKey) or isinstance(target, RouteId)
assert isinstance(message, bytes)
raise_api_result(
await self.api.send_ndjson_request(
Operation.ROUTING_CONTEXT,
@ -579,6 +632,10 @@ class _JsonRoutingContext(RoutingContext):
async def create_dht_record(
self, schema: DHTSchema, owner: Optional[KeyPair] = None, kind: Optional[CryptoKind] = None
) -> DHTRecordDescriptor:
assert isinstance(schema, DHTSchema)
assert owner is None or isinstance(owner, KeyPair)
assert kind is None or isinstance(kind, CryptoKind)
return DHTRecordDescriptor.from_json(
raise_api_result(
await self.api.send_ndjson_request(
@ -596,6 +653,9 @@ class _JsonRoutingContext(RoutingContext):
async def open_dht_record(
self, key: TypedKey, writer: Optional[KeyPair] = None
) -> DHTRecordDescriptor:
assert isinstance(key, TypedKey)
assert writer is None or isinstance(writer, KeyPair)
return DHTRecordDescriptor.from_json(
raise_api_result(
await self.api.send_ndjson_request(
@ -610,6 +670,8 @@ class _JsonRoutingContext(RoutingContext):
)
async def close_dht_record(self, key: TypedKey):
assert isinstance(key, TypedKey)
raise_api_result(
await self.api.send_ndjson_request(
Operation.ROUTING_CONTEXT,
@ -621,6 +683,8 @@ class _JsonRoutingContext(RoutingContext):
)
async def delete_dht_record(self, key: TypedKey):
assert isinstance(key, TypedKey)
raise_api_result(
await self.api.send_ndjson_request(
Operation.ROUTING_CONTEXT,
@ -634,6 +698,10 @@ class _JsonRoutingContext(RoutingContext):
async def get_dht_value(
self, key: TypedKey, subkey: ValueSubkey, force_refresh: bool = False
) -> Optional[ValueData]:
assert isinstance(key, TypedKey)
assert isinstance(subkey, ValueSubkey)
assert isinstance(force_refresh, bool)
ret = raise_api_result(
await self.api.send_ndjson_request(
Operation.ROUTING_CONTEXT,
@ -650,6 +718,11 @@ class _JsonRoutingContext(RoutingContext):
async def set_dht_value(
self, key: TypedKey, subkey: ValueSubkey, data: bytes, writer: Optional[KeyPair] = None
) -> Optional[ValueData]:
assert isinstance(key, TypedKey)
assert isinstance(subkey, ValueSubkey)
assert isinstance(data, bytes)
assert writer is None or isinstance(writer, KeyPair)
ret = raise_api_result(
await self.api.send_ndjson_request(
Operation.ROUTING_CONTEXT,
@ -671,6 +744,15 @@ class _JsonRoutingContext(RoutingContext):
expiration: Timestamp = 0,
count: int = 0xFFFFFFFF,
) -> Timestamp:
assert isinstance(key, TypedKey)
assert isinstance(subkeys, list)
for s in subkeys:
assert isinstance(s, tuple)
assert isinstance(s[0], ValueSubkey)
assert isinstance(s[1], ValueSubkey)
assert isinstance(expiration, Timestamp)
assert isinstance(count, int)
return Timestamp(
raise_api_result(
await self.api.send_ndjson_request(
@ -689,6 +771,13 @@ class _JsonRoutingContext(RoutingContext):
async def cancel_dht_watch(
self, key: TypedKey, subkeys: list[tuple[ValueSubkey, ValueSubkey]]
) -> bool:
assert isinstance(key, TypedKey)
assert isinstance(subkeys, list)
for s in subkeys:
assert isinstance(s, tuple)
assert isinstance(s[0], ValueSubkey)
assert isinstance(s[1], ValueSubkey)
return raise_api_result(
await self.api.send_ndjson_request(
Operation.ROUTING_CONTEXT,
@ -706,6 +795,14 @@ class _JsonRoutingContext(RoutingContext):
subkeys: list[tuple[ValueSubkey, ValueSubkey]],
scope: DHTReportScope = DHTReportScope.LOCAL,
) -> DHTRecordReport:
assert isinstance(key, TypedKey)
assert isinstance(subkeys, list)
for s in subkeys:
assert isinstance(s, tuple)
assert isinstance(s[0], ValueSubkey)
assert isinstance(s[1], ValueSubkey)
assert isinstance(scope, DHTReportScope)
return DHTRecordReport.from_json(
raise_api_result(
await self.api.send_ndjson_request(
@ -728,7 +825,7 @@ class _JsonRoutingContext(RoutingContext):
def validate_tx_op(request: dict, response: dict):
if response["tx_op"] != request["tx_op"]:
raise ValueError("Response tx_op does not match request tx_op")
raise ValueError(f"Response tx_op does not match request tx_op: {response['tx_op']} != {request['tx_op']}")
class _JsonTableDbTransaction(TableDbTransaction):
@ -737,6 +834,8 @@ class _JsonTableDbTransaction(TableDbTransaction):
done: bool
def __init__(self, api: _JsonVeilidAPI, tx_id: int):
super().__init__()
self.api = api
self.tx_id = tx_id
self.done = False
@ -784,6 +883,10 @@ class _JsonTableDbTransaction(TableDbTransaction):
self.done = True
async def store(self, key: bytes, value: bytes, col: int = 0):
assert isinstance(key, bytes)
assert isinstance(value, bytes)
assert isinstance(col, int)
await self.api.send_ndjson_request(
Operation.TABLE_DB_TRANSACTION,
validate=validate_tx_op,
@ -795,6 +898,9 @@ class _JsonTableDbTransaction(TableDbTransaction):
)
async def delete(self, key: bytes, col: int = 0):
assert isinstance(key, bytes)
assert isinstance(col, int)
await self.api.send_ndjson_request(
Operation.TABLE_DB_TRANSACTION,
validate=validate_tx_op,
@ -810,7 +916,7 @@ class _JsonTableDbTransaction(TableDbTransaction):
def validate_db_op(request: dict, response: dict):
if response["db_op"] != request["db_op"]:
raise ValueError("Response db_op does not match request db_op")
raise ValueError(f"Response db_op does not match request db_op: {response['db_op']} != {request['db_op']}")
class _JsonTableDb(TableDb):
@ -819,6 +925,8 @@ class _JsonTableDb(TableDb):
done: bool
def __init__(self, api: _JsonVeilidAPI, db_id: int):
super().__init__()
self.api = api
self.db_id = db_id
self.done = False
@ -858,6 +966,8 @@ class _JsonTableDb(TableDb):
)
async def get_keys(self, col: int = 0) -> list[bytes]:
assert isinstance(col, int)
return list(
map(
lambda x: urlsafe_b64decode_no_pad(x),
@ -885,6 +995,10 @@ class _JsonTableDb(TableDb):
return _JsonTableDbTransaction(self.api, tx_id)
async def store(self, key: bytes, value: bytes, col: int = 0):
assert isinstance(key, bytes)
assert isinstance(value, bytes)
assert isinstance(col, int)
return raise_api_result(
await self.api.send_ndjson_request(
Operation.TABLE_DB,
@ -898,6 +1012,9 @@ class _JsonTableDb(TableDb):
)
async def load(self, key: bytes, col: int = 0) -> Optional[bytes]:
assert isinstance(key, bytes)
assert isinstance(col, int)
res = raise_api_result(
await self.api.send_ndjson_request(
Operation.TABLE_DB,
@ -911,6 +1028,9 @@ class _JsonTableDb(TableDb):
return None if res is None else urlsafe_b64decode_no_pad(res)
async def delete(self, key: bytes, col: int = 0) -> Optional[bytes]:
assert isinstance(key, bytes)
assert isinstance(col, int)
res = raise_api_result(
await self.api.send_ndjson_request(
Operation.TABLE_DB,
@ -929,7 +1049,7 @@ class _JsonTableDb(TableDb):
def validate_cs_op(request: dict, response: dict):
if response["cs_op"] != request["cs_op"]:
raise ValueError("Response cs_op does not match request cs_op")
raise ValueError(f"Response cs_op does not match request cs_op: {response['cs_op']} != {request['cs_op']}")
class _JsonCryptoSystem(CryptoSystem):
@ -938,6 +1058,8 @@ class _JsonCryptoSystem(CryptoSystem):
done: bool
def __init__(self, api: _JsonVeilidAPI, cs_id: int):
super().__init__()
self.api = api
self.cs_id = cs_id
self.done = False
@ -979,6 +1101,9 @@ class _JsonCryptoSystem(CryptoSystem):
self.done = True
async def cached_dh(self, key: PublicKey, secret: SecretKey) -> SharedSecret:
assert isinstance(key, PublicKey)
assert isinstance(secret, SecretKey)
return SharedSecret(
raise_api_result(
await self.api.send_ndjson_request(
@ -993,6 +1118,9 @@ class _JsonCryptoSystem(CryptoSystem):
)
async def compute_dh(self, key: PublicKey, secret: SecretKey) -> SharedSecret:
assert isinstance(key, PublicKey)
assert isinstance(secret, SecretKey)
return SharedSecret(
raise_api_result(
await self.api.send_ndjson_request(
@ -1007,6 +1135,10 @@ class _JsonCryptoSystem(CryptoSystem):
)
async def generate_shared_secret(self, key: PublicKey, secret: SecretKey, domain: bytes) -> SharedSecret:
assert isinstance(key, PublicKey)
assert isinstance(secret, SecretKey)
assert isinstance(domain, bytes)
return SharedSecret(
raise_api_result(
await self.api.send_ndjson_request(
@ -1022,6 +1154,8 @@ class _JsonCryptoSystem(CryptoSystem):
)
async def random_bytes(self, len: int) -> bytes:
assert isinstance(len, int)
return urlsafe_b64decode_no_pad(
raise_api_result(
await self.api.send_ndjson_request(
@ -1045,6 +1179,9 @@ class _JsonCryptoSystem(CryptoSystem):
)
async def hash_password(self, password: bytes, salt: bytes) -> str:
assert isinstance(password, bytes)
assert isinstance(salt, bytes)
return raise_api_result(
await self.api.send_ndjson_request(
Operation.CRYPTO_SYSTEM,
@ -1057,6 +1194,9 @@ class _JsonCryptoSystem(CryptoSystem):
)
async def verify_password(self, password: bytes, password_hash: str) -> bool:
assert isinstance(password, bytes)
assert isinstance(password_hash, str)
return raise_api_result(
await self.api.send_ndjson_request(
Operation.CRYPTO_SYSTEM,
@ -1069,6 +1209,9 @@ class _JsonCryptoSystem(CryptoSystem):
)
async def derive_shared_secret(self, password: bytes, salt: bytes) -> SharedSecret:
assert isinstance(password, bytes)
assert isinstance(salt, bytes)
return SharedSecret(
raise_api_result(
await self.api.send_ndjson_request(
@ -1119,6 +1262,8 @@ class _JsonCryptoSystem(CryptoSystem):
)
async def generate_hash(self, data: bytes) -> HashDigest:
assert isinstance(data, bytes)
return HashDigest(
raise_api_result(
await self.api.send_ndjson_request(
@ -1132,6 +1277,9 @@ class _JsonCryptoSystem(CryptoSystem):
)
async def validate_key_pair(self, key: PublicKey, secret: SecretKey) -> bool:
assert isinstance(key, PublicKey)
assert isinstance(secret, SecretKey)
return raise_api_result(
await self.api.send_ndjson_request(
Operation.CRYPTO_SYSTEM,
@ -1144,6 +1292,9 @@ class _JsonCryptoSystem(CryptoSystem):
)
async def validate_hash(self, data: bytes, hash_digest: HashDigest) -> bool:
assert isinstance(data, bytes)
assert isinstance(hash_digest, HashDigest)
return raise_api_result(
await self.api.send_ndjson_request(
Operation.CRYPTO_SYSTEM,
@ -1156,6 +1307,9 @@ class _JsonCryptoSystem(CryptoSystem):
)
async def distance(self, key1: CryptoKey, key2: CryptoKey) -> CryptoKeyDistance:
assert isinstance(key1, CryptoKey)
assert isinstance(key2, CryptoKey)
return CryptoKeyDistance(
raise_api_result(
await self.api.send_ndjson_request(
@ -1170,6 +1324,10 @@ class _JsonCryptoSystem(CryptoSystem):
)
async def sign(self, key: PublicKey, secret: SecretKey, data: bytes) -> Signature:
assert isinstance(key, PublicKey)
assert isinstance(secret, SecretKey)
assert isinstance(data, bytes)
return Signature(
raise_api_result(
await self.api.send_ndjson_request(
@ -1185,6 +1343,10 @@ class _JsonCryptoSystem(CryptoSystem):
)
async def verify(self, key: PublicKey, data: bytes, signature: Signature):
assert isinstance(key, PublicKey)
assert isinstance(data, bytes)
assert isinstance(signature, Signature)
return raise_api_result(
await self.api.send_ndjson_request(
Operation.CRYPTO_SYSTEM,
@ -1214,6 +1376,11 @@ class _JsonCryptoSystem(CryptoSystem):
shared_secret: SharedSecret,
associated_data: Optional[bytes],
) -> bytes:
assert isinstance(body, bytes)
assert isinstance(nonce, Nonce)
assert isinstance(shared_secret, SharedSecret)
assert associated_data is None or isinstance(associated_data, bytes)
return urlsafe_b64decode_no_pad(
raise_api_result(
await self.api.send_ndjson_request(
@ -1236,6 +1403,11 @@ class _JsonCryptoSystem(CryptoSystem):
shared_secret: SharedSecret,
associated_data: Optional[bytes],
) -> bytes:
assert isinstance(body, bytes)
assert isinstance(nonce, Nonce)
assert isinstance(shared_secret, SharedSecret)
assert associated_data is None or isinstance(associated_data, bytes)
return urlsafe_b64decode_no_pad(
raise_api_result(
await self.api.send_ndjson_request(
@ -1252,6 +1424,9 @@ class _JsonCryptoSystem(CryptoSystem):
)
async def crypt_no_auth(self, body: bytes, nonce: Nonce, shared_secret: SharedSecret) -> bytes:
assert isinstance(body, bytes)
assert isinstance(nonce, Nonce)
assert isinstance(shared_secret, SharedSecret)
return urlsafe_b64decode_no_pad(
raise_api_result(
await self.api.send_ndjson_request(

View File

@ -2,7 +2,7 @@ import base64
import json
from enum import StrEnum
from functools import total_ordering
from typing import Any, Optional, Self, Tuple
from typing import Any, Optional, Self
####################################################################
@ -122,6 +122,7 @@ class EncodedString(str):
@classmethod
def from_bytes(cls, b: bytes) -> Self:
assert isinstance(b, bytes)
return cls(urlsafe_b64encode_no_pad(b))
@ -160,6 +161,8 @@ class Nonce(EncodedString):
class KeyPair(str):
@classmethod
def from_parts(cls, key: PublicKey, secret: SecretKey) -> Self:
assert isinstance(key, PublicKey)
assert isinstance(secret, SecretKey)
return cls(f"{key}:{secret}")
def key(self) -> PublicKey:
@ -168,7 +171,7 @@ class KeyPair(str):
def secret(self) -> SecretKey:
return SecretKey(self.split(":", 1)[1])
def to_parts(self) -> Tuple[PublicKey, SecretKey]:
def to_parts(self) -> tuple[PublicKey, SecretKey]:
public, secret = self.split(":", 1)
return (PublicKey(public), SecretKey(secret))
@ -188,6 +191,8 @@ class CryptoTyped(str):
class TypedKey(CryptoTyped):
@classmethod
def from_value(cls, kind: CryptoKind, value: PublicKey) -> Self:
assert isinstance(kind, CryptoKind)
assert isinstance(value, PublicKey)
return cls(f"{kind}:{value}")
def value(self) -> PublicKey:
@ -197,6 +202,8 @@ class TypedKey(CryptoTyped):
class TypedSecret(CryptoTyped):
@classmethod
def from_value(cls, kind: CryptoKind, value: SecretKey) -> Self:
assert isinstance(kind, CryptoKind)
assert isinstance(value, SecretKey)
return cls(f"{kind}:{value}")
def value(self) -> SecretKey:
@ -206,6 +213,8 @@ class TypedSecret(CryptoTyped):
class TypedKeyPair(CryptoTyped):
@classmethod
def from_value(cls, kind: CryptoKind, value: KeyPair) -> Self:
assert isinstance(kind, CryptoKind)
assert isinstance(value, KeyPair)
return cls(f"{kind}:{value}")
def value(self) -> KeyPair:
@ -215,6 +224,8 @@ class TypedKeyPair(CryptoTyped):
class TypedSignature(CryptoTyped):
@classmethod
def from_value(cls, kind: CryptoKind, value: Signature) -> Self:
assert isinstance(kind, CryptoKind)
assert isinstance(value, Signature)
return cls(f"{kind}:{value}")
def value(self) -> Signature:
@ -226,7 +237,7 @@ class ValueSubkey(int):
class ValueSeqNum(int):
pass
NONE = 4294967295
####################################################################
@ -284,10 +295,13 @@ class NewPrivateRouteResult:
blob: bytes
def __init__(self, route_id: RouteId, blob: bytes):
assert isinstance(route_id, RouteId)
assert isinstance(blob, bytes)
self.route_id = route_id
self.blob = blob
def to_tuple(self) -> Tuple[RouteId, bytes]:
def to_tuple(self) -> tuple[RouteId, bytes]:
return (self.route_id, self.blob)
@classmethod
@ -300,6 +314,9 @@ class DHTSchemaSMPLMember:
m_cnt: int
def __init__(self, m_key: PublicKey, m_cnt: int):
assert isinstance(m_key, PublicKey)
assert isinstance(m_cnt, int)
self.m_key = m_key
self.m_cnt = m_cnt
@ -321,10 +338,15 @@ class DHTSchema:
@classmethod
def dflt(cls, o_cnt: int) -> Self:
assert isinstance(o_cnt, int)
return cls(DHTSchemaKind.DFLT, o_cnt=o_cnt)
@classmethod
def smpl(cls, o_cnt: int, members: list[DHTSchemaSMPLMember]) -> Self:
assert isinstance(o_cnt, int)
assert isinstance(members, list)
for m in members:
assert isinstance(m, DHTSchemaSMPLMember)
return cls(DHTSchemaKind.SMPL, o_cnt=o_cnt, members=members)
@classmethod
@ -404,8 +426,8 @@ class DHTRecordReport:
@classmethod
def from_json(cls, j: dict) -> Self:
return cls(
[[p[0], p[1]] for p in j["subkeys"]],
[[p[0], p[1]] for p in j["offline_subkeys"]],
[(p[0], p[1]) for p in j["subkeys"]],
[(p[0], p[1]) for p in j["offline_subkeys"]],
[ValueSeqNum(s) for s in j["local_seqs"]],
[ValueSeqNum(s) for s in j["network_seqs"]],
)

View File

@ -53,8 +53,8 @@ geolocation = ["veilid-core/geolocation"]
[dependencies]
veilid-core = { path = "../veilid-core", default-features = false }
tracing = { version = "^0.1.40", features = ["log", "attributes"] }
tracing-subscriber = { version = "^0.3.18", features = ["env-filter", "time"] }
tracing = { version = "^0.1.41", features = ["log", "attributes"] }
tracing-subscriber = { version = "^0.3.19", features = ["env-filter", "time"] }
tracing-appender = "^0.2.3"
tracing-opentelemetry = "^0.24.0"
# Buggy: tracing-error = "^0"
@ -62,21 +62,21 @@ opentelemetry = { version = "^0.23" }
opentelemetry-otlp = { version = "^0.16.0", default-features = false, optional = true }
opentelemetry_sdk = "0.23.0"
opentelemetry-semantic-conventions = "^0.16.0"
async-std = { version = "^1.12.0", features = ["unstable"], optional = true }
tokio = { version = "^1.38.1", features = ["full", "tracing"], optional = true }
tokio-stream = { version = "^0.1.15", features = ["net"], optional = true }
tokio-util = { version = "^0.7.11", features = ["compat"], optional = true }
async-std = { version = "^1.13.0", features = ["unstable"], optional = true }
tokio = { version = "^1.43.0", features = ["full", "tracing"], optional = true }
tokio-stream = { version = "^0.1.17", features = ["net"], optional = true }
tokio-util = { version = "^0.7.13", features = ["compat"], optional = true }
console-subscriber = { version = "^0.3.0", optional = true }
async-tungstenite = { version = "^0.27.0", features = ["async-tls"] }
color-eyre = { version = "^0.6.3", default-features = false }
backtrace = "^0.3.71"
clap = { version = "^4.5.9", features = ["derive", "string", "wrap_help"] }
clap = { version = "^4.5.31", features = ["derive", "string", "wrap_help"] }
directories = "^5.0.1"
parking_lot = "^0.12.3"
config = { version = "^0.14.0", default-features = false, features = ["yaml"] }
config = { version = "^0.14.1", default-features = false, features = ["yaml"] }
cfg-if = "^1.0.0"
serde = "^1.0.204"
serde_derive = "^1.0.204"
serde = "^1.0.218"
serde_derive = "^1.0.218"
serde_yaml = { package = "serde_yaml_ng", version = "^0.10.0" }
json = "^0"
futures-util = { version = "^0", default-features = false, features = [
@ -91,10 +91,10 @@ rpassword = "^7"
hostname = "^0"
stop-token = { version = "^0", default-features = false }
sysinfo = { version = "^0.30.13", default-features = false }
wg = { version = "^0.9.1", features = ["future"] }
wg = { version = "^0.9.2", features = ["future"] }
tracing-flame = { version = "0.2.0", optional = true }
time = { version = "0.3.36", features = ["local-offset"] }
chrono = "0.4.38"
time = { version = "0.3.38", features = ["local-offset"] }
chrono = "0.4.40"
[target.'cfg(windows)'.dependencies]
windows-service = "^0"
@ -108,10 +108,10 @@ nix = "^0.29.0"
tracing-perfetto = { version = "0.1.5", optional = true }
[target.'cfg(target_os = "linux")'.dependencies]
tracing-journald = "^0.3.0"
tracing-journald = "^0.3.1"
[dev-dependencies]
serial_test = "^3.1.1"
serial_test = "^3.2.0"
[lints]
workspace = true

View File

@ -47,6 +47,7 @@ veilid_tools_android_tests = ["dep:paranoid-android"]
veilid_tools_ios_tests = ["dep:tracing", "dep:oslog", "dep:tracing-oslog"]
tracing = ["dep:tracing", "dep:tracing-subscriber", "tokio/tracing"]
debug-locks = []
debug-duration-timeout = []
virtual-network = []
virtual-network-server = [
@ -67,66 +68,65 @@ virtual-router-bin = [
]
[dependencies]
tracing = { version = "0.1.40", features = [
tracing = { version = "0.1.41", features = [
"log",
"attributes",
], optional = true }
tracing-subscriber = { version = "0.3.18", features = [
tracing-subscriber = { version = "0.3.19", features = [
"env-filter",
"time",
], optional = true }
log = { version = "0.4.22" }
log = { version = "0.4.26" }
eyre = "0.6.12"
static_assertions = "1.1.0"
serde = { version = "1.0.214", features = ["derive", "rc"] }
postcard = { version = "1.0.10", features = ["use-std"] }
serde = { version = "1.0.218", features = ["derive", "rc"] }
postcard = { version = "1.1.1", features = ["use-std"] }
cfg-if = "1.0.0"
thiserror = "1.0.63"
futures-util = { version = "0.3.30", default-features = false, features = [
thiserror = "1.0.69"
futures-util = { version = "0.3.31", default-features = false, features = [
"alloc",
] }
futures_codec = "0.4.1"
parking_lot = "0.12.3"
async-lock = "3.4.0"
once_cell = "1.19.0"
once_cell = "1.20.3"
stop-token = { version = "0.7.0", default-features = false }
rand = "0.8.5"
rand_core = "0.6.4"
backtrace = "0.3.71"
backtrace = "^0.3.71"
fn_name = "0.1.0"
range-set-blaze = "0.1.16"
flume = { version = "0.11.0", features = ["async"] }
flume = { version = "0.11.1", features = ["async"] }
imbl = { version = "3.0.0", features = ["serde"] }
# Dependencies for native builds only
# Linux, Windows, Mac, iOS, Android
[target.'cfg(not(all(target_arch = "wasm32", target_os = "unknown")))'.dependencies]
async-io = { version = "1.13.0" }
async-std = { version = "1.12.0", features = ["unstable"], optional = true }
async-std = { version = "1.13.0", features = ["unstable"], optional = true }
bugsalot = { package = "veilid-bugsalot", version = "0.2.0", optional = true }
time = { version = "0.3.36", features = [
time = { version = "0.3.38", features = [
"local-offset",
"formatting",
], optional = true }
chrono = "0.4.38"
chrono = "0.4.40"
ctrlc = "^3"
futures-util = { version = "0.3.30", default-features = false, features = [
futures-util = { version = "0.3.31", default-features = false, features = [
"async-await",
"sink",
"std",
"io",
] }
indent = { version = "0.1.1", optional = true }
libc = "0.2.155"
libc = "0.2.170"
nix = { version = "0.27.1", features = ["user"] }
socket2 = { version = "0.5.7", features = ["all"] }
tokio = { version = "1.38.1", features = ["full"], optional = true }
tokio-util = { version = "0.7.11", features = ["compat"], optional = true }
tokio-stream = { version = "0.1.15", features = ["net"], optional = true }
socket2 = { version = "0.5.8", features = ["all"] }
tokio = { version = "1.43.0", features = ["full"], optional = true }
tokio-util = { version = "0.7.13", features = ["compat"], optional = true }
tokio-stream = { version = "0.1.17", features = ["net"], optional = true }
ws_stream_tungstenite = { version = "0.14.0", optional = true }
async-tungstenite = { version = "0.28.0", optional = true }
async-tungstenite = { version = "0.28.2", optional = true }
clap = { version = "4", features = ["derive"], optional = true }
ipnet = { version = "2", features = ["serde"], optional = true }
serde_yaml = { package = "serde_yaml_ng", version = "^0.10.0", optional = true }
@ -135,9 +135,9 @@ rand_chacha = { version = "0.3.1", optional = true }
# Dependencies for WASM builds only
[target.'cfg(all(target_arch = "wasm32", target_os = "unknown"))'.dependencies]
wasm-bindgen = "0.2.92"
js-sys = "0.3.70"
wasm-bindgen-futures = "0.4.42"
wasm-bindgen = "0.2.100"
js-sys = "0.3.77"
wasm-bindgen-futures = "0.4.50"
async_executors = { version = "0.7.0", default-features = false }
getrandom = { version = "0.2", features = ["js"] }
ws_stream_wasm = { version = "0.7.4", optional = true }
@ -183,10 +183,9 @@ serial_test = { version = "2.0.0", default-features = false, features = [
"async",
] }
console_error_panic_hook = "0.1.7"
wasm-bindgen-test = "0.3.42"
wee_alloc = "0.4.5"
wasm-bindgen-test = "0.3.50"
wasm-logger = "0.2.0"
tracing-wasm = { version = "0.2.1" }
veilid-tracing-wasm = "^0"
### BUILD OPTIONS

View File

@ -14,9 +14,6 @@ type SequenceType = u16;
const HEADER_LEN: usize = 8;
const MAX_LEN: usize = LengthType::MAX as usize;
// XXX: keep statistics on all drops and why we dropped them
// XXX: move to config eventually?
/// The hard-coded maximum fragment size used by AssemblyBuffer
///
/// Eventually this should be parameterized and made configurable.
@ -119,7 +116,7 @@ impl PeerMessages {
let mut assembly = MessageAssembly {
timestamp,
seq,
data: vec![0u8; len as usize],
data: unsafe { unaligned_u8_vec_uninit(len as usize) },
parts: RangeSetBlaze::from_iter([part_start..=part_end]),
};
assembly.data[part_start as usize..=part_end as usize].copy_from_slice(chunk);
@ -229,6 +226,7 @@ struct AssemblyBufferUnlockedInner {
/// * No sequencing of packets. Packets may still be delivered to the application out of order, but this guarantees that only whole packets will be delivered if all of their fragments are received.
#[derive(Clone)]
#[must_use]
pub struct AssemblyBuffer {
inner: Arc<Mutex<AssemblyBufferInner>>,
unlocked_inner: Arc<AssemblyBufferUnlockedInner>,
@ -247,7 +245,6 @@ impl AssemblyBuffer {
}
}
#[must_use]
pub fn new() -> Self {
Self {
inner: Arc::new(Mutex::new(Self::new_inner())),
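Note: the assembly_buffer.rs hunks above preallocate the message buffer to its full length and track which byte spans have arrived in a RangeSetBlaze. Below is a minimal, self-contained sketch of that bookkeeping using the range-set-blaze crate pinned in this workspace; the lengths and fragment boundaries are made up for illustration, and a zero-filled Vec stands in for the unsafe uninitialized-allocation fast path:

use range_set_blaze::RangeSetBlaze;

fn main() {
    let len: u32 = 100;
    // Stand-in for `unaligned_u8_vec_uninit`; zero-filling keeps this sketch safe.
    let mut data = vec![0u8; len as usize];

    // First fragment covers bytes 0..=49.
    let mut parts = RangeSetBlaze::from_iter([0u32..=49]);
    data[0..=49].copy_from_slice(&[1u8; 50]);

    // Second fragment covers bytes 50..=99.
    parts.ranges_insert(50..=99);
    data[50..=99].copy_from_slice(&[2u8; 50]);

    // The message is complete once the received ranges collapse into a
    // single range spanning the whole preallocated buffer.
    assert_eq!(parts, RangeSetBlaze::from_iter([0..=len - 1]));
}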

View File

@ -10,7 +10,7 @@ where
{
table: AsyncTagLockTable<T>,
tag: T,
_guard: AsyncMutexGuardArc<()>,
guard: Option<AsyncMutexGuardArc<()>>,
}
impl<T> AsyncTagLockGuard<T>
@ -21,7 +21,7 @@ where
Self {
table,
tag,
_guard: guard,
guard: Some(guard),
}
}
}
@ -45,7 +45,8 @@ where
if guards == 0 {
inner.table.remove(&self.tag).unwrap();
}
// Proceed with releasing _guard, which may cause some concurrent tag lock to acquire
// Proceed with releasing guard, which may cause some concurrent tag lock to acquire
drop(self.guard.take());
}
}
@ -153,7 +154,7 @@ where
}
std::collections::hash_map::Entry::Vacant(v) => {
let mutex = Arc::new(AsyncMutex::new(()));
let guard = asyncmutex_try_lock_arc!(mutex)?;
let guard = asyncmutex_try_lock_arc!(mutex).unwrap();
v.insert(AsyncTagLockTableEntry { mutex, guards: 1 });
guard
}
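Note: the async_tag_lock.rs change wraps the held mutex guard in an Option so the Drop implementation can release it at an explicit point, after the table bookkeeping, instead of relying on implicit field drop order. A reduced, self-contained model of the pattern, with std::sync::Mutex standing in for the async mutex:

use std::sync::{Mutex, MutexGuard};

struct ExplicitRelease<'a> {
    // Held in an Option so Drop can decide exactly when to let go.
    guard: Option<MutexGuard<'a, ()>>,
}

impl<'a> Drop for ExplicitRelease<'a> {
    fn drop(&mut self) {
        // ... table bookkeeping would run here first ...
        // Now release the lock, which may let a concurrent waiter acquire it.
        drop(self.guard.take());
    }
}

fn main() {
    let mutex = Mutex::new(());
    let held = ExplicitRelease {
        guard: Some(mutex.lock().unwrap()),
    };
    drop(held); // the lock is released inside Drop::drop, at the marked point
    assert!(mutex.try_lock().is_ok());
}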

View File

@ -278,85 +278,3 @@ macro_rules! network_result_try {
}
};
}
#[macro_export]
macro_rules! network_result_value_or_log {
($self:ident $r:expr => $f:expr) => {
network_result_value_or_log!($self $r => [ "" ] $f )
};
($self:ident $r:expr => [ $d:expr ] $f:expr) => { {
let __extra_message = if debug_target_enabled!("network_result") {
$d.to_string()
} else {
"".to_string()
};
match $r {
NetworkResult::Timeout => {
veilid_log!($self debug
"{} at {}@{}:{} in {}{}",
"Timeout",
file!(),
line!(),
column!(),
fn_name::uninstantiated!(),
__extra_message
);
$f
}
NetworkResult::ServiceUnavailable(ref s) => {
veilid_log!($self debug
"{}({}) at {}@{}:{} in {}{}",
"ServiceUnavailable",
s,
file!(),
line!(),
column!(),
fn_name::uninstantiated!(),
__extra_message
);
$f
}
NetworkResult::NoConnection(ref e) => {
veilid_log!($self debug
"{}({}) at {}@{}:{} in {}{}",
"No connection",
e.to_string(),
file!(),
line!(),
column!(),
fn_name::uninstantiated!(),
__extra_message
);
$f
}
NetworkResult::AlreadyExists(ref e) => {
veilid_log!($self debug
"{}({}) at {}@{}:{} in {}{}",
"Already exists",
e.to_string(),
file!(),
line!(),
column!(),
fn_name::uninstantiated!(),
__extra_message
);
$f
}
NetworkResult::InvalidMessage(ref s) => {
veilid_log!($self debug
"{}({}) at {}@{}:{} in {}{}",
"Invalid message",
s,
file!(),
line!(),
column!(),
fn_name::uninstantiated!(),
__extra_message
);
$f
}
NetworkResult::Value(v) => v,
}
} };
}

View File

@ -125,27 +125,33 @@ pub fn display_duration(dur: u64) -> String {
let secs = dur / SEC;
let dur = dur % SEC;
let msecs = dur / MSEC;
let dur = dur % MSEC;
format!(
"{}{}{}{}.{:03}s",
if days != 0 {
format!("{}d", days)
} else {
"".to_owned()
},
if hours != 0 {
format!("{}h", hours)
} else {
"".to_owned()
},
if mins != 0 {
format!("{}m", mins)
} else {
"".to_owned()
},
secs,
msecs
)
// Sub-second durations: format as milliseconds with a microsecond fraction
if days == 0 && hours == 0 && mins == 0 && secs == 0 {
format!("{}.{:03}ms", msecs, dur)
} else {
format!(
"{}{}{}{}.{:03}s",
if days != 0 {
format!("{}d", days)
} else {
"".to_owned()
},
if hours != 0 {
format!("{}h", hours)
} else {
"".to_owned()
},
if mins != 0 {
format!("{}m", mins)
} else {
"".to_owned()
},
secs,
msecs
)
}
}
#[must_use]
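Note: with this change, sub-second durations get their own millisecond format instead of printing as "0.00Xs". A worked restatement of the logic, assuming `dur` is in microseconds (which the successive MSEC/SEC divisions imply); the day/hour/minute constants are reconstructed here because the hunk's context begins below them:

const MSEC: u64 = 1_000;
const SEC: u64 = 1_000 * MSEC;
const MIN: u64 = 60 * SEC;
const HOUR: u64 = 60 * MIN;
const DAY: u64 = 24 * HOUR;

fn display_duration(dur: u64) -> String {
    let days = dur / DAY;
    let dur = dur % DAY;
    let hours = dur / HOUR;
    let dur = dur % HOUR;
    let mins = dur / MIN;
    let dur = dur % MIN;
    let secs = dur / SEC;
    let dur = dur % SEC;
    let msecs = dur / MSEC;
    let dur = dur % MSEC;
    if days == 0 && hours == 0 && mins == 0 && secs == 0 {
        // Sub-second: milliseconds with a three-digit microsecond fraction.
        format!("{}.{:03}ms", msecs, dur)
    } else {
        format!(
            "{}{}{}{}.{:03}s",
            if days != 0 { format!("{}d", days) } else { String::new() },
            if hours != 0 { format!("{}h", hours) } else { String::new() },
            if mins != 0 { format!("{}m", mins) } else { String::new() },
            secs,
            msecs
        )
    }
}

fn main() {
    assert_eq!(display_duration(1_500), "1.500ms"); // new sub-second path
    assert_eq!(display_duration(61_001_500), "1m1.001s"); // unchanged path
}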

View File

@ -522,13 +522,33 @@ pub fn is_debug_backtrace_enabled() -> bool {
}
#[track_caller]
pub fn debug_duration<R, F: Future<Output = R>, T: FnOnce() -> F>(f: T) -> impl Future<Output = R> {
let location = std::panic::Location::caller();
pub fn debug_duration<R, F: Future<Output = R>, T: FnOnce() -> F>(
f: T,
opt_timeout_us: Option<u64>,
) -> impl Future<Output = R> {
let location = core::panic::Location::caller();
async move {
let t1 = get_timestamp();
let out = f().await;
let t2 = get_timestamp();
debug!("duration@{}: {}", location, display_duration(t2 - t1));
let duration_us = t2 - t1;
if let Some(timeout_us) = opt_timeout_us {
if duration_us > timeout_us {
#[cfg(not(feature = "debug-duration-timeout"))]
debug!(
"Excessive duration: {}\n{:?}",
display_duration(duration_us),
backtrace::Backtrace::new()
);
#[cfg(feature = "debug-duration-timeout")]
panic!(
"Duration panic timeout exceeded: {}",
display_duration(duration_us)
);
}
} else {
debug!("Duration: {} = {}", location, display_duration(duration_us),);
}
out
}
}
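Note: debug_duration now takes an optional timeout in microseconds alongside the closure. A sketch of the updated call shape, assuming debug_duration is in scope from veilid-tools and with placeholder bodies standing in for the timed work:

async fn timed_work() {
    // None keeps the old behavior: always log the measured duration.
    debug_duration(|| async { /* fast path */ }, None).await;

    // Some(threshold) logs a backtrace (or panics when the
    // `debug-duration-timeout` feature is enabled) if the future
    // takes longer than 250ms to resolve.
    debug_duration(|| async { /* slow path */ }, Some(250_000)).await;
}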

View File

@ -9,10 +9,6 @@ use wasm_bindgen_test::*;
wasm_bindgen_test_configure!(run_in_browser);
extern crate wee_alloc;
#[global_allocator]
static ALLOC: wee_alloc::WeeAlloc = wee_alloc::WeeAlloc::INIT;
static SETUP_ONCE: Once = Once::new();
pub fn setup() -> () {
SETUP_ONCE.call_once(|| {
@ -22,8 +18,8 @@ pub fn setup() -> () {
let config = veilid_tracing_wasm::WASMLayerConfig::new()
.with_report_logs_in_timings(false)
.with_max_level(Level::TRACE)
.with_console_config(tracing_wasm::ConsoleConfig::ReportWithoutConsoleColor);
tracing_wasm::set_as_global_default_with_config(config);
.with_console_config(veilid_tracing_wasm::ConsoleConfig::ReportWithoutConsoleColor);
veilid_tracing_wasm::set_as_global_default_with_config(config);
} else {
wasm_logger::init(wasm_logger::Config::default());
}

View File

@ -28,7 +28,6 @@ tracing-subscriber = "^0"
wasm-bindgen = { version = "^0", features = ["serde-serialize"] }
console_error_panic_hook = "^0"
wee_alloc = "^0"
cfg-if = "^1"
wasm-bindgen-futures = "^0"
js-sys = "^0"

View File

@ -35,11 +35,6 @@ pub mod veilid_table_db_js;
mod wasm_helpers;
use wasm_helpers::*;
// Allocator
extern crate wee_alloc;
#[global_allocator]
static ALLOC: wee_alloc::WeeAlloc = wee_alloc::WeeAlloc::INIT;
// API Singleton
lazy_static! {
static ref VEILID_API: SendWrapper<RefCell<Option<veilid_core::VeilidAPI>>> =