Merge branch 'dht-testing' into 'main'

Networking Update

See merge request veilid/veilid!37
This commit is contained in:
John Smith 2023-06-27 23:08:11 +00:00
commit 55dda149f0
111 changed files with 3638 additions and 1170 deletions

4
.gitignore vendored
View File

@ -61,3 +61,7 @@ $RECYCLE.BIN/
### Rust
target/
logs/
flamegraph.svg
perf.data
perf.data.old

22
.vscode/launch.json vendored
View File

@ -103,6 +103,28 @@
],
"cwd": "${workspaceFolder}/target/debug/"
},
{
"type": "lldb",
"request": "launch",
"name": "Debug veilid-tools unit test",
"cargo": {
"args": [
"test",
"--no-run",
"--features=rt-tokio",
"--manifest-path",
"veilid-tools/Cargo.toml"
],
"filter": {
"kind": "cdylib",
"name": "veilid-tools"
}
},
"args": [
"${selectedText}"
],
"cwd": "${workspaceFolder}/target/debug/"
},
{
"type": "lldb",
"request": "launch",

6
Cargo.lock generated
View File

@ -6467,9 +6467,9 @@ dependencies = [
"netlink-packet-route",
"netlink-sys",
"nix 0.26.2",
"num-traits",
"once_cell",
"owning_ref",
"owo-colors",
"paranoid-android",
"parking_lot 0.12.1",
"paste",
@ -6606,8 +6606,10 @@ dependencies = [
"async_executors",
"backtrace",
"cfg-if 1.0.0",
"chrono",
"console_error_panic_hook",
"eyre",
"flume",
"fn_name",
"futures-util",
"jni 0.21.1",
@ -6622,10 +6624,10 @@ dependencies = [
"nix 0.26.2",
"once_cell",
"oslog",
"owo-colors",
"paranoid-android",
"parking_lot 0.11.2",
"rand 0.7.3",
"range-set-blaze",
"rust-fsm",
"send_wrapper 0.6.0",
"serial_test",

View File

@ -49,6 +49,7 @@ core:
client_whitelist_timeout_ms: 300000
reverse_connection_receipt_time_ms: 5000
hole_punch_receipt_time_ms: 5000
network_key_password: null
routing_table:
node_id: null
node_id_secret: null

View File

@ -188,8 +188,9 @@ network:
client_whitelist_timeout_ms: 300000
reverse_connection_receipt_time_ms: 5000
hole_punch_receipt_time_ms: 5000
node_id: ''
node_id_secret: ''
network_key_password: null
node_id: null
node_id_secret: null
bootstrap: ['bootstrap.veilid.net']
upnp: true
detect_address_changes: true

99
scripts/tools/keytool.py Normal file
View File

@ -0,0 +1,99 @@
import base64
import sys
import argparse
def urlsafe_b64encode_no_pad(b: bytes) -> str:
    """
    Encode bytes as URL-safe base64, dropping the trailing `=` padding
    from the encoded string.
    """
    encoded = base64.urlsafe_b64encode(b).decode()
    return encoded.rstrip("=")
def urlsafe_b64decode_no_pad(s: str) -> bytes:
    """
    Decode URL-safe base64 whose `=` padding was stripped, adding back
    only the padding actually required.

    Bug fix: the original computed `4 - (len(s) % 4)`, which is 4 when the
    input length is already a multiple of 4, appending four spurious `=`
    characters. Lenient decoders happen to tolerate that, but strict ones
    (e.g. `validate`/strict-mode decoding) reject it. `-len(s) % 4` yields
    0 in that case and 1..3 otherwise.
    """
    padding = -len(s) % 4
    return base64.urlsafe_b64decode(s + ("=" * padding))
def do_value(args):
    """Decode the `key` CLI argument from base64url and print it as hex."""
    decoded = urlsafe_b64decode_no_pad(args.key)
    print("key:", decoded.hex())
def dist(key1: bytes, key2: bytes) -> bytes:
    """
    XOR distance between two keys (Kademlia-style metric).

    Iterates over the length of `key1`, so `key2` must be at least as long.
    """
    return bytes(key1[i] ^ key2[i] for i in range(len(key1)))
def do_distance(args):
    """Print two base64url keys and the XOR distance between them, in hex."""
    key1 = urlsafe_b64decode_no_pad(args.key1)
    key2 = urlsafe_b64decode_no_pad(args.key2)
    print("key1:", key1.hex())
    print("key2:", key2.hex())
    print("dist:", dist(key1, key2).hex())
def keycmp(key1: bytes, key2: bytes) -> int:
    """
    Old-style three-way compare of two keys: -1 if key1 < key2,
    1 if key1 > key2, 0 if equal over the length of `key1`.
    """
    for i, b1 in enumerate(key1):
        b2 = key2[i]
        if b1 != b2:
            return -1 if b1 < b2 else 1
    return 0
def do_closer(args):
    """
    Compare the XOR distances from `key` to `near` and to `far`, printing
    the keys, both distances, and the three-way comparison result.
    """
    key = urlsafe_b64decode_no_pad(args.key)
    near = urlsafe_b64decode_no_pad(args.near)
    far = urlsafe_b64decode_no_pad(args.far)
    print(" key:", key.hex())
    print("near:", near.hex())
    print(" far:", far.hex())
    distance_near = dist(key, near)
    distance_far = dist(key, far)
    print(" dn:", distance_near.hex())
    print(" df:", distance_far.hex())
    print(" cmp:", keycmp(distance_near, distance_far))
def main():
    """CLI entry point: dispatch to the value / distance / closer subcommands."""
    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers(required=True)

    value_parser = subparsers.add_parser('value')
    value_parser.add_argument('key', type=str)
    value_parser.set_defaults(func=do_value)

    distance_parser = subparsers.add_parser('distance')
    distance_parser.add_argument('key1', type=str)
    distance_parser.add_argument('key2', type=str)
    distance_parser.set_defaults(func=do_distance)

    closer_parser = subparsers.add_parser('closer')
    closer_parser.add_argument('key', type=str)
    closer_parser.add_argument('near', type=str)
    closer_parser.add_argument('far', type=str)
    closer_parser.set_defaults(func=do_closer)

    args = parser.parse_args()
    args.func(args)
    sys.exit(0)


if __name__ == "__main__":
    main()

View File

@ -46,6 +46,7 @@ struct CommandProcessorInner {
server_addr: Option<SocketAddr>,
connection_waker: Eventual,
last_call_id: Option<u64>,
enable_app_messages: bool,
}
#[derive(Clone)]
@ -66,6 +67,7 @@ impl CommandProcessor {
server_addr: None,
connection_waker: Eventual::new(),
last_call_id: None,
enable_app_messages: false,
})),
}
}
@ -122,6 +124,10 @@ reply <call id> <message> reply to an AppCall not handled directly by
<call id> must be exact call id reported in VeilidUpdate
<message> can be a string (left trimmed) or
it can start with a '#' followed by a string of undelimited hex bytes
enable [flag] set a flag
disable [flag] unset a flag
valid flags include:
app_messages
"#
.to_owned(),
);
@ -305,6 +311,52 @@ reply <call id> <message> reply to an AppCall not handled directly by
Ok(())
}
/// `enable [flag]` command: turn on a named runtime flag.
///
/// The only recognized flag is `app_messages`; unknown flags produce an
/// error event in the UI. The work runs in a detached local task, so this
/// always returns `Ok(())` immediately.
///
/// Fixes vs. original: `rest` is already moved into the closure, so the
/// extra `rest.clone()` was redundant; `ui.send_callback(callback)` was
/// duplicated in both match arms (now hoisted after the match); the
/// trailing `return` in the unknown-flag arm was dead code.
pub fn cmd_enable(&self, rest: Option<String>, callback: UICallback) -> Result<(), String> {
    trace!("CommandProcessor::cmd_enable");
    let ui = self.ui_sender();
    let this = self.clone();
    spawn_detached_local(async move {
        let flag = rest.unwrap_or_default();
        match flag.as_str() {
            "app_messages" => {
                this.inner.lock().enable_app_messages = true;
                ui.add_node_event(Level::Info, format!("flag enabled: {}", flag));
            }
            _ => {
                ui.add_node_event(Level::Error, format!("unknown flag: {}", flag));
            }
        }
        // Both arms finish by completing the UI callback.
        ui.send_callback(callback);
    });
    Ok(())
}
/// `disable [flag]` command: turn off a named runtime flag.
///
/// Mirror of `cmd_enable`: only `app_messages` is recognized; unknown
/// flags produce an error event. Runs in a detached local task and always
/// returns `Ok(())` immediately.
///
/// Fixes vs. original: removed the redundant `rest.clone()` (the value is
/// already owned by the closure), deduplicated `ui.send_callback(callback)`
/// out of the match arms, and dropped the dead trailing `return`.
pub fn cmd_disable(&self, rest: Option<String>, callback: UICallback) -> Result<(), String> {
    trace!("CommandProcessor::cmd_disable");
    let ui = self.ui_sender();
    let this = self.clone();
    spawn_detached_local(async move {
        let flag = rest.unwrap_or_default();
        match flag.as_str() {
            "app_messages" => {
                this.inner.lock().enable_app_messages = false;
                ui.add_node_event(Level::Info, format!("flag disabled: {}", flag));
            }
            _ => {
                ui.add_node_event(Level::Error, format!("unknown flag: {}", flag));
            }
        }
        // Both arms finish by completing the UI callback.
        ui.send_callback(callback);
    });
    Ok(())
}
pub fn run_command(&self, command_line: &str, callback: UICallback) -> Result<(), String> {
//
let (cmd, rest) = Self::word_split(command_line);
@ -319,6 +371,8 @@ reply <call id> <message> reply to an AppCall not handled directly by
"debug" => self.cmd_debug(rest, callback),
"change_log_level" => self.cmd_change_log_level(rest, callback),
"reply" => self.cmd_reply(rest, callback),
"enable" => self.cmd_enable(rest, callback),
"disable" => self.cmd_disable(rest, callback),
_ => {
let ui = self.ui_sender();
ui.send_callback(callback);
@ -472,6 +526,10 @@ reply <call id> <message> reply to an AppCall not handled directly by
}
pub fn update_app_message(&self, msg: &json::JsonValue) {
if !self.inner.lock().enable_app_messages {
return;
}
let message = json_str_vec_u8(&msg["message"]);
// check if message body is ascii printable
@ -482,19 +540,34 @@ reply <call id> <message> reply to an AppCall not handled directly by
}
}
let (message, truncated) = if message.len() > 64 {
(&message[0..64], true)
} else {
(&message[..], false)
};
let strmsg = if printable {
String::from_utf8_lossy(&message).to_string()
format!("\"{}\"", String::from_utf8_lossy(&message).to_string())
} else {
hex::encode(message)
};
self.inner().ui_sender.add_node_event(
Level::Info,
format!("AppMessage ({:?}): {}", msg["sender"], strmsg),
format!(
"AppMessage ({:?}): {}{}",
msg["sender"],
strmsg,
if truncated { "..." } else { "" }
),
);
}
pub fn update_app_call(&self, call: &json::JsonValue) {
if !self.inner.lock().enable_app_messages {
return;
}
let message = json_str_vec_u8(&call["message"]);
// check if message body is ascii printable
@ -505,10 +578,16 @@ reply <call id> <message> reply to an AppCall not handled directly by
}
}
let strmsg = if printable {
String::from_utf8_lossy(&message).to_string()
let (message, truncated) = if message.len() > 64 {
(&message[0..64], true)
} else {
format!("#{}", hex::encode(&message))
(&message[..], false)
};
let strmsg = if printable {
format!("\"{}\"", String::from_utf8_lossy(&message).to_string())
} else {
hex::encode(message)
};
let id = json_str_u64(&call["call_id"]);
@ -516,8 +595,11 @@ reply <call id> <message> reply to an AppCall not handled directly by
self.inner().ui_sender.add_node_event(
Level::Info,
format!(
"AppCall ({:?}) id = {:016x} : {}",
call["sender"], id, strmsg
"AppCall ({:?}) id = {:016x} : {}{}",
call["sender"],
id,
strmsg,
if truncated { "..." } else { "" }
),
);

View File

@ -15,6 +15,7 @@ crypto-test = ["enable-crypto-vld0", "enable-crypto-none"]
crypto-test-none = ["enable-crypto-none"]
enable-crypto-vld0 = []
enable-crypto-none = []
verbose-tracing = []
rt-async-std = ["async-std", "async-std-resolver", "async_executors/async_std", "rtnetlink/smol_socket", "veilid-tools/rt-async-std"]
rt-tokio = ["tokio", "tokio-util", "tokio-stream", "trust-dns-resolver/tokio-runtime", "async_executors/tokio_tp", "async_executors/tokio_io", "async_executors/tokio_timer", "rtnetlink/tokio_socket", "veilid-tools/rt-tokio"]
rt-wasm-bindgen = ["veilid-tools/rt-wasm-bindgen", "async_executors/bindgen"]
@ -22,6 +23,7 @@ rt-wasm-bindgen = ["veilid-tools/rt-wasm-bindgen", "async_executors/bindgen"]
veilid_core_android_tests = ["dep:paranoid-android"]
veilid_core_ios_tests = ["dep:tracing-oslog"]
tracking = []
network-result-extra = ["veilid-tools/network-result-extra"]
[dependencies]
veilid-tools = { path = "../veilid-tools", features = [ "tracing" ] }
@ -52,8 +54,8 @@ owning_ref = "^0"
flume = { version = "^0", features = ["async"] }
enumset = { version= "^1", features = ["serde"] }
backtrace = { version = "^0" }
owo-colors = "^3"
stop-token = { version = "^0", default-features = false }
num-traits = "0.2.15"
ed25519-dalek = { version = "^1", default_features = false, features = ["alloc", "u64_backend"] }
x25519-dalek = { version = "^1", default_features = false, features = ["u64_backend"] }

View File

@ -196,7 +196,6 @@ impl AttachmentManager {
if let Err(err) = netman.startup().await {
error!("network startup failed: {}", err);
netman.shutdown().await;
restart = true;
break;
}

View File

@ -77,18 +77,7 @@ where
macro_rules! byte_array_type {
($name:ident, $size:expr, $encoded_size:expr) => {
#[derive(
Clone,
Copy,
Hash,
Eq,
PartialEq,
PartialOrd,
Ord,
RkyvArchive,
RkyvSerialize,
RkyvDeserialize,
)]
#[derive(Clone, Copy, Hash, RkyvArchive, RkyvSerialize, RkyvDeserialize)]
#[archive_attr(repr(C), derive(CheckBytes, Hash, Eq, PartialEq, PartialOrd, Ord))]
pub struct $name {
pub bytes: [u8; $size],
@ -125,6 +114,32 @@ macro_rules! byte_array_type {
}
}
impl PartialOrd for $name {
fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
Some(self.cmp(other))
}
}
impl Ord for $name {
fn cmp(&self, other: &Self) -> core::cmp::Ordering {
for n in 0..$size {
let c = self.bytes[n].cmp(&other.bytes[n]);
if c != core::cmp::Ordering::Equal {
return c;
}
}
core::cmp::Ordering::Equal
}
}
impl PartialEq for $name {
fn eq(&self, other: &Self) -> bool {
self.bytes == other.bytes
}
}
impl Eq for $name {}
impl $name {
pub fn new(bytes: [u8; $size]) -> Self {
Self { bytes }

View File

@ -66,7 +66,11 @@ impl Envelope {
}
}
pub fn from_signed_data(crypto: Crypto, data: &[u8]) -> VeilidAPIResult<Envelope> {
pub fn from_signed_data(
crypto: Crypto,
data: &[u8],
network_key: &Option<SharedSecret>,
) -> VeilidAPIResult<Envelope> {
// Ensure we are at least the length of the envelope
// Silent drop here, as we use zero length packets as part of the protocol for hole punching
if data.len() < MIN_ENVELOPE_SIZE {
@ -135,9 +139,22 @@ impl Envelope {
let recipient_id_slice: [u8; PUBLIC_KEY_LENGTH] = data[0x4A..0x6A]
.try_into()
.map_err(VeilidAPIError::internal)?;
let nonce: Nonce = Nonce::new(nonce_slice);
let sender_id = PublicKey::new(sender_id_slice);
let recipient_id = PublicKey::new(recipient_id_slice);
let mut nonce: Nonce = Nonce::new(nonce_slice);
let mut sender_id = PublicKey::new(sender_id_slice);
let mut recipient_id = PublicKey::new(recipient_id_slice);
// Apply network key (not the best, but it will keep networks from colliding without much overhead)
if let Some(nk) = network_key.as_ref() {
for n in 0..NONCE_LENGTH {
nonce.bytes[n] ^= nk.bytes[n];
}
for n in 0..CRYPTO_KEY_LENGTH {
sender_id.bytes[n] ^= nk.bytes[n];
}
for n in 0..CRYPTO_KEY_LENGTH {
recipient_id.bytes[n] ^= nk.bytes[n];
}
}
// Ensure sender_id and recipient_id are not the same
if sender_id == recipient_id {
@ -175,13 +192,20 @@ impl Envelope {
crypto: Crypto,
data: &[u8],
node_id_secret: &SecretKey,
network_key: &Option<SharedSecret>,
) -> VeilidAPIResult<Vec<u8>> {
// Get DH secret
let vcrypto = crypto
.get(self.crypto_kind)
.expect("need to ensure only valid crypto kinds here");
let dh_secret = vcrypto.cached_dh(&self.sender_id, node_id_secret)?;
let mut dh_secret = vcrypto.cached_dh(&self.sender_id, node_id_secret)?;
// Apply network key
if let Some(nk) = network_key.as_ref() {
for n in 0..CRYPTO_KEY_LENGTH {
dh_secret.bytes[n] ^= nk.bytes[n];
}
}
// Decrypt message without authentication
let body = vcrypto.crypt_no_auth_aligned_8(
&data[0x6A..data.len() - 64],
@ -197,6 +221,7 @@ impl Envelope {
crypto: Crypto,
body: &[u8],
node_id_secret: &SecretKey,
network_key: &Option<SharedSecret>,
) -> VeilidAPIResult<Vec<u8>> {
// Ensure body isn't too long
let envelope_size: usize = body.len() + MIN_ENVELOPE_SIZE;
@ -207,7 +232,7 @@ impl Envelope {
let vcrypto = crypto
.get(self.crypto_kind)
.expect("need to ensure only valid crypto kinds here");
let dh_secret = vcrypto.cached_dh(&self.recipient_id, node_id_secret)?;
let mut dh_secret = vcrypto.cached_dh(&self.recipient_id, node_id_secret)?;
// Write envelope body
let mut data = vec![0u8; envelope_size];
@ -229,6 +254,22 @@ impl Envelope {
// Write recipient node id
data[0x4A..0x6A].copy_from_slice(&self.recipient_id.bytes);
// Apply network key (not the best, but it will keep networks from colliding without much overhead)
if let Some(nk) = network_key.as_ref() {
for n in 0..SECRET_KEY_LENGTH {
dh_secret.bytes[n] ^= nk.bytes[n];
}
for n in 0..NONCE_LENGTH {
data[0x12 + n] ^= nk.bytes[n];
}
for n in 0..CRYPTO_KEY_LENGTH {
data[0x2A + n] ^= nk.bytes[n];
}
for n in 0..CRYPTO_KEY_LENGTH {
data[0x4A + n] ^= nk.bytes[n];
}
}
// Encrypt and authenticate message
let encrypted_body = vcrypto.crypt_no_auth_unaligned(body, &self.nonce.bytes, &dh_secret);

View File

@ -3,8 +3,16 @@ use super::*;
pub async fn test_envelope_round_trip(
envelope_version: EnvelopeVersion,
vcrypto: CryptoSystemVersion,
network_key: Option<SharedSecret>,
) {
info!("--- test envelope round trip ---");
if network_key.is_some() {
info!(
"--- test envelope round trip {} w/network key ---",
vcrypto.kind()
);
} else {
info!("--- test envelope round trip {} ---", vcrypto.kind());
}
// Create envelope
let ts = Timestamp::from(0x12345678ABCDEF69u64);
@ -25,15 +33,15 @@ pub async fn test_envelope_round_trip(
// Serialize to bytes
let enc_data = envelope
.to_encrypted_data(vcrypto.crypto(), body, &sender_secret)
.to_encrypted_data(vcrypto.crypto(), body, &sender_secret, &network_key)
.expect("failed to encrypt data");
// Deserialize from bytes
let envelope2 = Envelope::from_signed_data(vcrypto.crypto(), &enc_data)
let envelope2 = Envelope::from_signed_data(vcrypto.crypto(), &enc_data, &network_key)
.expect("failed to deserialize envelope from data");
let body2 = envelope2
.decrypt_body(vcrypto.crypto(), &enc_data, &recipient_secret)
.decrypt_body(vcrypto.crypto(), &enc_data, &recipient_secret, &network_key)
.expect("failed to decrypt envelope body");
// Compare envelope and body
@ -45,13 +53,13 @@ pub async fn test_envelope_round_trip(
let mut mod_enc_data = enc_data.clone();
mod_enc_data[enc_data_len - 1] ^= 0x80u8;
assert!(
Envelope::from_signed_data(vcrypto.crypto(), &mod_enc_data).is_err(),
Envelope::from_signed_data(vcrypto.crypto(), &mod_enc_data, &network_key).is_err(),
"should have failed to decode envelope with modified signature"
);
let mut mod_enc_data2 = enc_data.clone();
mod_enc_data2[enc_data_len - 65] ^= 0x80u8;
assert!(
Envelope::from_signed_data(vcrypto.crypto(), &mod_enc_data2).is_err(),
Envelope::from_signed_data(vcrypto.crypto(), &mod_enc_data2, &network_key).is_err(),
"should have failed to decode envelope with modified data"
);
}
@ -97,7 +105,9 @@ pub async fn test_all() {
for v in VALID_CRYPTO_KINDS {
let vcrypto = crypto.get(v).unwrap();
test_envelope_round_trip(ev, vcrypto.clone()).await;
test_envelope_round_trip(ev, vcrypto.clone(), None).await;
test_envelope_round_trip(ev, vcrypto.clone(), Some(vcrypto.random_shared_secret()))
.await;
test_receipt_round_trip(ev, vcrypto).await;
}
}

View File

@ -176,10 +176,10 @@ impl CryptoSystem for CryptoSystemVLD0 {
}
// Distance Metric
fn distance(&self, key1: &PublicKey, key2: &PublicKey) -> CryptoKeyDistance {
let mut bytes = [0u8; PUBLIC_KEY_LENGTH];
let mut bytes = [0u8; CRYPTO_KEY_LENGTH];
for (n, byte) in bytes.iter_mut().enumerate() {
*byte = key1.bytes[n] ^ key2.bytes[n];
for n in 0..CRYPTO_KEY_LENGTH {
bytes[n] = key1.bytes[n] ^ key2.bytes[n];
}
CryptoKeyDistance::new(bytes)

View File

@ -97,7 +97,6 @@ use tracing::*;
use veilid_tools::*;
type RkyvDefaultValidator<'t> = rkyv::validation::validators::DefaultValidator<'t>;
use futures_util::stream::FuturesUnordered;
use owo_colors::OwoColorize;
use schemars::{schema_for, JsonSchema};
use serde::*;
use stop_token::*;

View File

@ -34,14 +34,14 @@ impl ConnectionHandle {
self.descriptor.clone()
}
#[instrument(level="trace", skip(self, message), fields(message.len = message.len()))]
#[cfg_attr(feature="verbose-tracing", instrument(level="trace", skip(self, message), fields(message.len = message.len())))]
pub fn send(&self, message: Vec<u8>) -> ConnectionHandleSendResult {
match self.channel.send((Span::current().id(), message)) {
Ok(()) => ConnectionHandleSendResult::Sent,
Err(e) => ConnectionHandleSendResult::NotSent(e.0 .1),
}
}
#[instrument(level="trace", skip(self, message), fields(message.len = message.len()))]
#[cfg_attr(feature="verbose-tracing", instrument(level="trace", skip(self, message), fields(message.len = message.len())))]
pub async fn send_async(&self, message: Vec<u8>) -> ConnectionHandleSendResult {
match self
.channel

View File

@ -256,8 +256,8 @@ impl ConnectionManager {
log_net!(
"== get_or_create_connection local_addr={:?} dial_info={:?}",
local_addr.green(),
dial_info.green()
local_addr,
dial_info
);
// Kill off any possibly conflicting connections
@ -273,8 +273,8 @@ impl ConnectionManager {
{
log_net!(
"== Returning existing connection local_addr={:?} peer_address={:?}",
local_addr.green(),
peer_address.green()
local_addr,
peer_address
);
return Ok(NetworkResult::Value(conn));
@ -300,8 +300,8 @@ impl ConnectionManager {
{
log_net!(
"== Returning existing connection in race local_addr={:?} peer_address={:?}",
local_addr.green(),
peer_address.green()
local_addr,
peer_address
);
return Ok(NetworkResult::Value(conn));

View File

@ -0,0 +1,59 @@
use super::*;
impl NetworkManager {
// Direct bootstrap request handler (separate fallback mechanism from cheaper TXT bootstrap mechanism)
// Replies on the same (already established) connection with a JSON-serialized
// Vec<PeerInfo> describing bootstrap-capable nodes we know about.
#[instrument(level = "trace", skip(self), ret, err)]
pub(crate) async fn handle_boot_request(
&self,
descriptor: ConnectionDescriptor,
) -> EyreResult<NetworkResult<()>> {
let routing_table = self.routing_table();
// Get a couple of bootstrap-capable nodes from the routing table
let bootstrap_nodes = routing_table.find_bootstrap_nodes_filtered(2);
// Serialize out peer info
// Nodes without PublicInternet peer info are silently skipped here.
let bootstrap_peerinfo: Vec<PeerInfo> = bootstrap_nodes
.iter()
.filter_map(|nr| nr.make_peer_info(RoutingDomain::PublicInternet))
.collect();
let json_bytes = serialize_json(bootstrap_peerinfo).as_bytes().to_vec();
// Reply with a chunk of signed routing table
match self
.net()
.send_data_to_existing_connection(descriptor, json_bytes)
.await?
{
None => {
// Bootstrap reply was sent
Ok(NetworkResult::value(()))
}
Some(_) => Ok(NetworkResult::no_connection_other(
"bootstrap reply could not be sent",
)),
}
}
// Direct bootstrap request
// Sends BOOT_MAGIC to the given dial info and parses the JSON PeerInfo reply.
// Network-level failures are logged and yield an empty Vec rather than an Err;
// only bad UTF-8 or undeserializable JSON in the reply is an error.
#[instrument(level = "trace", err, skip(self))]
pub async fn boot_request(&self, dial_info: DialInfo) -> EyreResult<Vec<PeerInfo>> {
let timeout_ms = self.with_config(|c| c.network.rpc.timeout_ms);
// Send boot magic to requested peer address
let data = BOOT_MAGIC.to_vec();
let out_data: Vec<u8> = network_result_value_or_log!(self
.net()
.send_recv_data_unbound_to_dial_info(dial_info, data, timeout_ms)
.await? => [ format!(": dial_info={}, data.len={}", dial_info, data.len()) ]
{
return Ok(Vec::new());
});
let bootstrap_peerinfo: Vec<PeerInfo> =
deserialize_json(std::str::from_utf8(&out_data).wrap_err("bad utf8 in boot peerinfo")?)
.wrap_err("failed to deserialize boot peerinfo")?;
Ok(bootstrap_peerinfo)
}
}

View File

@ -5,6 +5,8 @@ mod native;
#[cfg(target_arch = "wasm32")]
mod wasm;
mod direct_boot;
mod send_data;
mod connection_handle;
mod connection_limits;
mod connection_manager;
@ -12,6 +14,7 @@ mod connection_table;
mod network_connection;
mod tasks;
mod types;
mod stats;
pub mod tests;
@ -20,6 +23,9 @@ pub mod tests;
pub use connection_manager::*;
pub use network_connection::*;
pub use types::*;
pub use send_data::*;
pub use direct_boot::*;
pub use stats::*;
////////////////////////////////////////////////////////////////////////////////////////
use connection_handle::*;
@ -42,6 +48,7 @@ use wasm::*;
pub const MAX_MESSAGE_SIZE: usize = MAX_ENVELOPE_SIZE;
pub const IPADDR_TABLE_SIZE: usize = 1024;
pub const IPADDR_MAX_INACTIVE_DURATION_US: TimestampDuration = TimestampDuration::new(300_000_000u64); // 5 minutes
pub const NODE_CONTACT_METHOD_CACHE_SIZE: usize = 1024;
pub const PUBLIC_ADDRESS_CHANGE_DETECTION_COUNT: usize = 3;
pub const PUBLIC_ADDRESS_CHECK_CACHE_SIZE: usize = 8;
pub const PUBLIC_ADDRESS_CHECK_TASK_INTERVAL_SECS: u32 = 60;
@ -67,38 +74,6 @@ struct NetworkComponents {
receipt_manager: ReceiptManager,
}
// Statistics per address
#[derive(Clone, Default)]
pub struct PerAddressStats {
last_seen_ts: Timestamp,
transfer_stats_accounting: TransferStatsAccounting,
transfer_stats: TransferStatsDownUp,
}
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub struct PerAddressStatsKey(IpAddr);
impl Default for PerAddressStatsKey {
fn default() -> Self {
Self(IpAddr::V4(Ipv4Addr::UNSPECIFIED))
}
}
// Statistics about the low-level network
#[derive(Clone)]
pub struct NetworkManagerStats {
self_stats: PerAddressStats,
per_address_stats: LruCache<PerAddressStatsKey, PerAddressStats>,
}
impl Default for NetworkManagerStats {
fn default() -> Self {
Self {
self_stats: PerAddressStats::default(),
per_address_stats: LruCache::new(IPADDR_TABLE_SIZE),
}
}
}
#[derive(Debug)]
struct ClientWhitelistEntry {
@ -130,6 +105,13 @@ pub(crate) enum NodeContactMethod {
/// Must use outbound relay to reach the node
OutboundRelay(NodeRef),
}
#[derive(Copy, Clone, Debug, PartialEq, Eq, Ord, PartialOrd, Hash)]
struct NodeContactMethodCacheKey {
own_node_info_ts: Option<Timestamp>,
target_node_info_ts: Timestamp,
target_node_ref_filter: Option<NodeRefFilter>,
target_node_ref_sequencing: Sequencing,
}
#[derive(Copy, Clone, Debug, PartialEq, Eq, Ord, PartialOrd, Hash)]
struct PublicAddressCheckCacheKey(ProtocolType, AddressType);
@ -138,6 +120,7 @@ struct PublicAddressCheckCacheKey(ProtocolType, AddressType);
struct NetworkManagerInner {
stats: NetworkManagerStats,
client_whitelist: LruCache<TypedKey, ClientWhitelistEntry>,
node_contact_method_cache: LruCache<NodeContactMethodCacheKey, NodeContactMethod>,
public_address_check_cache:
BTreeMap<PublicAddressCheckCacheKey, LruCache<IpAddr, SocketAddress>>,
public_address_inconsistencies_table:
@ -160,6 +143,8 @@ struct NetworkManagerUnlockedInner {
// Background processes
rolling_transfers_task: TickTask<EyreReport>,
public_address_check_task: TickTask<EyreReport>,
// Network Key
network_key: Option<SharedSecret>,
}
#[derive(Clone)]
@ -173,6 +158,7 @@ impl NetworkManager {
NetworkManagerInner {
stats: NetworkManagerStats::default(),
client_whitelist: LruCache::new_unbounded(),
node_contact_method_cache: LruCache::new(NODE_CONTACT_METHOD_CACHE_SIZE),
public_address_check_cache: BTreeMap::new(),
public_address_inconsistencies_table: BTreeMap::new(),
}
@ -185,6 +171,7 @@ impl NetworkManager {
#[cfg(feature="unstable-blockstore")]
block_store: BlockStore,
crypto: Crypto,
network_key: Option<SharedSecret>,
) -> NetworkManagerUnlockedInner {
NetworkManagerUnlockedInner {
config,
@ -199,6 +186,7 @@ impl NetworkManager {
update_callback: RwLock::new(None),
rolling_transfers_task: TickTask::new(ROLLING_TRANSFERS_INTERVAL_SECS),
public_address_check_task: TickTask::new(PUBLIC_ADDRESS_CHECK_TASK_INTERVAL_SECS),
network_key,
}
}
@ -211,6 +199,37 @@ impl NetworkManager {
block_store: BlockStore,
crypto: Crypto,
) -> Self {
// Make the network key
let network_key = {
let c = config.get();
let network_key_password = if let Some(nkp) = c.network.network_key_password.clone() {
Some(nkp)
} else {
if c.network.routing_table.bootstrap.contains(&"bootstrap.veilid.net".to_owned()) {
None
} else {
Some(c.network.routing_table.bootstrap.join(","))
}
};
let network_key = if let Some(network_key_password) = network_key_password {
if !network_key_password.is_empty() {
info!("Using network key");
let bcs = crypto.best();
// Yes the use of the salt this way is generally bad, but this just needs to be hashed
Some(bcs.derive_shared_secret(network_key_password.as_bytes(), network_key_password.as_bytes()).expect("failed to derive network key"))
} else {
None
}
} else {
None
};
network_key
};
let this = Self {
inner: Arc::new(Mutex::new(Self::new_inner())),
unlocked_inner: Arc::new(Self::new_unlocked_inner(
@ -221,6 +240,7 @@ impl NetworkManager {
#[cfg(feature="unstable-blockstore")]
block_store,
crypto,
network_key,
)),
};
@ -756,7 +776,7 @@ impl NetworkManager {
}
/// Builds an envelope for sending over the network
#[instrument(level = "trace", skip(self, body), err)]
#[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self, body), err))]
fn build_envelope<B: AsRef<[u8]>>(
&self,
dest_node_id: TypedKey,
@ -779,7 +799,7 @@ impl NetworkManager {
// Encode envelope
let envelope = Envelope::new(version, node_id.kind, ts, nonce, node_id.value, dest_node_id.value);
envelope
.to_encrypted_data(self.crypto(), body.as_ref(), &node_id_secret)
.to_encrypted_data(self.crypto(), body.as_ref(), &node_id_secret, &self.unlocked_inner.network_key)
.wrap_err("envelope failed to encode")
}
@ -787,7 +807,7 @@ impl NetworkManager {
/// node_ref is the direct destination to which the envelope will be sent
/// If 'destination_node_ref' is specified, it can be different than the node_ref being sent to
/// which will cause the envelope to be relayed
#[instrument(level = "trace", skip(self, body), ret, err)]
#[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self, body), ret, err))]
pub async fn send_envelope<B: AsRef<[u8]>>(
&self,
node_ref: NodeRef,
@ -826,7 +846,7 @@ impl NetworkManager {
}
/// Called by the RPC handler when we want to issue an direct receipt
#[instrument(level = "trace", skip(self, rcpt_data), err)]
#[instrument(level = "debug", skip(self, rcpt_data), err)]
pub async fn send_out_of_band_receipt(
&self,
dial_info: DialInfo,
@ -839,473 +859,26 @@ impl NetworkManager {
// should not be subject to our ability to decode it
// Send receipt directly
log_net!(debug "send_out_of_band_receipt: dial_info={}", dial_info);
network_result_value_or_log!(self
.net()
.send_data_unbound_to_dial_info(dial_info, rcpt_data)
.await? => {
.await? => [ format!(": dial_info={}, rcpt_data.len={}", dial_info, rcpt_data.len()) ] {
return Ok(());
}
);
Ok(())
}
/// Send a reverse connection signal and wait for the return receipt over it
/// Then send the data across the new connection
/// Only usable for PublicInternet routing domain
#[instrument(level = "trace", skip(self, data), err)]
pub async fn do_reverse_connect(
&self,
relay_nr: NodeRef,
target_nr: NodeRef,
data: Vec<u8>,
) -> EyreResult<NetworkResult<ConnectionDescriptor>> {
// Build a return receipt for the signal
let receipt_timeout = ms_to_us(
self.unlocked_inner
.config
.get()
.network
.reverse_connection_receipt_time_ms,
);
let (receipt, eventual_value) = self.generate_single_shot_receipt(receipt_timeout, [])?;
// Get target routing domain
let Some(routing_domain) = target_nr.best_routing_domain() else {
return Ok(NetworkResult::no_connection_other("No routing domain for target"));
};
// Get our peer info
let Some(peer_info) = self
.routing_table()
.get_own_peer_info(routing_domain) else {
return Ok(NetworkResult::no_connection_other("Own peer info not available"));
};
// Issue the signal
// The signal travels via the relay; the target then connects back to us.
let rpc = self.rpc_processor();
network_result_try!(rpc
.rpc_call_signal(
Destination::relay(relay_nr, target_nr.clone()),
SignalInfo::ReverseConnect { receipt, peer_info },
)
.await
.wrap_err("failed to send signal")?);
// Wait for the return receipt
// Only an in-band return (over the connection the target opened back to us)
// is acceptable here; every other receipt outcome is mapped to an error result.
// NOTE(review): take_value().unwrap() assumes the eventual always resolves with
// a value once awaited — confirm against the receipt manager's contract.
let inbound_nr = match eventual_value.await.take_value().unwrap() {
ReceiptEvent::ReturnedPrivate { private_route: _ }
| ReceiptEvent::ReturnedOutOfBand
| ReceiptEvent::ReturnedSafety => {
return Ok(NetworkResult::invalid_message(
"reverse connect receipt should be returned in-band",
));
}
ReceiptEvent::ReturnedInBand { inbound_noderef } => inbound_noderef,
ReceiptEvent::Expired => {
return Ok(NetworkResult::timeout());
}
ReceiptEvent::Cancelled => {
return Ok(NetworkResult::no_connection_other(format!("reverse connect receipt cancelled from {}", target_nr)))
}
};
// We expect the inbound noderef to be the same as the target noderef
// if they aren't the same, we should error on this and figure out what the hell is up
if !target_nr.same_entry(&inbound_nr) {
bail!("unexpected noderef mismatch on reverse connect");
}
// And now use the existing connection to send over
if let Some(descriptor) = inbound_nr.last_connection() {
match self
.net()
.send_data_to_existing_connection(descriptor, data)
.await?
{
None => Ok(NetworkResult::value(descriptor)),
Some(_) => Ok(NetworkResult::no_connection_other(
"unable to send over reverse connection",
)),
}
} else {
bail!("no reverse connection available")
}
}
/// Send a hole punch signal and do a negotiating ping and wait for the return receipt
/// Then send the data across the new connection
/// Only usable for PublicInternet routing domain
#[instrument(level = "trace", skip(self, data), err)]
pub async fn do_hole_punch(
&self,
relay_nr: NodeRef,
target_nr: NodeRef,
data: Vec<u8>,
) -> EyreResult<NetworkResult<ConnectionDescriptor>> {
// Ensure we are filtered down to UDP (the only hole punch protocol supported today)
assert!(target_nr
.filter_ref()
.map(|nrf| nrf.dial_info_filter.protocol_type_set
== ProtocolTypeSet::only(ProtocolType::UDP))
.unwrap_or_default());
// Build a return receipt for the signal
let receipt_timeout = ms_to_us(
self.unlocked_inner
.config
.get()
.network
.hole_punch_receipt_time_ms,
);
let (receipt, eventual_value) = self.generate_single_shot_receipt(receipt_timeout, [])?;
// Get target routing domain
let Some(routing_domain) = target_nr.best_routing_domain() else {
return Ok(NetworkResult::no_connection_other("No routing domain for target"));
};
// Get our peer info
let Some(peer_info) = self
.routing_table()
.get_own_peer_info(routing_domain) else {
return Ok(NetworkResult::no_connection_other("Own peer info not available"));
};
// Get the udp direct dialinfo for the hole punch
let hole_punch_did = target_nr
.first_filtered_dial_info_detail()
.ok_or_else(|| eyre!("No hole punch capable dialinfo found for node"))?;
// Do our half of the hole punch by sending an empty packet
// Both sides will do this and then the receipt will get sent over the punched hole
// Don't bother storing the returned connection descriptor as the 'last connection' because the other side of the hole
// punch should come through and create a real 'last connection' for us if this succeeds
network_result_try!(
self.net()
.send_data_to_dial_info(hole_punch_did.dial_info, Vec::new())
.await?
);
// Issue the signal
// The signal travels via the relay; the target does its half of the punch.
let rpc = self.rpc_processor();
network_result_try!(rpc
.rpc_call_signal(
Destination::relay(relay_nr, target_nr.clone()),
SignalInfo::HolePunch { receipt, peer_info },
)
.await
.wrap_err("failed to send signal")?);
// Wait for the return receipt
// Only an in-band return (over the punched hole) is acceptable here;
// every other receipt outcome is mapped to an error result.
// NOTE(review): take_value().unwrap() assumes the eventual always resolves with
// a value once awaited — confirm against the receipt manager's contract.
let inbound_nr = match eventual_value.await.take_value().unwrap() {
ReceiptEvent::ReturnedPrivate { private_route: _ }
| ReceiptEvent::ReturnedOutOfBand
| ReceiptEvent::ReturnedSafety => {
return Ok(NetworkResult::invalid_message(
"hole punch receipt should be returned in-band",
));
}
ReceiptEvent::ReturnedInBand { inbound_noderef } => inbound_noderef,
ReceiptEvent::Expired => {
return Ok(NetworkResult::timeout());
}
ReceiptEvent::Cancelled => {
return Ok(NetworkResult::no_connection_other(format!("hole punch receipt cancelled from {}", target_nr)))
}
};
// We expect the inbound noderef to be the same as the target noderef
// if they aren't the same, we should error on this and figure out what the hell is up
if !target_nr.same_entry(&inbound_nr) {
bail!(
"unexpected noderef mismatch on hole punch {}, expected {}",
inbound_nr,
target_nr
);
}
// And now use the existing connection to send over
if let Some(descriptor) = inbound_nr.last_connection() {
match self
.net()
.send_data_to_existing_connection(descriptor, data)
.await?
{
None => Ok(NetworkResult::value(descriptor)),
Some(_) => Ok(NetworkResult::no_connection_other(
"unable to send over hole punch",
)),
}
} else {
bail!("no hole punch available")
}
}
/// Figure out how to reach a node from our own node over the best routing domain and reference the nodes we want to access
/// Uses NodeRefs to ensure nodes are referenced, this is not a part of 'RoutingTable' because RoutingTable is not
/// allowed to use NodeRefs due to recursive locking
#[instrument(level = "trace", skip(self), ret)]
pub(crate) fn get_node_contact_method(
    &self,
    target_node_ref: NodeRef,
) -> EyreResult<NodeContactMethod> {
    let routing_table = self.routing_table();

    // Figure out the best routing domain to get the contact method over
    let routing_domain = match target_node_ref.best_routing_domain() {
        Some(rd) => rd,
        None => {
            log_net!("no routing domain for node {:?}", target_node_ref);
            return Ok(NodeContactMethod::Unreachable);
        }
    };

    // Node A is our own node
    // Use whatever node info we've calculated so far
    let peer_a = routing_table.get_best_effort_own_peer_info(routing_domain);

    // Node B is the target node
    let peer_b = match target_node_ref.make_peer_info(routing_domain) {
        Some(ni) => ni,
        None => {
            log_net!("no node info for node {:?}", target_node_ref);
            return Ok(NodeContactMethod::Unreachable);
        }
    };

    // Dial info filter comes from the target node ref
    let dial_info_filter = target_node_ref.dial_info_filter();
    let mut sequencing = target_node_ref.sequencing();

    // If the node has had lost questions or failures to send, prefer sequencing
    // to improve reliability. The node may be experiencing UDP fragmentation drops
    // or other firewalling issues and may perform better with TCP.
    // Fetch peer_stats once rather than twice for the two field reads.
    let rpc_stats = target_node_ref.peer_stats().rpc_stats;
    let unreliable = rpc_stats.failed_to_send > 0 || rpc_stats.recent_lost_answers > 0;
    if unreliable && sequencing < Sequencing::PreferOrdered {
        sequencing = Sequencing::PreferOrdered;
    }

    // Get the best contact method with these parameters from the routing domain
    let cm = routing_table.get_contact_method(
        routing_domain,
        &peer_a,
        &peer_b,
        dial_info_filter,
        sequencing,
    );

    // Translate the raw contact method to a referenced contact method
    Ok(match cm {
        ContactMethod::Unreachable => NodeContactMethod::Unreachable,
        ContactMethod::Existing => NodeContactMethod::Existing,
        ContactMethod::Direct(di) => NodeContactMethod::Direct(di),
        ContactMethod::SignalReverse(relay_key, target_key) => {
            let relay_nr = routing_table
                .lookup_and_filter_noderef(relay_key, routing_domain.into(), dial_info_filter)?
                .ok_or_else(|| eyre!("couldn't look up relay"))?;
            if !target_node_ref.node_ids().contains(&target_key) {
                bail!("signalreverse target noderef didn't match target key: {:?} != {} for relay {}", target_node_ref, target_key, relay_key );
            }
            NodeContactMethod::SignalReverse(relay_nr, target_node_ref)
        }
        ContactMethod::SignalHolePunch(relay_key, target_key) => {
            let relay_nr = routing_table
                .lookup_and_filter_noderef(relay_key, routing_domain.into(), dial_info_filter)?
                .ok_or_else(|| eyre!("couldn't look up relay"))?;
            if !target_node_ref.node_ids().contains(&target_key) {
                bail!("signalholepunch target noderef didn't match target key: {:?} != {} for relay {}", target_node_ref, target_key, relay_key );
            }
            // if any other protocol were possible here we could update this and do_hole_punch
            // but tcp hole punch is very very unreliable it seems
            let udp_target_node_ref = target_node_ref.filtered_clone(NodeRefFilter::new().with_protocol_type(ProtocolType::UDP));

            NodeContactMethod::SignalHolePunch(relay_nr, udp_target_node_ref)
        }
        ContactMethod::InboundRelay(relay_key) => {
            let relay_nr = routing_table
                .lookup_and_filter_noderef(relay_key, routing_domain.into(), dial_info_filter)?
                .ok_or_else(|| eyre!("couldn't look up relay"))?;
            NodeContactMethod::InboundRelay(relay_nr)
        }
        ContactMethod::OutboundRelay(relay_key) => {
            let relay_nr = routing_table
                .lookup_and_filter_noderef(relay_key, routing_domain.into(), dial_info_filter)?
                .ok_or_else(|| eyre!("couldn't look up relay"))?;
            NodeContactMethod::OutboundRelay(relay_nr)
        }
    })
}
/// Send raw data to a node
///
/// We may not have dial info for a node, but have an existing connection for it
/// because an inbound connection happened first, and no FindNodeQ has happened to that
/// node yet to discover its dial info. The existing connection should be tried first
/// in this case.
///
/// Sending to a node requires determining a NetworkClass compatible mechanism
///
/// Returns a pinned boxed future because relayed sends recurse into send_data
/// with the relay as the new destination.
pub fn send_data(
    &self,
    node_ref: NodeRef,
    data: Vec<u8>,
) -> SendPinBoxFuture<EyreResult<NetworkResult<SendDataKind>>> {
    let this = self.clone();
    Box::pin(
        async move {
            // info!("{}", format!("send_data to: {:?}", node_ref).red());

            // First try to send data to the last socket we've seen this peer on
            let data = if let Some(connection_descriptor) = node_ref.last_connection() {
                // info!(
                //     "{}",
                //     format!("last_connection to: {:?}", connection_descriptor).red()
                // );

                match this
                    .net()
                    .send_data_to_existing_connection(connection_descriptor, data)
                    .await?
                {
                    // None means the existing connection carried the data
                    None => {
                        // info!(
                        //     "{}",
                        //     format!("sent to existing connection: {:?}", connection_descriptor)
                        //         .red()
                        // );

                        // Update timestamp for this last connection since we just sent to it
                        node_ref.set_last_connection(connection_descriptor, get_aligned_timestamp());

                        return Ok(NetworkResult::value(SendDataKind::Existing(
                            connection_descriptor,
                        )));
                    }
                    // Some(d) means the send failed and handed the data back to retry below
                    Some(d) => d,
                }
            } else {
                data
            };

            // info!("{}", "no existing connection".red());

            // If we don't have last_connection, try to reach out to the peer via its dial info
            let contact_method = this.get_node_contact_method(node_ref.clone())?;
            log_net!(
                "send_data via {:?} to dialinfo {:?}",
                contact_method,
                node_ref
            );
            match contact_method {
                // Relayed: recurse with the relay as the destination
                NodeContactMethod::OutboundRelay(relay_nr)
                | NodeContactMethod::InboundRelay(relay_nr) => {
                    network_result_try!(this.send_data(relay_nr, data).await?);
                    Ok(NetworkResult::value(SendDataKind::Indirect))
                }
                NodeContactMethod::Direct(dial_info) => {
                    let connection_descriptor = network_result_try!(
                        this.net().send_data_to_dial_info(dial_info, data).await?
                    );
                    // If we connected to this node directly, save off the last connection so we can use it again
                    node_ref.set_last_connection(connection_descriptor, get_aligned_timestamp());
                    Ok(NetworkResult::value(SendDataKind::Direct(
                        connection_descriptor,
                    )))
                }
                NodeContactMethod::SignalReverse(relay_nr, target_node_ref) => {
                    let connection_descriptor = network_result_try!(
                        this.do_reverse_connect(relay_nr, target_node_ref, data)
                            .await?
                    );
                    Ok(NetworkResult::value(SendDataKind::Direct(
                        connection_descriptor,
                    )))
                }
                NodeContactMethod::SignalHolePunch(relay_nr, target_node_ref) => {
                    let connection_descriptor = network_result_try!(
                        this.do_hole_punch(relay_nr, target_node_ref, data).await?
                    );
                    Ok(NetworkResult::value(SendDataKind::Direct(
                        connection_descriptor,
                    )))
                }
                NodeContactMethod::Existing => Ok(NetworkResult::no_connection_other(
                    "should have found an existing connection",
                )),
                NodeContactMethod::Unreachable => Ok(NetworkResult::no_connection_other(
                    "Can't send to this node",
                )),
            }
        }
        .instrument(trace_span!("send_data")),
    )
}
// Direct bootstrap request handler (separate fallback mechanism from cheaper TXT bootstrap mechanism)
#[instrument(level = "trace", skip(self), ret, err)]
async fn handle_boot_request(
    &self,
    descriptor: ConnectionDescriptor,
) -> EyreResult<NetworkResult<()>> {
    // Gather a small set of bootstrap-capable nodes and serialize
    // out their signed peer info for the PublicInternet domain
    let bootstrap_peerinfo: Vec<PeerInfo> = self
        .routing_table()
        .find_bootstrap_nodes_filtered(2)
        .into_iter()
        .filter_map(|nr| nr.make_peer_info(RoutingDomain::PublicInternet))
        .collect();
    let json_bytes = serialize_json(bootstrap_peerinfo).into_bytes();

    // Reply with the chunk of signed routing table over the same connection
    // the boot request arrived on
    if self
        .net()
        .send_data_to_existing_connection(descriptor, json_bytes)
        .await?
        .is_none()
    {
        // Bootstrap reply was sent
        Ok(NetworkResult::value(()))
    } else {
        Ok(NetworkResult::no_connection_other(
            "bootstrap reply could not be sent",
        ))
    }
}
// Direct bootstrap request
#[instrument(level = "trace", err, skip(self))]
pub async fn boot_request(&self, dial_info: DialInfo) -> EyreResult<Vec<PeerInfo>> {
    // Bound the round trip by the configured RPC timeout
    let timeout_ms = self.with_config(|c| c.network.rpc.timeout_ms);

    // Send the boot magic to the requested peer address and await the raw reply
    let out_data: Vec<u8> = network_result_value_or_log!(self
        .net()
        .send_recv_data_unbound_to_dial_info(dial_info, BOOT_MAGIC.to_vec(), timeout_ms)
        .await? =>
        {
            return Ok(Vec::new());
        });

    // The reply is UTF-8 JSON-encoded peer info
    let reply_str =
        std::str::from_utf8(&out_data).wrap_err("bad utf8 in boot peerinfo")?;
    deserialize_json(reply_str).wrap_err("failed to deserialize boot peerinfo")
}
// Called when a packet potentially containing an RPC envelope is received by a low-level
// network protocol handler. Processes the envelope, authenticates and decrypts the RPC message
// and passes it to the RPC handler
#[instrument(level = "trace", ret, err, skip(self, data), fields(data.len = data.len()))]
#[cfg_attr(feature="verbose-tracing", instrument(level = "trace", ret, err, skip(self, data), fields(data.len = data.len())))]
async fn on_recv_envelope(
&self,
data: &[u8],
data: &mut [u8],
connection_descriptor: ConnectionDescriptor,
) -> EyreResult<bool> {
#[cfg(feature="verbose-tracing")]
let root = span!(
parent: None,
Level::TRACE,
@ -1313,6 +886,7 @@ impl NetworkManager {
"data.len" = data.len(),
"descriptor" = ?connection_descriptor
);
#[cfg(feature="verbose-tracing")]
let _root_enter = root.enter();
log_net!(
@ -1354,18 +928,18 @@ impl NetworkManager {
// Is this a direct bootstrap request instead of an envelope?
if data[0..4] == *BOOT_MAGIC {
network_result_value_or_log!(self.handle_boot_request(connection_descriptor).await? => {});
network_result_value_or_log!(self.handle_boot_request(connection_descriptor).await? => [ format!(": connection_descriptor={:?}", connection_descriptor) ] {});
return Ok(true);
}
// Is this an out-of-band receipt instead of an envelope?
if data[0..3] == *RECEIPT_MAGIC {
network_result_value_or_log!(self.handle_out_of_band_receipt(data).await => {});
network_result_value_or_log!(self.handle_out_of_band_receipt(data).await => [ format!(": data.len={}", data.len()) ] {});
return Ok(true);
}
// Decode envelope header (may fail signature validation)
let envelope = match Envelope::from_signed_data(self.crypto(), data) {
let envelope = match Envelope::from_signed_data(self.crypto(), data, &self.unlocked_inner.network_key) {
Ok(v) => v,
Err(e) => {
log_net!(debug "envelope failed to decode: {}", e);
@ -1387,8 +961,9 @@ impl NetworkManager {
if let Some(tsbehind) = tsbehind {
if tsbehind.as_u64() != 0 && (ts > ets && ts.saturating_sub(ets) > tsbehind) {
log_net!(debug
"envelope time was too far in the past: {}ms ",
timestamp_to_secs(ts.saturating_sub(ets).as_u64()) * 1000f64
"Timestamp behind: {}ms ({})",
timestamp_to_secs(ts.saturating_sub(ets).as_u64()) * 1000f64,
connection_descriptor.remote()
);
return Ok(false);
}
@ -1396,8 +971,9 @@ impl NetworkManager {
if let Some(tsahead) = tsahead {
if tsahead.as_u64() != 0 && (ts < ets && ets.saturating_sub(ts) > tsahead) {
log_net!(debug
"envelope time was too far in the future: {}ms",
timestamp_to_secs(ets.saturating_sub(ts).as_u64()) * 1000f64
"Timestamp ahead: {}ms ({})",
timestamp_to_secs(ets.saturating_sub(ts).as_u64()) * 1000f64,
connection_descriptor.remote()
);
return Ok(false);
}
@ -1443,8 +1019,20 @@ impl NetworkManager {
};
if let Some(relay_nr) = some_relay_nr {
// Force sequencing if this came in sequenced.
// The sender did the prefer/ensure calculation when it did get_contact_method,
// so we don't need to do it here.
let relay_nr = if connection_descriptor.remote().protocol_type().is_ordered() {
let mut relay_nr = relay_nr.clone();
relay_nr.set_sequencing(Sequencing::EnsureOrdered);
relay_nr
} else {
relay_nr
};
// Relay the packet to the desired destination
log_net!("relaying {} bytes to {}", data.len(), relay_nr);
network_result_value_or_log!(match self.send_data(relay_nr, data.to_vec())
.await {
Ok(v) => v,
@ -1452,7 +1040,7 @@ impl NetworkManager {
log_net!(debug "failed to forward envelope: {}" ,e);
return Ok(false);
}
} => {
} => [ format!(": relay_nr={}, data.len={}", relay_nr, data.len()) ] {
return Ok(false);
}
);
@ -1466,7 +1054,7 @@ impl NetworkManager {
// Decrypt the envelope body
let body = match envelope
.decrypt_body(self.crypto(), data, &node_id_secret) {
.decrypt_body(self.crypto(), data, &node_id_secret, &self.unlocked_inner.network_key) {
Ok(v) => v,
Err(e) => {
log_net!(debug "failed to decrypt envelope body: {}",e);
@ -1505,108 +1093,6 @@ impl NetworkManager {
Ok(true)
}
// Callbacks from low level network for statistics gathering
pub fn stats_packet_sent(&self, addr: IpAddr, bytes: ByteCount) {
    let mut inner = self.inner.lock();
    let stats = &mut inner.stats;

    // Tally the outbound bytes against our own node's totals
    stats.self_stats.transfer_stats_accounting.add_up(bytes);

    // ...and against the per-address entry for the destination,
    // creating the entry if this address has not been seen yet
    stats
        .per_address_stats
        .entry(PerAddressStatsKey(addr), |_k, _v| {
            // do nothing on LRU evict
        })
        .or_insert(PerAddressStats::default())
        .transfer_stats_accounting
        .add_up(bytes);
}
// Inbound counterpart of stats_packet_sent
pub fn stats_packet_rcvd(&self, addr: IpAddr, bytes: ByteCount) {
    let mut inner = self.inner.lock();
    let stats = &mut inner.stats;

    // Tally the inbound bytes against our own node's totals
    stats.self_stats.transfer_stats_accounting.add_down(bytes);

    // ...and against the per-address entry for the source,
    // creating the entry if this address has not been seen yet
    stats
        .per_address_stats
        .entry(PerAddressStatsKey(addr), |_k, _v| {
            // do nothing on LRU evict
        })
        .or_insert(PerAddressStats::default())
        .transfer_stats_accounting
        .add_down(bytes);
}
// Get a snapshot of the current network manager statistics
pub fn get_stats(&self) -> NetworkManagerStats {
    self.inner.lock().stats.clone()
}
// Build the externally visible network state snapshot
pub fn get_veilid_state(&self) -> VeilidStateNetwork {
    // The network counts as started only if the low-level net component
    // exists and reports itself started
    let started = self
        .unlocked_inner
        .components
        .read()
        .as_ref()
        .map(|c| c.net.is_started())
        .unwrap_or(false);

    // Not started: report an empty, zeroed state
    if !started {
        return VeilidStateNetwork {
            started: false,
            bps_down: 0.into(),
            bps_up: 0.into(),
            peers: Vec::new(),
        };
    }

    let routing_table = self.routing_table();

    // Snapshot the transfer averages while holding the lock as briefly as possible
    let (bps_down, bps_up) = {
        let inner = self.inner.lock();
        (
            inner.stats.self_stats.transfer_stats.down.average,
            inner.stats.self_stats.transfer_stats.up.average,
        )
    };

    // Collect table data for each recently seen peer still present in the routing table
    let mut peers = Vec::new();
    for (k, v) in routing_table.get_recent_peers() {
        if let Ok(Some(nr)) = routing_table.lookup_node_ref(k) {
            peers.push(PeerTableData {
                node_ids: nr.node_ids().iter().copied().collect(),
                peer_address: v.last_connection.remote().to_string(),
                peer_stats: nr.peer_stats(),
            });
        }
    }

    VeilidStateNetwork {
        started: true,
        bps_down,
        bps_up,
        peers,
    }
}
/// Push the current network state to the registered update callback, if one is set.
fn send_network_update(&self) {
    // Clone the callback out of the lock so we don't hold the lock while calling it
    let update_cb = self.unlocked_inner.update_callback.read().clone();
    // if-let instead of is_none()/unwrap(): same behavior, no panic path,
    // and the state is only computed when there is a callback to receive it
    if let Some(update_cb) = update_cb {
        let state = self.get_veilid_state();
        update_cb(VeilidUpdate::Network(state));
    }
}
// Determine if a local IP address has changed
// this means we should restart the low level network and and recreate all of our dial info
// Wait until we have received confirmation from N different peers
@ -1628,8 +1114,8 @@ impl NetworkManager {
connection_descriptor: ConnectionDescriptor, // the connection descriptor used
reporting_peer: NodeRef, // the peer's noderef reporting the socket address
) {
// debug code
//info!("report_global_socket_address\nsocket_address: {:#?}\nconnection_descriptor: {:#?}\nreporting_peer: {:#?}", socket_address, connection_descriptor, reporting_peer);
#[cfg(feature = "verbose-tracing")]
debug!("report_global_socket_address\nsocket_address: {:#?}\nconnection_descriptor: {:#?}\nreporting_peer: {:#?}", socket_address, connection_descriptor, reporting_peer);
// Ignore these reports if we are currently detecting public dial info
let net = self.net();

View File

@ -358,7 +358,7 @@ impl Network {
// This creates a short-lived connection in the case of connection-oriented protocols
// for the purpose of sending this one message.
// This bypasses the connection table as it is not a 'node to node' connection.
#[instrument(level="trace", err, skip(self, data), fields(data.len = data.len()))]
#[cfg_attr(feature="verbose-tracing", instrument(level="trace", err, skip(self, data), fields(data.len = data.len())))]
pub async fn send_data_unbound_to_dial_info(
&self,
dial_info: DialInfo,
@ -416,7 +416,7 @@ impl Network {
// This creates a short-lived connection in the case of connection-oriented protocols
// for the purpose of sending this one message.
// This bypasses the connection table as it is not a 'node to node' connection.
#[instrument(level="trace", err, skip(self, data), fields(data.len = data.len()))]
#[cfg_attr(feature="verbose-tracing", instrument(level="trace", err, skip(self, data), fields(data.len = data.len())))]
pub async fn send_recv_data_unbound_to_dial_info(
&self,
dial_info: DialInfo,
@ -496,7 +496,7 @@ impl Network {
}
}
#[instrument(level="trace", err, skip(self, data), fields(data.len = data.len()))]
#[cfg_attr(feature="verbose-tracing", instrument(level="trace", err, skip(self, data), fields(data.len = data.len())))]
pub async fn send_data_to_existing_connection(
&self,
descriptor: ConnectionDescriptor,
@ -515,7 +515,8 @@ impl Network {
network_result_value_or_log!(ph.clone()
.send_message(data.clone(), peer_socket_addr)
.await
.wrap_err("sending data to existing conection")? => { return Ok(Some(data)); } );
.wrap_err("sending data to existing conection")? => [ format!(": data.len={}, descriptor={:?}", data.len(), descriptor) ]
{ return Ok(Some(data)); } );
// Network accounting
self.network_manager()
@ -556,7 +557,7 @@ impl Network {
// Send data directly to a dial info, possibly without knowing which node it is going to
// Returns a descriptor for the connection used to send the data
#[instrument(level="trace", err, skip(self, data), fields(data.len = data.len()))]
#[cfg_attr(feature="verbose-tracing", instrument(level="trace", err, skip(self, data), fields(data.len = data.len())))]
pub async fn send_data_to_dial_info(
&self,
dial_info: DialInfo,

View File

@ -92,7 +92,7 @@ impl DiscoveryContext {
);
return None;
}
} => {
} => [ format!(": node_ref={}", node_ref) ] {
return None;
}
);

View File

@ -67,7 +67,6 @@ impl Network {
{
Ok(Ok((size, descriptor))) => {
// XXX: Limit the number of packets from the same IP address?
log_net!("UDP packet: {:?}", descriptor);
// Network accounting
network_manager.stats_packet_rcvd(
@ -77,7 +76,7 @@ impl Network {
// Pass it up for processing
if let Err(e) = network_manager
.on_recv_envelope(&data[..size], descriptor)
.on_recv_envelope(&mut data[..size], descriptor)
.await
{
log_net!(debug "failed to process received udp envelope: {}", e);

View File

@ -56,10 +56,11 @@ impl RawTcpNetworkConnection {
stream.flush().await.into_network_result()
}
#[instrument(level="trace", err, skip(self, message), fields(network_result, message.len = message.len()))]
#[cfg_attr(feature="verbose-tracing", instrument(level="trace", err, skip(self, message), fields(network_result, message.len = message.len())))]
pub async fn send(&self, message: Vec<u8>) -> io::Result<NetworkResult<()>> {
let mut stream = self.stream.clone();
let out = Self::send_internal(&mut stream, message).await?;
#[cfg(feature = "verbose-tracing")]
tracing::Span::current().record("network_result", &tracing::field::display(&out));
Ok(out)
}
@ -87,11 +88,15 @@ impl RawTcpNetworkConnection {
Ok(NetworkResult::Value(out))
}
// #[instrument(level = "trace", err, skip(self), fields(network_result))]
#[cfg_attr(
feature = "verbose-tracing",
instrument(level = "trace", err, skip(self), fields(network_result))
)]
pub async fn recv(&self) -> io::Result<NetworkResult<Vec<u8>>> {
let mut stream = self.stream.clone();
let out = Self::recv_internal(&mut stream).await?;
//tracing::Span::current().record("network_result", &tracing::field::display(&out));
#[cfg(feature = "verbose-tracing")]
tracing::Span::current().record("network_result", &tracing::field::display(&out));
Ok(out)
}
}
@ -143,7 +148,7 @@ impl RawTcpProtocolHandler {
ps,
));
log_net!(debug "TCP: on_accept_async from: {}", socket_addr);
log_net!(debug "Connection accepted from: {} (TCP)", socket_addr);
Ok(Some(conn))
}

View File

@ -4,22 +4,38 @@ use sockets::*;
#[derive(Clone)]
pub struct RawUdpProtocolHandler {
socket: Arc<UdpSocket>,
assembly_buffer: AssemblyBuffer,
}
impl RawUdpProtocolHandler {
pub fn new(socket: Arc<UdpSocket>) -> Self {
Self { socket }
Self {
socket,
assembly_buffer: AssemblyBuffer::new(),
}
}
// #[instrument(level = "trace", err, skip(self, data), fields(data.len = data.len(), ret.len, ret.descriptor))]
#[cfg_attr(feature="verbose-tracing", instrument(level = "trace", err, skip(self, data), fields(data.len = data.len(), ret.len, ret.descriptor)))]
pub async fn recv_message(&self, data: &mut [u8]) -> io::Result<(usize, ConnectionDescriptor)> {
let (size, descriptor) = loop {
let (message_len, descriptor) = loop {
// Get a packet
let (size, remote_addr) = network_result_value_or_log!(self.socket.recv_from(data).await.into_network_result()? => continue);
if size > MAX_MESSAGE_SIZE {
log_net!(debug "{}({}) at {}@{}:{}", "Invalid message".green(), "received too large UDP message", file!(), line!(), column!());
// Insert into assembly buffer
let Some(message) = self.assembly_buffer.insert_frame(&data[0..size], remote_addr) else {
continue;
};
// Check length of reassembled message (same for all protocols)
if message.len() > MAX_MESSAGE_SIZE {
log_net!(debug "{}({}) at {}@{}:{}", "Invalid message", "received too large UDP message", file!(), line!(), column!());
continue;
}
// Copy assemble message out if we got one
data[0..message.len()].copy_from_slice(&message);
// Return a connection descriptor and the amount of data in the message
let peer_addr = PeerAddress::new(
SocketAddress::from_socket_addr(remote_addr),
ProtocolType::UDP,
@ -30,25 +46,48 @@ impl RawUdpProtocolHandler {
SocketAddress::from_socket_addr(local_socket_addr),
);
break (size, descriptor);
break (message.len(), descriptor);
};
// tracing::Span::current().record("ret.len", &size);
// tracing::Span::current().record("ret.descriptor", &format!("{:?}", descriptor).as_str());
Ok((size, descriptor))
#[cfg(feature = "verbose-tracing")]
tracing::Span::current().record("ret.len", &size);
#[cfg(feature = "verbose-tracing")]
tracing::Span::current().record("ret.descriptor", &format!("{:?}", descriptor).as_str());
Ok((message_len, descriptor))
}
#[instrument(level = "trace", err, skip(self, data), fields(data.len = data.len(), ret.len, ret.descriptor))]
#[cfg_attr(feature="verbose-tracing", instrument(level = "trace", err, skip(self, data), fields(data.len = data.len(), ret.len, ret.descriptor)))]
pub async fn send_message(
&self,
data: Vec<u8>,
socket_addr: SocketAddr,
remote_addr: SocketAddr,
) -> io::Result<NetworkResult<ConnectionDescriptor>> {
if data.len() > MAX_MESSAGE_SIZE {
bail_io_error_other!("sending too large UDP message");
}
// Fragment and send
let sender = |framed_chunk: Vec<u8>, remote_addr: SocketAddr| async move {
let len = network_result_try!(self
.socket
.send_to(&framed_chunk, remote_addr)
.await
.into_network_result()?);
if len != framed_chunk.len() {
bail_io_error_other!("UDP partial send")
}
Ok(NetworkResult::value(()))
};
network_result_try!(
self.assembly_buffer
.split_message(data, remote_addr, sender)
.await?
);
// Return a connection descriptor for the sent message
let peer_addr = PeerAddress::new(
SocketAddress::from_socket_addr(socket_addr),
SocketAddress::from_socket_addr(remote_addr),
ProtocolType::UDP,
);
let local_socket_addr = self.socket.local_addr()?;
@ -58,16 +97,9 @@ impl RawUdpProtocolHandler {
SocketAddress::from_socket_addr(local_socket_addr),
);
let len = network_result_try!(self
.socket
.send_to(&data, socket_addr)
.await
.into_network_result()?);
if len != data.len() {
bail_io_error_other!("UDP partial send")
}
#[cfg(feature = "verbose-tracing")]
tracing::Span::current().record("ret.len", &len);
#[cfg(feature = "verbose-tracing")]
tracing::Span::current().record("ret.descriptor", &format!("{:?}", descriptor).as_str());
Ok(NetworkResult::value(descriptor))
}

View File

@ -72,7 +72,7 @@ where
// .map_err(to_io_error_other)
// }
#[instrument(level = "trace", err, skip(self, message), fields(network_result, message.len = message.len()))]
#[cfg_attr(feature="verbose-tracing", instrument(level = "trace", err, skip(self, message), fields(network_result, message.len = message.len())))]
pub async fn send(&self, message: Vec<u8>) -> io::Result<NetworkResult<()>> {
if message.len() > MAX_MESSAGE_SIZE {
bail_io_error_other!("received too large WS message");
@ -82,6 +82,7 @@ where
Err(e) => err_to_network_result(e),
};
if !out.is_value() {
#[cfg(feature = "verbose-tracing")]
tracing::Span::current().record("network_result", &tracing::field::display(&out));
return Ok(out);
}
@ -89,11 +90,13 @@ where
Ok(v) => NetworkResult::value(v),
Err(e) => err_to_network_result(e),
};
#[cfg(feature = "verbose-tracing")]
tracing::Span::current().record("network_result", &tracing::field::display(&out));
Ok(out)
}
// #[instrument(level = "trace", err, skip(self), fields(network_result, ret.len))]
#[cfg_attr(feature="verbose-tracing", instrument(level = "trace", err, skip(self), fields(network_result, ret.len)))]
pub async fn recv(&self) -> io::Result<NetworkResult<Vec<u8>>> {
let out = match self.stream.clone().next().await {
Some(Ok(Message::Binary(v))) => {
@ -120,7 +123,8 @@ where
)),
};
// tracing::Span::current().record("network_result", &tracing::field::display(&out));
#[cfg(feature = "verbose-tracing")]
tracing::Span::current().record("network_result", &tracing::field::display(&out));
Ok(out)
}
}
@ -212,7 +216,7 @@ impl WebsocketProtocolHandler {
ws_stream,
));
log_net!(debug "{}: on_accept_async from: {}", if self.arc.tls { "WSS" } else { "WS" }, socket_addr);
log_net!(debug "Connection accepted from: {} ({})", socket_addr, if self.arc.tls { "WSS" } else { "WS" });
Ok(Some(conn))
}

View File

@ -179,7 +179,7 @@ impl NetworkConnection {
}
}
#[instrument(level="trace", skip(message, stats), fields(message.len = message.len()), ret)]
#[cfg_attr(feature="verbose-tracing", instrument(level="trace", skip(message, stats), fields(message.len = message.len()), ret))]
async fn send_internal(
protocol_connection: &ProtocolNetworkConnection,
stats: Arc<Mutex<NetworkConnectionStats>>,
@ -194,7 +194,7 @@ impl NetworkConnection {
Ok(NetworkResult::Value(out))
}
#[instrument(level="trace", skip(stats), fields(ret.len))]
#[cfg_attr(feature="verbose-tracing", instrument(level="trace", skip(stats), fields(ret.len)))]
async fn recv_internal(
protocol_connection: &ProtocolNetworkConnection,
stats: Arc<Mutex<NetworkConnectionStats>>,
@ -205,6 +205,7 @@ impl NetworkConnection {
let mut stats = stats.lock();
stats.last_message_recv_time.max_assign(Some(ts));
#[cfg(feature = "verbose-tracing")]
tracing::Span::current().record("ret.len", out.len());
Ok(NetworkResult::Value(out))
@ -235,7 +236,7 @@ impl NetworkConnection {
Box::pin(async move {
log_net!(
"== Starting process_connection loop for id={}, {:?}", connection_id,
descriptor.green()
descriptor
);
let network_manager = connection_manager.network_manager();
@ -248,7 +249,7 @@ impl NetworkConnection {
let new_timer = || {
sleep(connection_manager.connection_inactivity_timeout_ms()).then(|_| async {
// timeout
log_net!("== Connection timeout on {:?}", descriptor.green());
log_net!("== Connection timeout on {:?}", descriptor);
RecvLoopAction::Timeout
})
};
@ -300,14 +301,18 @@ impl NetworkConnection {
.then(|res| async {
match res {
Ok(v) => {
let message = network_result_value_or_log!(v => {
if v.is_no_connection() {
let peer_address = protocol_connection.descriptor().remote();
log_net!(debug "Connection closed from: {} ({})", peer_address.socket_address().to_socket_addr(), peer_address.protocol_type());
return RecvLoopAction::Finish;
}
let mut message = network_result_value_or_log!(v => [ format!(": protocol_connection={:?}", protocol_connection) ] {
return RecvLoopAction::Finish;
});
// Pass received messages up to the network manager for processing
if let Err(e) = network_manager
.on_recv_envelope(message.as_slice(), descriptor)
.on_recv_envelope(message.as_mut_slice(), descriptor)
.await
{
log_net!(debug "failed to process received envelope: {}", e);
@ -361,7 +366,7 @@ impl NetworkConnection {
log_net!(
"== Connection loop finished descriptor={:?}",
descriptor.green()
descriptor
);
// Let the connection manager know the receive loop exited

View File

@ -0,0 +1,610 @@
use super::*;
impl NetworkManager {
/// Send raw data to a node
///
/// We may not have dial info for a node, but have an existing connection for it
/// because an inbound connection happened first, and no FindNodeQ has happened to that
/// node yet to discover its dial info. The existing connection should be tried first
/// in this case, if it matches the node ref's filters and no more permissive connection
/// could be established.
///
/// Sending to a node requires determining a NetworkClass compatible mechanism
pub fn send_data(
    &self,
    target_node_ref: NodeRef,
    data: Vec<u8>,
) -> SendPinBoxFuture<EyreResult<NetworkResult<SendDataKind>>> {
    let this = self.clone();
    Box::pin(
        async move {
            // Get the best way to contact this node
            let contact_method = this.get_node_contact_method(target_node_ref.clone())?;

            // If we need to relay, resolve the relay's own contact method and send to
            // the relay instead, remembering that the overall send was indirect
            let (contact_method, node_ref, relayed) = match contact_method {
                NodeContactMethod::OutboundRelay(relay_nr)
                | NodeContactMethod::InboundRelay(relay_nr) => {
                    let cm = this.get_node_contact_method(relay_nr.clone())?;
                    (cm, relay_nr, true)
                }
                cm => (cm, target_node_ref.clone(), false),
            };

            #[cfg(feature = "verbose-tracing")]
            debug!(
                "ContactMethod: {:?} for {:?}",
                contact_method, target_node_ref
            );

            // Try the contact method
            let sdk = match contact_method {
                // At this point the relay's own contact method resolved to yet another
                // relay, which would never terminate, so bail instead
                NodeContactMethod::OutboundRelay(relay_nr)
                | NodeContactMethod::InboundRelay(relay_nr) => {
                    // Relay loop or multiple relays
                    bail!(
                        "Relay loop or multiple relays detected: {} -> {} -> {}",
                        target_node_ref,
                        node_ref,
                        relay_nr
                    );
                }
                NodeContactMethod::Direct(dial_info) => {
                    network_result_try!(
                        this.send_data_ncm_direct(node_ref, dial_info, data).await?
                    )
                }
                NodeContactMethod::SignalReverse(relay_nr, target_node_ref) => {
                    network_result_try!(
                        this.send_data_ncm_signal_reverse(relay_nr, target_node_ref, data)
                            .await?
                    )
                }
                NodeContactMethod::SignalHolePunch(relay_nr, target_node_ref) => {
                    network_result_try!(
                        this.send_data_ncm_signal_hole_punch(relay_nr, target_node_ref, data)
                            .await?
                    )
                }
                NodeContactMethod::Existing => {
                    network_result_try!(
                        this.send_data_ncm_existing(target_node_ref, data).await?
                    )
                }
                NodeContactMethod::Unreachable => {
                    network_result_try!(
                        this.send_data_ncm_unreachable(target_node_ref, data)
                            .await?
                    )
                }
            };

            // A relayed send reports Indirect regardless of how the hop
            // to the relay itself was made
            if relayed {
                return Ok(NetworkResult::value(SendDataKind::Indirect));
            }
            Ok(NetworkResult::value(sdk))
        }
        .instrument(trace_span!("send_data")),
    )
}
/// Send data using NodeContactMethod::Existing
///
/// Only valid when a previously seen connection to the target is expected to
/// still be usable; yields a no-connection result otherwise.
async fn send_data_ncm_existing(
    &self,
    target_node_ref: NodeRef,
    data: Vec<u8>,
) -> EyreResult<NetworkResult<SendDataKind>> {
    // The 'Existing' contact method requires a remembered last connection
    let connection_descriptor = match target_node_ref.last_connection() {
        Some(cd) => cd,
        None => {
            return Ok(NetworkResult::no_connection_other(
                "should have found an existing connection",
            ))
        }
    };

    // The net layer hands the data back (Some) when it could not be delivered
    let unsent = self
        .net()
        .send_data_to_existing_connection(connection_descriptor, data)
        .await?;
    if unsent.is_some() {
        return Ok(NetworkResult::no_connection_other(
            "failed to send to existing connection",
        ));
    }

    // Delivered; refresh the last-connection timestamp since we just used it
    target_node_ref.set_last_connection(connection_descriptor, get_aligned_timestamp());
    Ok(NetworkResult::value(SendDataKind::Existing(
        connection_descriptor,
    )))
}
/// Send data using NodeContactMethod::Unreachable
///
/// The node has no computable contact method, so the only option is any
/// still-open connection we already hold to it.
async fn send_data_ncm_unreachable(
    &self,
    target_node_ref: NodeRef,
    data: Vec<u8>,
) -> EyreResult<NetworkResult<SendDataKind>> {
    // Without a contact method, a remembered connection is mandatory
    let Some(connection_descriptor) = target_node_ref.last_connection() else {
        return Ok(NetworkResult::no_connection_other(
            "Node is not reachable and has no existing connection",
        ));
    };

    // The net layer hands the data back (Some) when it could not be delivered
    let unsent = self
        .net()
        .send_data_to_existing_connection(connection_descriptor, data)
        .await?;
    if unsent.is_some() {
        return Ok(NetworkResult::no_connection_other(
            "failed to send to existing connection",
        ));
    }

    // Delivered; refresh the last-connection timestamp since we just used it
    target_node_ref.set_last_connection(connection_descriptor, get_aligned_timestamp());
    Ok(NetworkResult::value(SendDataKind::Existing(
        connection_descriptor,
    )))
}
/// Send data using NodeContactMethod::SignalReverse
///
/// First opportunistically tries the most recent connection to the target;
/// if none exists or the send fails, signals the target (via its relay) to
/// connect back to us and sends over the resulting reverse connection.
async fn send_data_ncm_signal_reverse(
    &self,
    relay_nr: NodeRef,
    target_node_ref: NodeRef,
    data: Vec<u8>,
) -> EyreResult<NetworkResult<SendDataKind>> {
    let mut data = data;

    // Opportunistic path: reuse the last socket we've seen this peer on
    if let Some(connection_descriptor) = target_node_ref.last_connection() {
        match self
            .net()
            .send_data_to_existing_connection(connection_descriptor, data)
            .await?
        {
            None => {
                // Sent; refresh the last-connection timestamp since we just used it
                target_node_ref
                    .set_last_connection(connection_descriptor, get_aligned_timestamp());
                return Ok(NetworkResult::value(SendDataKind::Existing(
                    connection_descriptor,
                )));
            }
            Some(returned) => {
                // Existing connection did not work; recover the payload and
                // fall through to the reverse-connect path
                data = returned;
            }
        }
    }

    // Signal a reverse connection through the relay and send over it
    let connection_descriptor = network_result_try!(
        self.do_reverse_connect(relay_nr, target_node_ref, data)
            .await?
    );
    Ok(NetworkResult::value(SendDataKind::Direct(
        connection_descriptor,
    )))
}
/// Send data using NodeContactMethod::SignalHolePunch
///
/// First opportunistically tries the most recent connection to the target;
/// if none exists or the send fails, coordinates a hole punch through the
/// relay and sends over the punched connection.
async fn send_data_ncm_signal_hole_punch(
    &self,
    relay_nr: NodeRef,
    target_node_ref: NodeRef,
    data: Vec<u8>,
) -> EyreResult<NetworkResult<SendDataKind>> {
    let mut data = data;

    // Opportunistic path: reuse the last socket we've seen this peer on
    if let Some(connection_descriptor) = target_node_ref.last_connection() {
        match self
            .net()
            .send_data_to_existing_connection(connection_descriptor, data)
            .await?
        {
            None => {
                // Sent; refresh the last-connection timestamp since we just used it
                target_node_ref
                    .set_last_connection(connection_descriptor, get_aligned_timestamp());
                return Ok(NetworkResult::value(SendDataKind::Existing(
                    connection_descriptor,
                )));
            }
            Some(returned) => {
                // Could not deliver over the old connection; recover the
                // payload and fall through to hole punching
                data = returned;
            }
        }
    }

    // Hole punch through the relay and send over the new connection
    let connection_descriptor =
        network_result_try!(self.do_hole_punch(relay_nr, target_node_ref, data).await?);
    Ok(NetworkResult::value(SendDataKind::Direct(
        connection_descriptor,
    )))
}
/// Send data using NodeContactMethod::Direct
///
/// Reuses an existing connection matching the dial info's protocol when one
/// is available, otherwise dials out directly with the given dial info.
async fn send_data_ncm_direct(
    &self,
    node_ref: NodeRef,
    dial_info: DialInfo,
    data: Vec<u8>,
) -> EyreResult<NetworkResult<SendDataKind>> {
    // Since we have the best dial info already, we can find a connection to
    // use by protocol type: narrow the noderef to this dial info's filter
    let node_ref = node_ref.filtered_clone(NodeRefFilter::from(dial_info.make_filter()));

    let mut data = data;

    // Opportunistic path: reuse the last socket we've seen this peer on
    if let Some(connection_descriptor) = node_ref.last_connection() {
        #[cfg(feature = "verbose-tracing")]
        debug!(
            "ExistingConnection: {:?} for {:?}",
            connection_descriptor, node_ref
        );
        match self
            .net()
            .send_data_to_existing_connection(connection_descriptor, data)
            .await?
        {
            None => {
                // Sent; refresh the last-connection timestamp since we just used it
                node_ref.set_last_connection(connection_descriptor, get_aligned_timestamp());
                return Ok(NetworkResult::value(SendDataKind::Existing(
                    connection_descriptor,
                )));
            }
            Some(returned) => {
                // Recover the payload and fall through to a fresh dial
                data = returned;
            }
        }
    }

    // A new direct connection was necessary for this dial info
    let connection_descriptor =
        network_result_try!(self.net().send_data_to_dial_info(dial_info, data).await?);

    // Connected directly: save the connection so it can be reused next time
    node_ref.set_last_connection(connection_descriptor, get_aligned_timestamp());
    Ok(NetworkResult::value(SendDataKind::Direct(
        connection_descriptor,
    )))
}
/// Figure out how to reach a node from our own node over the best routing domain and reference the nodes we want to access
/// Uses NodeRefs to ensure nodes are referenced, this is not a part of 'RoutingTable' because RoutingTable is not
/// allowed to use NodeRefs due to recursive locking
///
/// Results are memoized in `node_contact_method_cache`, keyed on both sides'
/// node-info timestamps plus the target's filter and sequencing, so any change
/// to those invalidates the cached answer.
pub(crate) fn get_node_contact_method(
    &self,
    target_node_ref: NodeRef,
) -> EyreResult<NodeContactMethod> {
    let routing_table = self.routing_table();

    // Figure out the best routing domain to get the contact method over;
    // a node with no routing domain is simply unreachable
    let routing_domain = match target_node_ref.best_routing_domain() {
        Some(rd) => rd,
        None => {
            log_net!("no routing domain for node {:?}", target_node_ref);
            return Ok(NodeContactMethod::Unreachable);
        }
    };

    // Get cache key and return the memoized contact method if present
    let ncm_key = NodeContactMethodCacheKey {
        own_node_info_ts: routing_table.get_own_node_info_ts(routing_domain),
        target_node_info_ts: target_node_ref.node_info_ts(routing_domain),
        target_node_ref_filter: target_node_ref.filter_ref().cloned(),
        target_node_ref_sequencing: target_node_ref.sequencing(),
    };
    if let Some(ncm) = self.inner.lock().node_contact_method_cache.get(&ncm_key) {
        return Ok(ncm.clone());
    }

    // Node A is our own node
    // Use whatever node info we've calculated so far (best-effort)
    let peer_a = routing_table.get_best_effort_own_peer_info(routing_domain);

    // Node B is the target node; without node info it is unreachable
    let peer_b = match target_node_ref.make_peer_info(routing_domain) {
        Some(ni) => ni,
        None => {
            log_net!("no node info for node {:?}", target_node_ref);
            return Ok(NodeContactMethod::Unreachable);
        }
    };

    // Dial info filter comes from the target node ref
    let dial_info_filter = target_node_ref.dial_info_filter();
    let sequencing = target_node_ref.sequencing();

    // If the node has had lost questions or failures to send, prefer sequencing
    // to improve reliability. The node may be experiencing UDP fragmentation drops
    // or other firewalling issues and may perform better with TCP.
    // (disabled pending evaluation)
    // let unreliable = target_node_ref.peer_stats().rpc_stats.failed_to_send > 2 || target_node_ref.peer_stats().rpc_stats.recent_lost_answers > 2;
    // if unreliable && sequencing < Sequencing::PreferOrdered {
    //     log_net!(debug "Node contact failing over to Ordered for {}", target_node_ref.to_string().cyan());
    //     sequencing = Sequencing::PreferOrdered;
    // }

    // Get the best contact method with these parameters from the routing domain
    let cm = routing_table.get_contact_method(
        routing_domain,
        &peer_a,
        &peer_b,
        dial_info_filter,
        sequencing,
    );

    // Translate the raw (key-based) contact method into one carrying live
    // NodeRefs, propagating the sequencing preference onto relay noderefs
    let ncm = match cm {
        ContactMethod::Unreachable => NodeContactMethod::Unreachable,
        ContactMethod::Existing => NodeContactMethod::Existing,
        ContactMethod::Direct(di) => NodeContactMethod::Direct(di),
        ContactMethod::SignalReverse(relay_key, target_key) => {
            let mut relay_nr = routing_table
                .lookup_and_filter_noderef(relay_key, routing_domain.into(), dial_info_filter)?
                .ok_or_else(|| eyre!("couldn't look up relay"))?;
            // Sanity check: the routing table's chosen target key must belong
            // to the noderef the caller asked about
            if !target_node_ref.node_ids().contains(&target_key) {
                bail!("signalreverse target noderef didn't match target key: {:?} != {} for relay {}", target_node_ref, target_key, relay_key );
            }
            relay_nr.set_sequencing(sequencing);
            NodeContactMethod::SignalReverse(relay_nr, target_node_ref)
        }
        ContactMethod::SignalHolePunch(relay_key, target_key) => {
            let mut relay_nr = routing_table
                .lookup_and_filter_noderef(relay_key, routing_domain.into(), dial_info_filter)?
                .ok_or_else(|| eyre!("couldn't look up relay"))?;
            // Same sanity check as SignalReverse above
            if !target_node_ref.node_ids().contains(&target_key) {
                bail!("signalholepunch target noderef didn't match target key: {:?} != {} for relay {}", target_node_ref, target_key, relay_key );
            }
            relay_nr.set_sequencing(sequencing);
            // if any other protocol were possible here we could update this and do_hole_punch
            // but tcp hole punch is very very unreliable it seems
            let udp_target_node_ref = target_node_ref
                .filtered_clone(NodeRefFilter::new().with_protocol_type(ProtocolType::UDP));
            NodeContactMethod::SignalHolePunch(relay_nr, udp_target_node_ref)
        }
        ContactMethod::InboundRelay(relay_key) => {
            let mut relay_nr = routing_table
                .lookup_and_filter_noderef(relay_key, routing_domain.into(), dial_info_filter)?
                .ok_or_else(|| eyre!("couldn't look up relay"))?;
            relay_nr.set_sequencing(sequencing);
            NodeContactMethod::InboundRelay(relay_nr)
        }
        ContactMethod::OutboundRelay(relay_key) => {
            let mut relay_nr = routing_table
                .lookup_and_filter_noderef(relay_key, routing_domain.into(), dial_info_filter)?
                .ok_or_else(|| eyre!("couldn't look up relay"))?;
            relay_nr.set_sequencing(sequencing);
            NodeContactMethod::OutboundRelay(relay_nr)
        }
    };

    // Cache this (eviction callback intentionally does nothing)
    self.inner
        .lock()
        .node_contact_method_cache
        .insert(ncm_key, ncm.clone(), |_, _| {});
    Ok(ncm)
}
/// Send a reverse connection signal and wait for the return receipt over it
/// Then send the data across the new connection
/// Only usable for PublicInternet routing domain
#[cfg_attr(
    feature = "verbose-tracing",
    instrument(level = "trace", skip(self, data), err)
)]
async fn do_reverse_connect(
    &self,
    relay_nr: NodeRef,
    target_nr: NodeRef,
    data: Vec<u8>,
) -> EyreResult<NetworkResult<ConnectionDescriptor>> {
    // Build a return receipt for the signal, with a configured timeout
    let receipt_timeout = ms_to_us(
        self.unlocked_inner
            .config
            .get()
            .network
            .reverse_connection_receipt_time_ms,
    );
    let (receipt, eventual_value) = self.generate_single_shot_receipt(receipt_timeout, [])?;

    // Get target routing domain
    let Some(routing_domain) = target_nr.best_routing_domain() else {
        return Ok(NetworkResult::no_connection_other("No routing domain for target"));
    };

    // Get our own peer info; it is carried in the signal so the target
    // knows where to connect back to
    let Some(peer_info) = self
        .routing_table()
        .get_own_peer_info(routing_domain) else {
        return Ok(NetworkResult::no_connection_other("Own peer info not available"));
    };

    // Issue the ReverseConnect signal to the target through its relay
    let rpc = self.rpc_processor();
    network_result_try!(rpc
        .rpc_call_signal(
            Destination::relay(relay_nr, target_nr.clone()),
            SignalInfo::ReverseConnect { receipt, peer_info },
        )
        .await
        .wrap_err("failed to send signal")?);

    // Wait for the return receipt
    // (unwrap: the single-shot eventual is expected to always resolve with a value)
    let inbound_nr = match eventual_value.await.take_value().unwrap() {
        ReceiptEvent::ReturnedPrivate { private_route: _ }
        | ReceiptEvent::ReturnedOutOfBand
        | ReceiptEvent::ReturnedSafety => {
            // Receipt must come back over the reverse-connected socket itself
            return Ok(NetworkResult::invalid_message(
                "reverse connect receipt should be returned in-band",
            ));
        }
        ReceiptEvent::ReturnedInBand { inbound_noderef } => inbound_noderef,
        ReceiptEvent::Expired => {
            return Ok(NetworkResult::timeout());
        }
        ReceiptEvent::Cancelled => {
            return Ok(NetworkResult::no_connection_other(format!(
                "reverse connect receipt cancelled from {}",
                target_nr
            )))
        }
    };

    // We expect the inbound noderef to be the same as the target noderef
    // if they aren't the same, error out so the mismatch can be investigated
    if !target_nr.same_entry(&inbound_nr) {
        bail!("unexpected noderef mismatch on reverse connect");
    }

    // And now use the connection the target just established to send over
    if let Some(descriptor) = inbound_nr.last_connection() {
        match self
            .net()
            .send_data_to_existing_connection(descriptor, data)
            .await?
        {
            // None means the data was delivered
            None => Ok(NetworkResult::value(descriptor)),
            Some(_) => Ok(NetworkResult::no_connection_other(
                "unable to send over reverse connection",
            )),
        }
    } else {
        bail!("no reverse connection available")
    }
}
/// Send a hole punch signal and do a negotiating ping and wait for the return receipt
/// Then send the data across the new connection
/// Only usable for PublicInternet routing domain
#[cfg_attr(
    feature = "verbose-tracing",
    instrument(level = "trace", skip(self, data), err)
)]
async fn do_hole_punch(
    &self,
    relay_nr: NodeRef,
    target_nr: NodeRef,
    data: Vec<u8>,
) -> EyreResult<NetworkResult<ConnectionDescriptor>> {
    // Ensure we are filtered down to UDP (the only hole punch protocol supported today);
    // get_node_contact_method applies this filter when building SignalHolePunch
    assert!(target_nr
        .filter_ref()
        .map(|nrf| nrf.dial_info_filter.protocol_type_set
            == ProtocolTypeSet::only(ProtocolType::UDP))
        .unwrap_or_default());

    // Build a return receipt for the signal, with a configured timeout
    let receipt_timeout = ms_to_us(
        self.unlocked_inner
            .config
            .get()
            .network
            .hole_punch_receipt_time_ms,
    );
    let (receipt, eventual_value) = self.generate_single_shot_receipt(receipt_timeout, [])?;

    // Get target routing domain
    let Some(routing_domain) = target_nr.best_routing_domain() else {
        return Ok(NetworkResult::no_connection_other("No routing domain for target"));
    };

    // Get our own peer info; it is carried in the signal so the target
    // knows where to punch back to
    let Some(peer_info) = self
        .routing_table()
        .get_own_peer_info(routing_domain) else {
        return Ok(NetworkResult::no_connection_other("Own peer info not available"));
    };

    // Get the udp direct dialinfo for the hole punch
    let hole_punch_did = target_nr
        .first_filtered_dial_info_detail()
        .ok_or_else(|| eyre!("No hole punch capable dialinfo found for node"))?;

    // Do our half of the hole punch by sending an empty packet
    // Both sides will do this and then the receipt will get sent over the punched hole
    // Don't bother storing the returned connection descriptor as the 'last connection' because the other side of the hole
    // punch should come through and create a real 'last connection' for us if this succeeds
    network_result_try!(
        self.net()
            .send_data_to_dial_info(hole_punch_did.dial_info, Vec::new())
            .await?
    );

    // Issue the HolePunch signal to the target through its relay
    let rpc = self.rpc_processor();
    network_result_try!(rpc
        .rpc_call_signal(
            Destination::relay(relay_nr, target_nr.clone()),
            SignalInfo::HolePunch { receipt, peer_info },
        )
        .await
        .wrap_err("failed to send signal")?);

    // Wait for the return receipt
    // (unwrap: the single-shot eventual is expected to always resolve with a value)
    let inbound_nr = match eventual_value.await.take_value().unwrap() {
        ReceiptEvent::ReturnedPrivate { private_route: _ }
        | ReceiptEvent::ReturnedOutOfBand
        | ReceiptEvent::ReturnedSafety => {
            // Receipt must come back through the punched hole itself
            return Ok(NetworkResult::invalid_message(
                "hole punch receipt should be returned in-band",
            ));
        }
        ReceiptEvent::ReturnedInBand { inbound_noderef } => inbound_noderef,
        ReceiptEvent::Expired => {
            return Ok(NetworkResult::timeout());
        }
        ReceiptEvent::Cancelled => {
            return Ok(NetworkResult::no_connection_other(format!(
                "hole punch receipt cancelled from {}",
                target_nr
            )))
        }
    };

    // We expect the inbound noderef to be the same as the target noderef
    // if they aren't the same, error out so the mismatch can be investigated
    if !target_nr.same_entry(&inbound_nr) {
        bail!(
            "unexpected noderef mismatch on hole punch {}, expected {}",
            inbound_nr,
            target_nr
        );
    }

    // And now use the punched connection to send over
    if let Some(descriptor) = inbound_nr.last_connection() {
        match self
            .net()
            .send_data_to_existing_connection(descriptor, data)
            .await?
        {
            // None means the data was delivered
            None => Ok(NetworkResult::value(descriptor)),
            Some(_) => Ok(NetworkResult::no_connection_other(
                "unable to send over hole punch",
            )),
        }
    } else {
        bail!("no hole punch available")
    }
}
}

View File

@ -0,0 +1,137 @@
use super::*;
// Statistics per address
/// Transfer statistics tracked for a single IP address (also used for this
/// node's own aggregate in `NetworkManagerStats::self_stats`).
#[derive(Clone, Default)]
pub struct PerAddressStats {
    // Last-seen timestamp for this address
    // NOTE(review): not updated within this module; confirm the writer elsewhere
    pub last_seen_ts: Timestamp,
    // Rolling accounting that packet send/receive callbacks add to
    pub transfer_stats_accounting: TransferStatsAccounting,
    // Up/down transfer statistics derived from the accounting
    pub transfer_stats: TransferStatsDownUp,
}
/// Key for the per-address statistics LRU cache: the remote IP address.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub struct PerAddressStatsKey(IpAddr);

impl Default for PerAddressStatsKey {
    fn default() -> Self {
        // IpAddr has no Default impl, so use the unspecified IPv4 address (0.0.0.0)
        Self(IpAddr::V4(Ipv4Addr::UNSPECIFIED))
    }
}
// Statistics about the low-level network
/// Aggregated low-level network statistics: one entry for this node overall
/// plus an LRU-bounded map of per-remote-address entries.
#[derive(Clone)]
pub struct NetworkManagerStats {
    // Totals for this node across all addresses
    pub self_stats: PerAddressStats,
    // Per-address entries, bounded to IPADDR_TABLE_SIZE by LRU eviction
    pub per_address_stats: LruCache<PerAddressStatsKey, PerAddressStats>,
}

impl Default for NetworkManagerStats {
    fn default() -> Self {
        Self {
            self_stats: PerAddressStats::default(),
            per_address_stats: LruCache::new(IPADDR_TABLE_SIZE),
        }
    }
}
impl NetworkManager {
    // Callbacks from low level network for statistics gathering

    /// Record `bytes` sent to `addr`: adds to the node-wide totals and to the
    /// per-address LRU entry (created on first use, evicted silently).
    pub fn stats_packet_sent(&self, addr: IpAddr, bytes: ByteCount) {
        let inner = &mut *self.inner.lock();
        inner
            .stats
            .self_stats
            .transfer_stats_accounting
            .add_up(bytes);
        inner
            .stats
            .per_address_stats
            .entry(PerAddressStatsKey(addr), |_k, _v| {
                // do nothing on LRU evict
            })
            .or_insert(PerAddressStats::default())
            .transfer_stats_accounting
            .add_up(bytes);
    }

    /// Record `bytes` received from `addr`; mirror of `stats_packet_sent`.
    pub fn stats_packet_rcvd(&self, addr: IpAddr, bytes: ByteCount) {
        let inner = &mut *self.inner.lock();
        inner
            .stats
            .self_stats
            .transfer_stats_accounting
            .add_down(bytes);
        inner
            .stats
            .per_address_stats
            .entry(PerAddressStatsKey(addr), |_k, _v| {
                // do nothing on LRU evict
            })
            .or_insert(PerAddressStats::default())
            .transfer_stats_accounting
            .add_down(bytes);
    }

    // Get stats

    /// Return a point-in-time clone of the current statistics.
    pub fn get_stats(&self) -> NetworkManagerStats {
        let inner = self.inner.lock();
        inner.stats.clone()
    }

    /// Build the externally visible network state: started flag, current
    /// up/down transfer averages, and the recent-peer list.
    pub fn get_veilid_state(&self) -> VeilidStateNetwork {
        // The network counts as started only when components exist and the
        // low-level net reports started
        let has_state = self
            .unlocked_inner
            .components
            .read()
            .as_ref()
            .map(|c| c.net.is_started())
            .unwrap_or(false);

        if !has_state {
            return VeilidStateNetwork {
                started: false,
                bps_down: 0.into(),
                bps_up: 0.into(),
                peers: Vec::new(),
            };
        }
        let routing_table = self.routing_table();

        // Copy the averages out inside a short-lived lock scope so the inner
        // lock is not held while walking the routing table below
        let (bps_down, bps_up) = {
            let inner = self.inner.lock();
            (
                inner.stats.self_stats.transfer_stats.down.average,
                inner.stats.self_stats.transfer_stats.up.average,
            )
        };

        VeilidStateNetwork {
            started: true,
            bps_down,
            bps_up,
            peers: {
                let mut out = Vec::new();
                for (k, v) in routing_table.get_recent_peers() {
                    // Recent peers may no longer resolve in the routing table;
                    // include only those that still do
                    if let Ok(Some(nr)) = routing_table.lookup_node_ref(k) {
                        let peer_stats = nr.peer_stats();
                        let peer = PeerTableData {
                            node_ids: nr.node_ids().iter().copied().collect(),
                            peer_address: v.last_connection.remote().to_string(),
                            peer_stats,
                        };
                        out.push(peer);
                    }
                }
                out
            },
        }
    }

    /// Deliver a `VeilidUpdate::Network` with the current state to the
    /// registered update callback, if one is set.
    pub(super) fn send_network_update(&self) {
        let update_cb = self.unlocked_inner.update_callback.read().clone();
        if update_cb.is_none() {
            return;
        }
        let state = self.get_veilid_state();
        (update_cb.unwrap())(VeilidUpdate::Network(state));
    }
}

View File

@ -30,9 +30,7 @@ pub struct ConnectionDescriptor {
impl ConnectionDescriptor {
pub fn new(remote: PeerAddress, local: SocketAddress) -> Self {
assert!(
!remote.protocol_type().is_connection_oriented() || !local.address().is_unspecified()
);
assert!(!remote.protocol_type().is_ordered() || !local.address().is_unspecified());
Self {
remote,

View File

@ -491,13 +491,9 @@ impl DialInfo {
}
pub fn ordered_sequencing_sort(a: &DialInfo, b: &DialInfo) -> core::cmp::Ordering {
let ca = a.protocol_type().sort_order(Sequencing::EnsureOrdered);
let cb = b.protocol_type().sort_order(Sequencing::EnsureOrdered);
if ca < cb {
return core::cmp::Ordering::Less;
}
if ca > cb {
return core::cmp::Ordering::Greater;
let s = ProtocolType::ordered_sequencing_sort(a.protocol_type(), b.protocol_type());
if s != core::cmp::Ordering::Equal {
return s;
}
match (a, b) {
(DialInfo::UDP(a), DialInfo::UDP(b)) => a.cmp(b),

View File

@ -7,6 +7,7 @@ use super::*;
Eq,
PartialOrd,
Ord,
Hash,
Serialize,
Deserialize,
RkyvArchive,
@ -61,6 +62,20 @@ impl DialInfoFilter {
pub fn is_dead(&self) -> bool {
self.protocol_type_set.is_empty() || self.address_type_set.is_empty()
}
/// Apply a sequencing preference to this filter.
///
/// Returns `(ordered, filter)`: `ordered` tells the caller whether to sort
/// candidates by ordered-protocol preference, and `filter` is narrowed to
/// ordered protocols only when ordering must be ensured.
pub fn with_sequencing(self, sequencing: Sequencing) -> (bool, DialInfoFilter) {
    match sequencing {
        // No preference: no sort, filter unchanged
        Sequencing::NoPreference => (false, self),
        // Prefer ordered: sort, but still allow unordered protocols
        Sequencing::PreferOrdered => (true, self),
        // Ensure ordered: sort, and restrict the filter to ordered protocols
        Sequencing::EnsureOrdered => (
            true,
            self.filtered(
                &DialInfoFilter::all().with_protocol_type_set(ProtocolType::all_ordered_set()),
            ),
        ),
    }
}
}
impl fmt::Debug for DialInfoFilter {
@ -80,7 +95,24 @@ impl fmt::Debug for DialInfoFilter {
}
}
/// Build a filter that matches a single protocol type across all address types.
impl From<ProtocolType> for DialInfoFilter {
    fn from(other: ProtocolType) -> Self {
        Self {
            protocol_type_set: ProtocolTypeSet::from(other),
            address_type_set: AddressTypeSet::all(),
        }
    }
}

/// Build a filter that matches a single address type across all protocol types.
impl From<AddressType> for DialInfoFilter {
    fn from(other: AddressType) -> Self {
        Self {
            protocol_type_set: ProtocolTypeSet::all(),
            address_type_set: AddressTypeSet::from(other),
        }
    }
}
/// Implemented by types that can be tested against a [`DialInfoFilter`].
pub trait MatchesDialInfoFilter {
    /// Returns true if `self` is acceptable under `filter`.
    fn matches_filter(&self, filter: &DialInfoFilter) -> bool;
}

View File

@ -25,7 +25,7 @@ pub enum ProtocolType {
}
impl ProtocolType {
pub fn is_connection_oriented(&self) -> bool {
pub fn is_ordered(&self) -> bool {
matches!(
self,
ProtocolType::TCP | ProtocolType::WS | ProtocolType::WSS
@ -72,6 +72,18 @@ impl ProtocolType {
pub fn all_ordered_set() -> ProtocolTypeSet {
ProtocolType::TCP | ProtocolType::WS | ProtocolType::WSS
}
/// Compare two protocol types by their rank under `Sequencing::EnsureOrdered`
/// (as given by `sort_order`); lower rank sorts earlier.
pub fn ordered_sequencing_sort(a: Self, b: Self) -> core::cmp::Ordering {
    // Idiomatic three-way comparison via Ord::cmp instead of manual </> checks
    a.sort_order(Sequencing::EnsureOrdered)
        .cmp(&b.sort_order(Sequencing::EnsureOrdered))
}
}
impl fmt::Display for ProtocolType {

View File

@ -79,7 +79,7 @@ impl Network {
/////////////////////////////////////////////////////////////////
#[instrument(level="trace", err, skip(self, data), fields(data.len = data.len()))]
#[cfg_attr(feature="verbose-tracing", instrument(level="trace", err, skip(self, data), fields(data.len = data.len())))]
pub async fn send_data_unbound_to_dial_info(
&self,
dial_info: DialInfo,
@ -119,7 +119,7 @@ impl Network {
// This creates a short-lived connection in the case of connection-oriented protocols
// for the purpose of sending this one message.
// This bypasses the connection table as it is not a 'node to node' connection.
#[instrument(level="trace", err, skip(self, data), fields(data.len = data.len()))]
#[cfg_attr(feature="verbose-tracing", instrument(level="trace", err, skip(self, data), fields(data.len = data.len())))]
pub async fn send_recv_data_unbound_to_dial_info(
&self,
dial_info: DialInfo,
@ -167,7 +167,7 @@ impl Network {
}
}
#[instrument(level="trace", err, skip(self, data), fields(data.len = data.len()))]
#[cfg_attr(feature="verbose-tracing", instrument(level="trace", err, skip(self, data), fields(data.len = data.len())))]
pub async fn send_data_to_existing_connection(
&self,
descriptor: ConnectionDescriptor,
@ -212,7 +212,7 @@ impl Network {
Ok(Some(data))
}
#[instrument(level="trace", err, skip(self, data), fields(data.len = data.len()))]
#[cfg_attr(feature="verbose-tracing", instrument(level="trace", err, skip(self, data), fields(data.len = data.len())))]
pub async fn send_data_to_dial_info(
&self,
dial_info: DialInfo,

View File

@ -64,7 +64,7 @@ impl WebsocketNetworkConnection {
// self.inner.ws_meta.close().await.map_err(to_io).map(drop)
// }
#[instrument(level = "trace", err, skip(self, message), fields(network_result, message.len = message.len()))]
#[cfg_attr(feature="verbose-tracing", instrument(level = "trace", err, skip(self, message), fields(network_result, message.len = message.len())))]
pub async fn send(&self, message: Vec<u8>) -> io::Result<NetworkResult<()>> {
if message.len() > MAX_MESSAGE_SIZE {
bail_io_error_other!("sending too large WS message");
@ -79,11 +79,12 @@ impl WebsocketNetworkConnection {
.map_err(to_io)
.into_network_result()?;
#[cfg(feature = "verbose-tracing")]
tracing::Span::current().record("network_result", &tracing::field::display(&out));
Ok(out)
}
// #[instrument(level = "trace", err, skip(self), fields(network_result, ret.len))]
#[cfg_attr(feature="verbose-tracing", instrument(level = "trace", err, skip(self), fields(network_result, ret.len)))]
pub async fn recv(&self) -> io::Result<NetworkResult<Vec<u8>>> {
let out = match SendWrapper::new(self.inner.ws_stream.clone().next()).await {
Some(WsMessage::Binary(v)) => {
@ -103,7 +104,8 @@ impl WebsocketNetworkConnection {
)));
}
};
// tracing::Span::current().record("network_result", &tracing::field::display(&out));
#[cfg(feature = "verbose-tracing")]
tracing::Span::current().record("network_result", &tracing::field::display(&out));
Ok(out)
}
}

View File

@ -316,7 +316,7 @@ impl BucketEntryInner {
let last_connections = self.last_connections(
rti,
true,
Some(NodeRefFilter::new().with_routing_domain(routing_domain)),
NodeRefFilter::from(routing_domain),
);
!last_connections.is_empty()
}
@ -370,7 +370,7 @@ impl BucketEntryInner {
let last_connections = self.last_connections(
rti,
true,
Some(NodeRefFilter::new().with_routing_domain_set(routing_domain_set)),
NodeRefFilter::from(routing_domain_set),
);
for lc in last_connections {
if let Some(rd) =
@ -412,7 +412,7 @@ impl BucketEntryInner {
&self,
rti: &RoutingTableInner,
only_live: bool,
filter: Option<NodeRefFilter>,
filter: NodeRefFilter,
) -> Vec<(ConnectionDescriptor, Timestamp)> {
let connection_manager =
rti.unlocked_inner.network_manager.connection_manager();
@ -421,26 +421,13 @@ impl BucketEntryInner {
.last_connections
.iter()
.filter_map(|(k, v)| {
let include = if let Some(filter) = &filter {
let include = {
let remote_address = v.0.remote_address().address();
if let Some(routing_domain) = rti.routing_domain_for_address(remote_address) {
if filter.routing_domain_set.contains(routing_domain)
rti.routing_domain_for_address(remote_address).map(|rd| {
filter.routing_domain_set.contains(rd)
&& filter.dial_info_filter.protocol_type_set.contains(k.0)
&& filter.dial_info_filter.address_type_set.contains(k.1)
{
// matches filter
true
} else {
// does not match filter
false
}
} else {
// no valid routing domain
false
}
} else {
// no filter
true
}).unwrap_or(false)
};
if !include {
@ -454,7 +441,7 @@ impl BucketEntryInner {
// Check if the connection is still considered live
let alive =
// Should we check the connection table?
if v.0.protocol_type().is_connection_oriented() {
if v.0.protocol_type().is_ordered() {
// Look the connection up in the connection manager and see if it's still there
connection_manager.get_connection(v.0).is_some()
} else {
@ -471,8 +458,10 @@ impl BucketEntryInner {
}
})
.collect();
// Sort with newest timestamps first
out.sort_by(|a, b| b.1.cmp(&a.1));
// Sort with newest timestamps
out.sort_by(|a, b| {
b.1.cmp(&a.1)
});
out
}

View File

@ -99,8 +99,8 @@ impl RoutingTable {
},
);
// xxx test
// Validate peers returned are, in fact, closer to the key than the node we sent this to
// This same test is used on the other side so we vet things here
let valid = match Self::verify_peers_closer(vcrypto2, own_node_id, key, &closest_nodes) {
Ok(v) => v,
Err(e) => {
@ -108,13 +108,16 @@ impl RoutingTable {
}
};
if !valid {
panic!("non-closer peers returned");
error!(
"non-closer peers returned: own_node_id={:#?} key={:#?} closest_nodes={:#?}",
own_node_id, key, closest_nodes
);
}
NetworkResult::value(closest_nodes)
}
/// Determine if set of peers is closer to key_near than key_far
/// Determine if set of peers is closer to key_near than key_far is to key_near
pub(crate) fn verify_peers_closer(
vcrypto: CryptoSystemVersion,
key_far: TypedKey,
@ -128,14 +131,30 @@ impl RoutingTable {
}
let mut closer = true;
let d_far = vcrypto.distance(&key_far.value, &key_near.value);
for peer in peers {
let Some(key_peer) = peer.node_ids().get(kind) else {
bail!("peers need to have a key with the same cryptosystem");
};
let d_near = vcrypto.distance(&key_near.value, &key_peer.value);
let d_far = vcrypto.distance(&key_far.value, &key_peer.value);
if d_far < d_near {
let warning = format!(
r#"peer: {}
near (key): {}
far (self): {}
d_near: {}
d_far: {}
cmp: {:?}"#,
key_peer.value,
key_near.value,
key_far.value,
d_near,
d_far,
d_near.cmp(&d_far)
);
warn!("{}", warning);
closer = false;
break;
}
}

View File

@ -518,7 +518,10 @@ impl RoutingTable {
}
/// Look up the best way for two nodes to reach each other over a specific routing domain
#[instrument(level = "trace", skip(self), ret)]
#[cfg_attr(
feature = "verbose-tracing",
instrument(level = "trace", skip(self), ret)
)]
pub fn get_contact_method(
&self,
routing_domain: RoutingDomain,
@ -1028,7 +1031,7 @@ impl RoutingTable {
.sort_and_clean_closest_noderefs(node_id, closest_nodes)
}
#[instrument(level = "trace", skip(self), ret)]
#[instrument(level = "trace", skip(self, peers), ret)]
pub fn register_find_node_answer(
&self,
crypto_kind: CryptoKind,
@ -1118,7 +1121,7 @@ impl RoutingTable {
return;
}
Ok(v) => v,
} => {
} => [ format!(": crypto_kind={} node_ref={} wide={}", crypto_kind, node_ref, wide) ] {
return;
});
@ -1134,107 +1137,13 @@ impl RoutingTable {
continue;
}
Ok(v) => v,
} => {
} => [ format!(": crypto_kind={} closest_nr={} wide={}", crypto_kind, closest_nr, wide) ] {
// Do nothing with non-values
continue;
});
}
}
}
pub fn make_public_internet_relay_node_filter(&self) -> impl Fn(&BucketEntryInner) -> bool {
// Get all our outbound protocol/address types
let outbound_dif = self.get_outbound_dial_info_filter(RoutingDomain::PublicInternet);
let mapped_port_info = self.get_low_level_port_info();
move |e: &BucketEntryInner| {
// Ensure this node is not on the local network
if e.has_node_info(RoutingDomain::LocalNetwork.into()) {
return false;
}
// Disqualify nodes that don't cover all our inbound ports for tcp and udp
// as we need to be able to use the relay for keepalives for all nat mappings
let mut low_level_protocol_ports = mapped_port_info.low_level_protocol_ports.clone();
let can_serve_as_relay = e
.node_info(RoutingDomain::PublicInternet)
.map(|n| {
let dids = n.all_filtered_dial_info_details(
Some(DialInfoDetail::ordered_sequencing_sort), // By default, choose connection-oriented protocol for relay
|did| did.matches_filter(&outbound_dif),
);
for did in &dids {
let pt = did.dial_info.protocol_type();
let at = did.dial_info.address_type();
if let Some((llpt, port)) = mapped_port_info.protocol_to_port.get(&(pt, at))
{
low_level_protocol_ports.remove(&(*llpt, at, *port));
}
}
low_level_protocol_ports.is_empty()
})
.unwrap_or(false);
if !can_serve_as_relay {
return false;
}
true
}
}
#[instrument(level = "trace", skip(self), ret)]
pub fn find_inbound_relay(
&self,
routing_domain: RoutingDomain,
cur_ts: Timestamp,
) -> Option<NodeRef> {
// Get relay filter function
let relay_node_filter = match routing_domain {
RoutingDomain::PublicInternet => self.make_public_internet_relay_node_filter(),
RoutingDomain::LocalNetwork => {
unimplemented!();
}
};
// Go through all entries and find fastest entry that matches filter function
let inner = self.inner.read();
let inner = &*inner;
let mut best_inbound_relay: Option<Arc<BucketEntry>> = None;
// Iterate all known nodes for candidates
inner.with_entries(cur_ts, BucketEntryState::Unreliable, |rti, entry| {
let entry2 = entry.clone();
entry.with(rti, |rti, e| {
// Ensure we have the node's status
if let Some(node_status) = e.node_status(routing_domain) {
// Ensure the node will relay
if node_status.will_relay() {
// Compare against previous candidate
if let Some(best_inbound_relay) = best_inbound_relay.as_mut() {
// Less is faster
let better = best_inbound_relay.with(rti, |_rti, best| {
// choose low latency stability for relays
BucketEntryInner::cmp_fastest_reliable(cur_ts, e, best)
== std::cmp::Ordering::Less
});
// Now apply filter function and see if this node should be included
if better && relay_node_filter(e) {
*best_inbound_relay = entry2;
}
} else if relay_node_filter(e) {
// Always store the first candidate
best_inbound_relay = Some(entry2);
}
}
}
});
// Don't end early, iterate through all entries
Option::<()>::None
});
// Return the best inbound relay noderef
best_inbound_relay.map(|e| NodeRef::new(self.clone(), e, None))
}
}
impl core::ops::Deref for RoutingTable {

View File

@ -217,25 +217,24 @@ pub trait NodeRefBase: Sized {
fn first_filtered_dial_info_detail(&self) -> Option<DialInfoDetail> {
let routing_domain_set = self.routing_domain_set();
let dial_info_filter = self.dial_info_filter();
let sequencing = self.common().sequencing;
let (ordered, dial_info_filter) = dial_info_filter.with_sequencing(sequencing);
let (sort, dial_info_filter) = match self.common().sequencing {
Sequencing::NoPreference => (None, dial_info_filter),
Sequencing::PreferOrdered => (
Some(DialInfoDetail::ordered_sequencing_sort),
dial_info_filter,
),
Sequencing::EnsureOrdered => (
Some(DialInfoDetail::ordered_sequencing_sort),
dial_info_filter.filtered(
&DialInfoFilter::all().with_protocol_type_set(ProtocolType::all_ordered_set()),
),
),
let sort = if ordered {
Some(DialInfoDetail::ordered_sequencing_sort)
} else {
None
};
if dial_info_filter.is_dead() {
return None;
}
let filter = |did: &DialInfoDetail| did.matches_filter(&dial_info_filter);
self.operate(|_rt, e| {
for routing_domain in routing_domain_set {
if let Some(ni) = e.node_info(routing_domain) {
let filter = |did: &DialInfoDetail| did.matches_filter(&dial_info_filter);
if let Some(did) = ni.first_filtered_dial_info_detail(sort, filter) {
return Some(did);
}
@ -278,11 +277,22 @@ pub trait NodeRefBase: Sized {
out
}
/// Get the most recent 'last connection' to this node
/// Filtered first and then sorted by ordering preference and then by most recent
fn last_connection(&self) -> Option<ConnectionDescriptor> {
// Get the last connections and the last time we saw anything with this connection
// Filtered first and then sorted by most recent
self.operate(|rti, e| {
let last_connections = e.last_connections(rti, true, self.common().filter.clone());
// apply sequencing to filter and get sort
let sequencing = self.common().sequencing;
let filter = self.common().filter.clone().unwrap_or_default();
let (ordered, filter) = filter.with_sequencing(sequencing);
let mut last_connections = e.last_connections(rti, true, filter);
if ordered {
last_connections.sort_by(|a, b| {
ProtocolType::ordered_sequencing_sort(a.0.protocol_type(), b.0.protocol_type())
});
}
last_connections.first().map(|x| x.0)
})
}

View File

@ -1,6 +1,6 @@
use super::*;
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct NodeRefFilter {
pub routing_domain_set: RoutingDomainSet,
pub dial_info_filter: DialInfoFilter,
@ -19,7 +19,6 @@ impl NodeRefFilter {
dial_info_filter: DialInfoFilter::all(),
}
}
pub fn with_routing_domain(mut self, routing_domain: RoutingDomain) -> Self {
self.routing_domain_set = routing_domain.into();
self
@ -58,4 +57,54 @@ impl NodeRefFilter {
pub fn is_dead(&self) -> bool {
self.dial_info_filter.is_dead() || self.routing_domain_set.is_empty()
}
pub fn with_sequencing(mut self, sequencing: Sequencing) -> (bool, Self) {
let (ordered, dif) = self.dial_info_filter.with_sequencing(sequencing);
self.dial_info_filter = dif;
(ordered, self)
}
}
impl From<RoutingDomain> for NodeRefFilter {
fn from(other: RoutingDomain) -> Self {
Self {
routing_domain_set: other.into(),
dial_info_filter: DialInfoFilter::all(),
}
}
}
impl From<RoutingDomainSet> for NodeRefFilter {
fn from(other: RoutingDomainSet) -> Self {
Self {
routing_domain_set: other,
dial_info_filter: DialInfoFilter::all(),
}
}
}
impl From<DialInfoFilter> for NodeRefFilter {
fn from(other: DialInfoFilter) -> Self {
Self {
routing_domain_set: RoutingDomainSet::all(),
dial_info_filter: other,
}
}
}
impl From<ProtocolType> for NodeRefFilter {
fn from(other: ProtocolType) -> Self {
Self {
routing_domain_set: RoutingDomainSet::all(),
dial_info_filter: DialInfoFilter::from(other),
}
}
}
impl From<AddressType> for NodeRefFilter {
fn from(other: AddressType) -> Self {
Self {
routing_domain_set: RoutingDomainSet::all(),
dial_info_filter: DialInfoFilter::from(other),
}
}
}

View File

@ -533,7 +533,7 @@ impl RouteSpecStore {
}
/// validate data using a private route's key and signature chain
#[instrument(level = "trace", skip(self, data, callback), ret)]
#[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self, data, callback), ret))]
pub fn with_signature_validated_route<F,R>(
&self,
public_key: &TypedKey,
@ -593,7 +593,7 @@ impl RouteSpecStore {
Some(callback(rssd, rsd))
}
#[instrument(level = "trace", skip(self), ret, err)]
#[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self), ret, err))]
async fn test_allocated_route(&self, private_route_id: RouteId) -> EyreResult<bool> {
// Make loopback route to test with
let dest = {
@ -616,8 +616,8 @@ impl RouteSpecStore {
let private_route = self.assemble_private_route(&key, None)?;
// Always test routes with safety routes that are more likely to succeed
let stability = Stability::Reliable;
// Routes can test with whatever sequencing they were allocated with
let sequencing = Sequencing::NoPreference;
// Routes should test with the most likely to succeed sequencing they are capable of
let sequencing = Sequencing::PreferOrdered;
let safety_spec = SafetySpec {
preferred_route: Some(private_route_id),
@ -657,12 +657,17 @@ impl RouteSpecStore {
bail!("no best key to test remote route");
};
// Always test routes with safety routes that are more likely to succeed
let stability = Stability::Reliable;
// Routes should test with the most likely to succeed sequencing they are capable of
let sequencing = Sequencing::PreferOrdered;
// Get a safety route that is good enough
let safety_spec = SafetySpec {
preferred_route: None,
hop_count: self.unlocked_inner.default_route_hop_count,
stability: Stability::default(),
sequencing: Sequencing::default(),
stability,
sequencing,
};
let safety_selection = SafetySelection::Safe(safety_spec);
@ -711,7 +716,7 @@ impl RouteSpecStore {
}
/// Test an allocated route for continuity
#[instrument(level = "trace", skip(self), ret, err)]
#[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self), ret, err))]
pub async fn test_route(&self, id: RouteId) -> EyreResult<bool> {
let is_remote = self.is_route_id_remote(&id);
if is_remote {
@ -1078,7 +1083,7 @@ impl RouteSpecStore {
}
/// Get an allocated route that matches a particular safety spec
#[instrument(level = "trace", skip(self, inner, rti), ret, err)]
#[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self, inner, rti), ret, err))]
fn get_route_for_safety_spec_inner(
&self,
inner: &mut RouteSpecStoreInner,
@ -1149,7 +1154,7 @@ impl RouteSpecStore {
}
/// Get a private route to use for the answer to question
#[instrument(level = "trace", skip(self), ret, err)]
#[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self), ret, err))]
pub fn get_private_route_for_safety_spec(
&self,
crypto_kind: CryptoKind,
@ -1258,7 +1263,7 @@ impl RouteSpecStore {
/// Assemble a single private route for publication
/// Returns a PrivateRoute object for an allocated private route key
#[instrument(level = "trace", skip(self), err)]
#[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self), err))]
pub fn assemble_private_route(
&self,
key: &PublicKey,
@ -1285,7 +1290,7 @@ impl RouteSpecStore {
/// Assemble private route set for publication
/// Returns a vec of PrivateRoute objects for an allocated private route
#[instrument(level = "trace", skip(self), err)]
#[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self), err))]
pub fn assemble_private_routes(
&self,
id: &RouteId,
@ -1311,7 +1316,7 @@ impl RouteSpecStore {
/// Import a remote private route for compilation
/// It is safe to import the same route more than once and it will return the same route id
/// Returns a route set id
#[instrument(level = "trace", skip(self, blob), ret, err)]
#[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self, blob), ret, err))]
pub fn import_remote_private_route(&self, blob: Vec<u8>) -> EyreResult<RouteId> {
let cur_ts = get_aligned_timestamp();
@ -1342,7 +1347,7 @@ impl RouteSpecStore {
}
/// Release a remote private route that is no longer in use
#[instrument(level = "trace", skip(self), ret)]
#[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self), ret))]
pub fn release_remote_private_route(&self, id: RouteId) -> bool {
let inner = &mut *self.inner.lock();
inner.cache.remove_remote_private_route(id)

View File

@ -61,6 +61,9 @@ impl RouteStats {
pub fn record_sent(&mut self, cur_ts: Timestamp, bytes: ByteCount) {
self.last_sent_ts = Some(cur_ts);
self.transfer_stats_accounting.add_up(bytes);
// If we sent successfully, then reset 'failed_to_send'
self.failed_to_send = 0;
}
/// Mark a route as having been sent to
@ -101,6 +104,8 @@ impl RouteStats {
self.last_tested_ts = None;
self.last_sent_ts = None;
self.last_received_ts = None;
self.failed_to_send = 0;
self.questions_lost = 0;
}
/// Check if a route needs testing

View File

@ -223,7 +223,7 @@ impl Default for PublicInternetRoutingDomainDetail {
}
}
fn first_filtered_dial_info_detail(
fn first_filtered_dial_info_detail_between_nodes(
from_node: &NodeInfo,
to_node: &NodeInfo,
dial_info_filter: &DialInfoFilter,
@ -235,28 +235,21 @@ fn first_filtered_dial_info_detail(
.with_protocol_type_set(from_node.outbound_protocols()),
);
// Get first filtered dialinfo
let (sort, dial_info_filter) = match sequencing {
Sequencing::NoPreference => (None, dial_info_filter),
Sequencing::PreferOrdered => (
Some(DialInfoDetail::ordered_sequencing_sort),
dial_info_filter,
),
Sequencing::EnsureOrdered => (
Some(DialInfoDetail::ordered_sequencing_sort),
dial_info_filter.filtered(
&DialInfoFilter::all().with_protocol_type_set(ProtocolType::all_ordered_set()),
),
),
// Apply sequencing and get sort
let (ordered, dial_info_filter) = dial_info_filter.with_sequencing(sequencing);
let sort = if ordered {
Some(DialInfoDetail::ordered_sequencing_sort)
} else {
None
};
// If the filter is dead then we won't be able to connect
if dial_info_filter.is_dead() {
return None;
}
let direct_filter = |did: &DialInfoDetail| did.matches_filter(&dial_info_filter);
// Get the best match dial info for node B if we have it
let direct_filter = |did: &DialInfoDetail| did.matches_filter(&dial_info_filter);
to_node.first_filtered_dial_info_detail(sort, direct_filter)
}
@ -294,7 +287,7 @@ impl RoutingDomainDetail for PublicInternetRoutingDomainDetail {
// Get the best match dial info for node B if we have it
if let Some(target_did) =
first_filtered_dial_info_detail(node_a, node_b, &dial_info_filter, sequencing)
first_filtered_dial_info_detail_between_nodes(node_a, node_b, &dial_info_filter, sequencing)
{
// Do we need to signal before going inbound?
if !target_did.class.requires_signal() {
@ -319,7 +312,7 @@ impl RoutingDomainDetail for PublicInternetRoutingDomainDetail {
};
// Can node A reach the inbound relay directly?
if first_filtered_dial_info_detail(
if first_filtered_dial_info_detail_between_nodes(
node_a,
node_b_relay,
&dial_info_filter,
@ -332,7 +325,7 @@ impl RoutingDomainDetail for PublicInternetRoutingDomainDetail {
///////// Reverse connection
// Get the best match dial info for an reverse inbound connection from node B to node A
if let Some(reverse_did) = first_filtered_dial_info_detail(
if let Some(reverse_did) = first_filtered_dial_info_detail_between_nodes(
node_b,
node_a,
&dial_info_filter,
@ -358,14 +351,14 @@ impl RoutingDomainDetail for PublicInternetRoutingDomainDetail {
let udp_dial_info_filter = dial_info_filter
.clone()
.filtered(&DialInfoFilter::all().with_protocol_type(ProtocolType::UDP));
if let Some(target_udp_did) = first_filtered_dial_info_detail(
if let Some(target_udp_did) = first_filtered_dial_info_detail_between_nodes(
node_a,
node_b,
&udp_dial_info_filter,
sequencing,
) {
// Does node A have a direct udp dialinfo that node B can reach?
if let Some(reverse_udp_did) = first_filtered_dial_info_detail(
if let Some(reverse_udp_did) = first_filtered_dial_info_detail_between_nodes(
node_b,
node_a,
&udp_dial_info_filter,
@ -407,7 +400,7 @@ impl RoutingDomainDetail for PublicInternetRoutingDomainDetail {
};
// Can we reach the full relay?
if first_filtered_dial_info_detail(
if first_filtered_dial_info_detail_between_nodes(
node_a,
&node_b_relay,
&dial_info_filter,

View File

@ -111,6 +111,9 @@ impl RoutingTableInner {
routing_domain_set: RoutingDomainSet,
filter: &DialInfoFilter,
) -> Option<DialInfoDetail> {
if filter.is_dead() || routing_domain_set.is_empty() {
return None;
}
for routing_domain in routing_domain_set {
let did = self.with_routing_domain(routing_domain, |rd| {
for did in rd.common().dial_info_details() {
@ -133,6 +136,9 @@ impl RoutingTableInner {
filter: &DialInfoFilter,
) -> Vec<DialInfoDetail> {
let mut ret = Vec::new();
if filter.is_dead() || routing_domain_set.is_empty() {
return ret;
}
for routing_domain in routing_domain_set {
self.with_routing_domain(routing_domain, |rd| {
for did in rd.common().dial_info_details() {
@ -201,7 +207,10 @@ impl RoutingTableInner {
true
}
#[instrument(level = "trace", skip(self), ret)]
#[cfg_attr(
feature = "verbose-tracing",
instrument(level = "trace", skip(self), ret)
)]
pub fn get_contact_method(
&self,
routing_domain: RoutingDomain,

View File

@ -377,11 +377,8 @@ impl RoutingTable {
// Ensure we got the signed peer info
if !nr.signed_node_info_has_valid_signature(RoutingDomain::PublicInternet) {
log_rtab!(warn
"bootstrap at {:?} did not return valid signed node info",
nr
);
// If this node info is invalid, it will time out after being unpingable
log_rtab!(warn "bootstrap server is not responding");
log_rtab!(debug "bootstrap server is not responding: {}", nr);
} else {
// otherwise this bootstrap is valid, lets ask it to find ourselves now
routing_table.reverse_find_node(crypto_kind, nr, true).await

View File

@ -198,12 +198,12 @@ impl RoutingTable {
// Newly allocated routes
let mut newly_allocated_routes = Vec::new();
for _n in 0..routes_to_allocate {
// Parameters here must be the default safety route spec
// Parameters here must be the most inclusive safety route spec
// These will be used by test_remote_route as well
if let Some(k) = rss.allocate_route(
&VALID_CRYPTO_KINDS,
Stability::default(),
Sequencing::default(),
Sequencing::EnsureOrdered,
default_route_hop_count,
DirectionSet::all(),
&[],

View File

@ -81,4 +81,97 @@ impl RoutingTable {
Ok(())
}
pub fn make_public_internet_relay_node_filter(&self) -> impl Fn(&BucketEntryInner) -> bool {
// Get all our outbound protocol/address types
let outbound_dif = self.get_outbound_dial_info_filter(RoutingDomain::PublicInternet);
let mapped_port_info = self.get_low_level_port_info();
move |e: &BucketEntryInner| {
// Ensure this node is not on the local network
if e.has_node_info(RoutingDomain::LocalNetwork.into()) {
return false;
}
// Disqualify nodes that don't cover all our inbound ports for tcp and udp
// as we need to be able to use the relay for keepalives for all nat mappings
let mut low_level_protocol_ports = mapped_port_info.low_level_protocol_ports.clone();
let can_serve_as_relay = e
.node_info(RoutingDomain::PublicInternet)
.map(|n| {
let dids = n.all_filtered_dial_info_details(DialInfoDetail::NO_SORT, |did| {
did.matches_filter(&outbound_dif)
});
for did in &dids {
let pt = did.dial_info.protocol_type();
let at = did.dial_info.address_type();
if let Some((llpt, port)) = mapped_port_info.protocol_to_port.get(&(pt, at))
{
low_level_protocol_ports.remove(&(*llpt, at, *port));
}
}
low_level_protocol_ports.is_empty()
})
.unwrap_or(false);
if !can_serve_as_relay {
return false;
}
true
}
}
#[instrument(level = "trace", skip(self), ret)]
pub fn find_inbound_relay(
&self,
routing_domain: RoutingDomain,
cur_ts: Timestamp,
) -> Option<NodeRef> {
// Get relay filter function
let relay_node_filter = match routing_domain {
RoutingDomain::PublicInternet => self.make_public_internet_relay_node_filter(),
RoutingDomain::LocalNetwork => {
unimplemented!();
}
};
// Go through all entries and find fastest entry that matches filter function
let inner = self.inner.read();
let inner = &*inner;
let mut best_inbound_relay: Option<Arc<BucketEntry>> = None;
// Iterate all known nodes for candidates
inner.with_entries(cur_ts, BucketEntryState::Unreliable, |rti, entry| {
let entry2 = entry.clone();
entry.with(rti, |rti, e| {
// Ensure we have the node's status
if let Some(node_status) = e.node_status(routing_domain) {
// Ensure the node will relay
if node_status.will_relay() {
// Compare against previous candidate
if let Some(best_inbound_relay) = best_inbound_relay.as_mut() {
// Less is faster
let better = best_inbound_relay.with(rti, |_rti, best| {
// choose low latency stability for relays
BucketEntryInner::cmp_fastest_reliable(cur_ts, e, best)
== std::cmp::Ordering::Less
});
// Now apply filter function and see if this node should be included
if better && relay_node_filter(e) {
*best_inbound_relay = entry2;
}
} else if relay_node_filter(e) {
// Always store the first candidate
best_inbound_relay = Some(entry2);
}
}
}
});
// Don't end early, iterate through all entries
Option::<()>::None
});
// Return the best inbound relay noderef
best_inbound_relay.map(|e| NodeRef::new(self.clone(), e, None))
}
}

View File

@ -29,13 +29,11 @@ impl MatchesDialInfoFilter for DialInfoDetail {
impl DialInfoDetail {
pub fn ordered_sequencing_sort(a: &DialInfoDetail, b: &DialInfoDetail) -> core::cmp::Ordering {
if a.class < b.class {
return core::cmp::Ordering::Less;
let c = DialInfo::ordered_sequencing_sort(&a.dial_info, &b.dial_info);
if c != core::cmp::Ordering::Equal {
return c;
}
if a.class > b.class {
return core::cmp::Ordering::Greater;
}
DialInfo::ordered_sequencing_sort(&a.dial_info, &b.dial_info)
a.class.cmp(&b.class)
}
pub const NO_SORT: std::option::Option<
for<'r, 's> fn(&'r DialInfoDetail, &'s DialInfoDetail) -> std::cmp::Ordering,

View File

@ -71,7 +71,7 @@ impl SignedNodeInfo {
match sequencing {
Sequencing::NoPreference | Sequencing::PreferOrdered => return true,
Sequencing::EnsureOrdered => {
if did.dial_info.protocol_type().is_connection_oriented() {
if did.dial_info.protocol_type().is_ordered() {
return true;
}
}
@ -85,7 +85,7 @@ impl SignedNodeInfo {
match sequencing {
Sequencing::NoPreference | Sequencing::PreferOrdered => return true,
Sequencing::EnsureOrdered => {
if did.dial_info.protocol_type().is_connection_oriented() {
if did.dial_info.protocol_type().is_ordered() {
return true;
}
}

View File

@ -1,6 +1,6 @@
use super::*;
#[derive(Debug, Clone)]
#[derive(Clone)]
pub struct RoutedOperation {
sequencing: Sequencing,
signatures: Vec<Signature>,
@ -8,6 +8,17 @@ pub struct RoutedOperation {
data: Vec<u8>,
}
impl fmt::Debug for RoutedOperation {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("RoutedOperation")
.field("sequencing", &self.sequencing)
.field("signatures.len", &self.signatures.len())
.field("nonce", &self.nonce)
.field("data(len)", &self.data.len())
.finish()
}
}
impl RoutedOperation {
pub fn new(sequencing: Sequencing, nonce: Nonce, data: Vec<u8>) -> Self {
Self {

View File

@ -4,7 +4,7 @@ use crate::storage_manager::SignedValueData;
#[derive(Debug, Clone)]
pub struct RPCOperationValueChanged {
key: TypedKey,
subkeys: Vec<ValueSubkeyRange>,
subkeys: ValueSubkeyRangeSet,
count: u32,
value: SignedValueData,
}
@ -12,7 +12,7 @@ pub struct RPCOperationValueChanged {
impl RPCOperationValueChanged {
pub fn new(
key: TypedKey,
subkeys: Vec<ValueSubkeyRange>,
subkeys: ValueSubkeyRangeSet,
count: u32,
value: SignedValueData,
) -> Self {
@ -32,7 +32,7 @@ impl RPCOperationValueChanged {
pub fn key(&self) -> &TypedKey {
&self.key
}
pub fn subkeys(&self) -> &[ValueSubkeyRange] {
pub fn subkeys(&self) -> &ValueSubkeyRangeSet {
&self.subkeys
}
pub fn count(&self) -> u32 {
@ -41,7 +41,7 @@ impl RPCOperationValueChanged {
pub fn value(&self) -> &SignedValueData {
&self.value
}
pub fn destructure(self) -> (TypedKey, Vec<ValueSubkeyRange>, u32, SignedValueData) {
pub fn destructure(self) -> (TypedKey, ValueSubkeyRangeSet, u32, SignedValueData) {
(self.key, self.subkeys, self.count, self.value)
}
@ -52,25 +52,20 @@ impl RPCOperationValueChanged {
let key = decode_typed_key(&k_reader)?;
let sk_reader = reader.get_subkeys().map_err(RPCError::protocol)?;
let mut subkeys = Vec::<ValueSubkeyRange>::with_capacity(
sk_reader
.len()
.try_into()
.map_err(RPCError::map_protocol("too many subkey ranges"))?,
);
let mut subkeys = ValueSubkeyRangeSet::new();
for skr in sk_reader.iter() {
let vskr = (skr.get_start(), skr.get_end());
if vskr.0 > vskr.1 {
return Err(RPCError::protocol("invalid subkey range"));
}
if let Some(lvskr) = subkeys.last() {
if lvskr.1 >= vskr.0 {
if lvskr >= vskr.0 {
return Err(RPCError::protocol(
"subkey range out of order or not merged",
));
}
}
subkeys.push(vskr);
subkeys.ranges_insert(vskr.0..=vskr.1);
}
let count = reader.get_count();
let v_reader = reader.get_value().map_err(RPCError::protocol)?;
@ -91,14 +86,14 @@ impl RPCOperationValueChanged {
let mut sk_builder = builder.reborrow().init_subkeys(
self.subkeys
.len()
.ranges_len()
.try_into()
.map_err(RPCError::map_internal("invalid subkey range list length"))?,
);
for (i, skr) in self.subkeys.iter().enumerate() {
for (i, skr) in self.subkeys.ranges().enumerate() {
let mut skr_builder = sk_builder.reborrow().get(i as u32);
skr_builder.set_start(skr.0);
skr_builder.set_end(skr.1);
skr_builder.set_start(*skr.start());
skr_builder.set_end(*skr.end());
}
builder.set_count(self.count);

View File

@ -6,7 +6,7 @@ const MAX_WATCH_VALUE_A_PEERS_LEN: usize = 20;
#[derive(Debug, Clone)]
pub struct RPCOperationWatchValueQ {
key: TypedKey,
subkeys: Vec<ValueSubkeyRange>,
subkeys: ValueSubkeyRangeSet,
expiration: u64,
count: u32,
watcher: PublicKey,
@ -16,7 +16,7 @@ pub struct RPCOperationWatchValueQ {
impl RPCOperationWatchValueQ {
pub fn new(
key: TypedKey,
subkeys: Vec<ValueSubkeyRange>,
subkeys: ValueSubkeyRangeSet,
expiration: u64,
count: u32,
watcher: PublicKey,
@ -41,9 +41,9 @@ impl RPCOperationWatchValueQ {
Vec::with_capacity(PUBLIC_KEY_LENGTH + 4 + (self.subkeys.len() * 8) + 8 + 4);
sig_data.extend_from_slice(&self.key.kind.0);
sig_data.extend_from_slice(&self.key.value.bytes);
for sk in &self.subkeys {
sig_data.extend_from_slice(&sk.0.to_le_bytes());
sig_data.extend_from_slice(&sk.1.to_le_bytes());
for sk in self.subkeys.ranges() {
sig_data.extend_from_slice(&sk.start().to_le_bytes());
sig_data.extend_from_slice(&sk.end().to_le_bytes());
}
sig_data.extend_from_slice(&self.expiration.to_le_bytes());
sig_data.extend_from_slice(&self.count.to_le_bytes());
@ -66,7 +66,7 @@ impl RPCOperationWatchValueQ {
pub fn key(&self) -> &TypedKey {
&self.key
}
pub fn subkeys(&self) -> &[ValueSubkeyRange] {
pub fn subkeys(&self) -> &ValueSubkeyRangeSet {
&self.subkeys
}
pub fn expiration(&self) -> u64 {
@ -86,7 +86,7 @@ impl RPCOperationWatchValueQ {
self,
) -> (
TypedKey,
Vec<ValueSubkeyRange>,
ValueSubkeyRangeSet,
u64,
u32,
PublicKey,
@ -112,25 +112,20 @@ impl RPCOperationWatchValueQ {
if sk_reader.len() as usize > MAX_WATCH_VALUE_Q_SUBKEYS_LEN {
return Err(RPCError::protocol("WatchValueQ subkeys length too long"));
}
let mut subkeys = Vec::<ValueSubkeyRange>::with_capacity(
sk_reader
.len()
.try_into()
.map_err(RPCError::map_protocol("too many subkey ranges"))?,
);
let mut subkeys = ValueSubkeyRangeSet::new();
for skr in sk_reader.iter() {
let vskr = (skr.get_start(), skr.get_end());
if vskr.0 > vskr.1 {
return Err(RPCError::protocol("invalid subkey range"));
}
if let Some(lvskr) = subkeys.last() {
if lvskr.1 >= vskr.0 {
if lvskr >= vskr.0 {
return Err(RPCError::protocol(
"subkey range out of order or not merged",
));
}
}
subkeys.push(vskr);
subkeys.ranges_insert(vskr.0..=vskr.1);
}
let expiration = reader.get_expiration();
@ -165,10 +160,10 @@ impl RPCOperationWatchValueQ {
.try_into()
.map_err(RPCError::map_internal("invalid subkey range list length"))?,
);
for (i, skr) in self.subkeys.iter().enumerate() {
for (i, skr) in self.subkeys.ranges().enumerate() {
let mut skr_builder = sk_builder.reborrow().get(i as u32);
skr_builder.set_start(skr.0);
skr_builder.set_end(skr.1);
skr_builder.set_start(*skr.start());
skr_builder.set_end(*skr.end());
}
builder.set_expiration(self.expiration);
builder.set_count(self.count);

View File

@ -44,7 +44,7 @@ impl Destination {
}
}
pub fn relay(relay: NodeRef, target: NodeRef) -> Self {
let sequencing = relay.sequencing();
let sequencing = relay.sequencing().max(target.sequencing());
Self::Relay {
relay,
target,

View File

@ -110,6 +110,28 @@ impl RPCMessageHeader {
RPCMessageHeaderDetail::PrivateRouted(p) => p.direct.envelope.get_crypto_kind(),
}
}
// pub fn direct_peer_noderef(&self) -> NodeRef {
// match &self.detail {
// RPCMessageHeaderDetail::Direct(d) => d.peer_noderef.clone(),
// RPCMessageHeaderDetail::SafetyRouted(s) => s.direct.peer_noderef.clone(),
// RPCMessageHeaderDetail::PrivateRouted(p) => p.direct.peer_noderef.clone(),
// }
// }
pub fn direct_sender_node_id(&self) -> TypedKey {
match &self.detail {
RPCMessageHeaderDetail::Direct(d) => {
TypedKey::new(d.envelope.get_crypto_kind(), d.envelope.get_sender_id())
}
RPCMessageHeaderDetail::SafetyRouted(s) => TypedKey::new(
s.direct.envelope.get_crypto_kind(),
s.direct.envelope.get_sender_id(),
),
RPCMessageHeaderDetail::PrivateRouted(p) => TypedKey::new(
p.direct.envelope.get_crypto_kind(),
p.direct.envelope.get_sender_id(),
),
}
}
}
#[derive(Debug)]
@ -182,7 +204,7 @@ impl<T> Answer<T> {
}
}
/// An operation that has been fully prepared for envelope r
/// An operation that has been fully prepared for envelope
struct RenderedOperation {
/// The rendered operation bytes
message: Vec<u8>,
@ -200,6 +222,20 @@ struct RenderedOperation {
reply_private_route: Option<PublicKey>,
}
impl fmt::Debug for RenderedOperation {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("RenderedOperation")
.field("message(len)", &self.message.len())
.field("destination_node_ref", &self.destination_node_ref)
.field("node_ref", &self.node_ref)
.field("hop_count", &self.hop_count)
.field("safety_route", &self.safety_route)
.field("remote_private_route", &self.remote_private_route)
.field("reply_private_route", &self.reply_private_route)
.finish()
}
}
/// Node information exchanged during every RPC message
#[derive(Default, Debug, Clone)]
pub struct SenderPeerInfo {
@ -431,7 +467,7 @@ impl RPCProcessor {
.await
{
Ok(v) => {
let v = network_result_value_or_log!(v => {
let v = network_result_value_or_log!(v => [ format!(": node_id={} count={} fanout={} fanout={} safety_selection={:?}", node_id, count, fanout, timeout_us, safety_selection) ] {
// Any other failures, just try the next node
return Ok(None);
});
@ -525,10 +561,14 @@ impl RPCProcessor {
})
}
#[instrument(level = "trace", skip(self, waitable_reply), err)]
#[cfg_attr(
feature = "verbose-tracing",
instrument(level = "trace", skip(self, waitable_reply), err)
)]
async fn wait_for_reply(
&self,
waitable_reply: WaitableReply,
debug_string: String,
) -> Result<TimeoutOr<(RPCMessage, TimestampDuration)>, RPCError> {
let out = self
.unlocked_inner
@ -536,7 +576,18 @@ impl RPCProcessor {
.wait_for_op(waitable_reply.handle, waitable_reply.timeout_us)
.await;
match &out {
Err(_) | Ok(TimeoutOr::Timeout) => {
Err(e) => {
log_rpc!(debug "RPC Lost ({}): {}", debug_string, e);
self.record_question_lost(
waitable_reply.send_ts,
waitable_reply.node_ref.clone(),
waitable_reply.safety_route,
waitable_reply.remote_private_route,
waitable_reply.reply_private_route,
);
}
Ok(TimeoutOr::Timeout) => {
log_rpc!(debug "RPC Lost ({}): Timeout", debug_string);
self.record_question_lost(
waitable_reply.send_ts,
waitable_reply.node_ref.clone(),
@ -648,7 +699,10 @@ impl RPCProcessor {
/// Produce a byte buffer that represents the wire encoding of the entire
/// unencrypted envelope body for a RPC message. This incorporates
/// wrapping a private and/or safety route if they are specified.
#[instrument(level = "debug", skip(self, operation), err)]
#[cfg_attr(
feature = "verbose-tracing",
instrument(level = "debug", skip(self, operation), err)
)]
fn render_operation(
&self,
dest: Destination,
@ -708,6 +762,10 @@ impl RPCProcessor {
if sequencing > node_ref.sequencing() {
node_ref.set_sequencing(sequencing)
}
let mut destination_node_ref = destination_node_ref.clone();
if sequencing > destination_node_ref.sequencing() {
destination_node_ref.set_sequencing(sequencing)
}
// Reply private route should be None here, even for questions
assert!(reply_private_route.is_none());
@ -776,7 +834,10 @@ impl RPCProcessor {
/// routing table caching when it is okay to do so
/// Also check target's timestamp of our own node info, to see if we should send that
/// And send our timestamp of the target's node info so they can determine if they should update us on their next rpc
#[instrument(level = "trace", skip(self), ret)]
#[cfg_attr(
feature = "verbose-tracing",
instrument(level = "trace", skip(self), ret)
)]
fn get_sender_peer_info(&self, dest: &Destination) -> SenderPeerInfo {
// Don't do this if the sender is to remain private
// Otherwise we would be attaching the original sender's identity to the final destination,
@ -1077,7 +1138,10 @@ impl RPCProcessor {
/// Issue a question over the network, possibly using an anonymized route
/// Optionally keeps a context to be passed to the answer processor when an answer is received
#[instrument(level = "debug", skip(self, question), err)]
#[cfg_attr(
feature = "verbose-tracing",
instrument(level = "debug", skip(self, question), err)
)]
async fn question(
&self,
dest: Destination,
@ -1092,7 +1156,8 @@ impl RPCProcessor {
let op_id = operation.op_id();
// Log rpc send
trace!(target: "rpc_message", dir = "send", kind = "question", op_id = op_id.as_u64(), desc = operation.kind().desc(), ?dest);
#[cfg(feature = "verbose-tracing")]
debug!(target: "rpc_message", dir = "send", kind = "question", op_id = op_id.as_u64(), desc = operation.kind().desc(), ?dest);
// Produce rendered operation
let RenderedOperation {
@ -1118,17 +1183,31 @@ impl RPCProcessor {
// Send question
let bytes: ByteCount = (message.len() as u64).into();
let send_ts = get_aligned_timestamp();
let send_data_kind = network_result_try!(self
#[allow(unused_variables)]
let message_len = message.len();
let res = self
.network_manager()
.send_envelope(node_ref.clone(), Some(destination_node_ref), message)
.send_envelope(
node_ref.clone(),
Some(destination_node_ref.clone()),
message,
)
.await
.map_err(|e| {
// If we're returning an error, clean up
self.record_send_failure(RPCKind::Question, send_ts, node_ref.clone(), safety_route, remote_private_route);
self.record_send_failure(
RPCKind::Question,
send_ts,
node_ref.clone(),
safety_route,
remote_private_route,
);
RPCError::network(e)
})? => {
})?;
let send_data_kind = network_result_value_or_log!( res => [ format!(": node_ref={}, destination_node_ref={}, message.len={}", node_ref, destination_node_ref, message_len) ] {
// If we couldn't send we're still cleaning up
self.record_send_failure(RPCKind::Question, send_ts, node_ref.clone(), safety_route, remote_private_route);
network_result_raise!(res);
}
);
@ -1156,7 +1235,10 @@ impl RPCProcessor {
}
/// Issue a statement over the network, possibly using an anonymized route
#[instrument(level = "debug", skip(self, statement), err)]
#[cfg_attr(
feature = "verbose-tracing",
instrument(level = "debug", skip(self, statement), err)
)]
async fn statement(
&self,
dest: Destination,
@ -1169,7 +1251,8 @@ impl RPCProcessor {
let operation = RPCOperation::new_statement(statement, spi);
// Log rpc send
trace!(target: "rpc_message", dir = "send", kind = "statement", op_id = operation.op_id().as_u64(), desc = operation.kind().desc(), ?dest);
#[cfg(feature = "verbose-tracing")]
debug!(target: "rpc_message", dir = "send", kind = "statement", op_id = operation.op_id().as_u64(), desc = operation.kind().desc(), ?dest);
// Produce rendered operation
let RenderedOperation {
@ -1185,17 +1268,31 @@ impl RPCProcessor {
// Send statement
let bytes: ByteCount = (message.len() as u64).into();
let send_ts = get_aligned_timestamp();
let _send_data_kind = network_result_try!(self
#[allow(unused_variables)]
let message_len = message.len();
let res = self
.network_manager()
.send_envelope(node_ref.clone(), Some(destination_node_ref), message)
.send_envelope(
node_ref.clone(),
Some(destination_node_ref.clone()),
message,
)
.await
.map_err(|e| {
// If we're returning an error, clean up
self.record_send_failure(RPCKind::Statement, send_ts, node_ref.clone(), safety_route, remote_private_route);
self.record_send_failure(
RPCKind::Statement,
send_ts,
node_ref.clone(),
safety_route,
remote_private_route,
);
RPCError::network(e)
})? => {
})?;
let _send_data_kind = network_result_value_or_log!( res => [ format!(": node_ref={}, destination_node_ref={}, message.len={}", node_ref, destination_node_ref, message_len) ] {
// If we couldn't send we're still cleaning up
self.record_send_failure(RPCKind::Statement, send_ts, node_ref.clone(), safety_route, remote_private_route);
network_result_raise!(res);
}
);
@ -1213,7 +1310,10 @@ impl RPCProcessor {
}
/// Issue a reply over the network, possibly using an anonymized route
/// The request must want a response, or this routine fails
#[instrument(level = "debug", skip(self, request, answer), err)]
#[cfg_attr(
feature = "verbose-tracing",
instrument(level = "debug", skip(self, request, answer), err)
)]
async fn answer(
&self,
request: RPCMessage,
@ -1229,7 +1329,8 @@ impl RPCProcessor {
let operation = RPCOperation::new_answer(&request.operation, answer, spi);
// Log rpc send
trace!(target: "rpc_message", dir = "send", kind = "answer", op_id = operation.op_id().as_u64(), desc = operation.kind().desc(), ?dest);
#[cfg(feature = "verbose-tracing")]
debug!(target: "rpc_message", dir = "send", kind = "answer", op_id = operation.op_id().as_u64(), desc = operation.kind().desc(), ?dest);
// Produce rendered operation
let RenderedOperation {
@ -1245,16 +1346,31 @@ impl RPCProcessor {
// Send the reply
let bytes: ByteCount = (message.len() as u64).into();
let send_ts = get_aligned_timestamp();
network_result_try!(self.network_manager()
.send_envelope(node_ref.clone(), Some(destination_node_ref), message)
#[allow(unused_variables)]
let message_len = message.len();
let res = self
.network_manager()
.send_envelope(
node_ref.clone(),
Some(destination_node_ref.clone()),
message,
)
.await
.map_err(|e| {
// If we're returning an error, clean up
self.record_send_failure(RPCKind::Answer, send_ts, node_ref.clone(), safety_route, remote_private_route);
self.record_send_failure(
RPCKind::Answer,
send_ts,
node_ref.clone(),
safety_route,
remote_private_route,
);
RPCError::network(e)
})? => {
})?;
let _send_data_kind = network_result_value_or_log!( res => [ format!(": node_ref={}, destination_node_ref={}, message.len={}", node_ref, destination_node_ref, message_len) ] {
// If we couldn't send we're still cleaning up
self.record_send_failure(RPCKind::Answer, send_ts, node_ref.clone(), safety_route, remote_private_route);
network_result_raise!(res);
}
);
@ -1321,7 +1437,10 @@ impl RPCProcessor {
}
//////////////////////////////////////////////////////////////////////
#[instrument(level = "trace", skip(self, encoded_msg), err)]
#[cfg_attr(
feature = "verbose-tracing",
instrument(level = "trace", skip(self, encoded_msg), err)
)]
async fn process_rpc_message(
&self,
encoded_msg: RPCMessageEncoded,
@ -1474,7 +1593,8 @@ impl RPCProcessor {
let rpc_worker_span = span!(parent: None, Level::TRACE, "rpc_worker recv");
// xxx: causes crash (Missing otel data span extensions)
// rpc_worker_span.follows_from(span_id);
let res = match self
network_result_value_or_log!(match self
.process_rpc_message(msg)
.instrument(rpc_worker_span)
.await
@ -1485,13 +1605,14 @@ impl RPCProcessor {
}
Ok(v) => v,
};
network_result_value_or_log!(res => {});
} => [ format!(": msg.header={:?}", msg.header) ] {});
}
}
#[instrument(level = "trace", skip(self, body), err)]
#[cfg_attr(
feature = "verbose-tracing",
instrument(level = "trace", skip(self, body), err)
)]
pub fn enqueue_direct_message(
&self,
envelope: Envelope,
@ -1500,19 +1621,22 @@ impl RPCProcessor {
routing_domain: RoutingDomain,
body: Vec<u8>,
) -> EyreResult<()> {
let header = RPCMessageHeader {
detail: RPCMessageHeaderDetail::Direct(RPCMessageHeaderDetailDirect {
envelope,
peer_noderef,
connection_descriptor,
routing_domain,
}),
timestamp: get_aligned_timestamp(),
body_len: ByteCount::new(body.len() as u64),
};
let msg = RPCMessageEncoded {
header: RPCMessageHeader {
detail: RPCMessageHeaderDetail::Direct(RPCMessageHeaderDetailDirect {
envelope,
peer_noderef,
connection_descriptor,
routing_domain,
}),
timestamp: get_aligned_timestamp(),
body_len: ByteCount::new(body.len() as u64),
},
header,
data: RPCMessageData { contents: body },
};
let send_channel = {
let inner = self.inner.lock();
inner.send_channel.as_ref().unwrap().clone()
@ -1524,7 +1648,10 @@ impl RPCProcessor {
Ok(())
}
#[instrument(level = "trace", skip(self, body), err)]
#[cfg_attr(
feature = "verbose-tracing",
instrument(level = "trace", skip(self, body), err)
)]
fn enqueue_safety_routed_message(
&self,
direct: RPCMessageHeaderDetailDirect,
@ -1532,16 +1659,18 @@ impl RPCProcessor {
sequencing: Sequencing,
body: Vec<u8>,
) -> EyreResult<()> {
let header = RPCMessageHeader {
detail: RPCMessageHeaderDetail::SafetyRouted(RPCMessageHeaderDetailSafetyRouted {
direct,
remote_safety_route,
sequencing,
}),
timestamp: get_aligned_timestamp(),
body_len: (body.len() as u64).into(),
};
let msg = RPCMessageEncoded {
header: RPCMessageHeader {
detail: RPCMessageHeaderDetail::SafetyRouted(RPCMessageHeaderDetailSafetyRouted {
direct,
remote_safety_route,
sequencing,
}),
timestamp: get_aligned_timestamp(),
body_len: (body.len() as u64).into(),
},
header,
data: RPCMessageData { contents: body },
};
let send_channel = {
@ -1555,7 +1684,10 @@ impl RPCProcessor {
Ok(())
}
#[instrument(level = "trace", skip(self, body), err)]
#[cfg_attr(
feature = "verbose-tracing",
instrument(level = "trace", skip(self, body), err)
)]
fn enqueue_private_routed_message(
&self,
direct: RPCMessageHeaderDetailDirect,
@ -1564,21 +1696,22 @@ impl RPCProcessor {
safety_spec: SafetySpec,
body: Vec<u8>,
) -> EyreResult<()> {
let header = RPCMessageHeader {
detail: RPCMessageHeaderDetail::PrivateRouted(RPCMessageHeaderDetailPrivateRouted {
direct,
remote_safety_route,
private_route,
safety_spec,
}),
timestamp: get_aligned_timestamp(),
body_len: (body.len() as u64).into(),
};
let msg = RPCMessageEncoded {
header: RPCMessageHeader {
detail: RPCMessageHeaderDetail::PrivateRouted(
RPCMessageHeaderDetailPrivateRouted {
direct,
remote_safety_route,
private_route,
safety_spec,
},
),
timestamp: get_aligned_timestamp(),
body_len: (body.len() as u64).into(),
},
header,
data: RPCMessageData { contents: body },
};
let send_channel = {
let inner = self.inner.lock();
inner.send_channel.as_ref().unwrap().clone()

View File

@ -114,7 +114,10 @@ where
}
/// Complete the app call
#[instrument(level = "trace", skip(self, message), err)]
#[cfg_attr(
feature = "verbose-tracing",
instrument(level = "trace", skip(self, message), err)
)]
pub async fn complete_op_waiter(&self, op_id: OperationId, message: T) -> Result<(), RPCError> {
let waiting_op = {
let mut inner = self.inner.lock();
@ -152,7 +155,7 @@ where
.into_timeout_or();
Ok(res
.on_timeout(|| {
log_rpc!(debug "op wait timed out: {}", handle.op_id);
// log_rpc!(debug "op wait timed out: {}", handle.op_id);
// debug_print_backtrace();
self.cancel_op_waiter(handle.op_id);
})

View File

@ -3,12 +3,17 @@ use super::*;
impl RPCProcessor {
// Sends a high level app request and wait for response
// Can be sent via all methods including relays and routes
#[instrument(level = "trace", skip(self), ret, err)]
#[cfg_attr(
feature = "verbose-tracing",
instrument(level = "trace", skip(self, message), fields(message.len = message.len(), ret.latency, ret.len), err)
)]
pub async fn rpc_call_app_call(
self,
dest: Destination,
message: Vec<u8>,
) -> Result<NetworkResult<Answer<Vec<u8>>>, RPCError> {
let debug_string = format!("AppCall(message(len)={}) => {}", message.len(), dest);
let app_call_q = RPCOperationAppCallQ::new(message)?;
let question = RPCQuestion::new(
network_result_try!(self.get_destination_respond_to(&dest)?),
@ -19,7 +24,7 @@ impl RPCProcessor {
let waitable_reply = network_result_try!(self.question(dest, question, None).await?);
// Wait for reply
let (msg, latency) = match self.wait_for_reply(waitable_reply).await? {
let (msg, latency) = match self.wait_for_reply(waitable_reply, debug_string).await? {
TimeoutOr::Timeout => return Ok(NetworkResult::Timeout),
TimeoutOr::Value(v) => v,
};
@ -36,10 +41,14 @@ impl RPCProcessor {
let a_message = app_call_a.destructure();
#[cfg(feature = "verbose-tracing")]
tracing::Span::current().record("ret.latency", latency.as_u64());
#[cfg(feature = "verbose-tracing")]
tracing::Span::current().record("ret.len", a_message.len());
Ok(NetworkResult::value(Answer::new(latency, a_message)))
}
#[instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err)]
#[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err))]
pub(crate) async fn process_app_call_q(
&self,
msg: RPCMessage,

View File

@ -3,7 +3,10 @@ use super::*;
impl RPCProcessor {
// Sends a high level app message
// Can be sent via all methods including relays and routes
#[instrument(level = "trace", skip(self), ret, err)]
#[cfg_attr(
feature = "verbose-tracing",
instrument(level = "trace", skip(self, message), fields(message.len = message.len()), err)
)]
pub async fn rpc_call_app_message(
self,
dest: Destination,
@ -16,7 +19,7 @@ impl RPCProcessor {
self.statement(dest, statement).await
}
#[instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err)]
#[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err))]
pub(crate) async fn process_app_message(
&self,
msg: RPCMessage,

View File

@ -1,7 +1,7 @@
use super::*;
impl RPCProcessor {
#[instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err)]
#[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err))]
pub(crate) async fn process_cancel_tunnel_q(
&self,
msg: RPCMessage,

View File

@ -1,7 +1,7 @@
use super::*;
impl RPCProcessor {
#[instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err)]
#[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err))]
pub(crate) async fn process_complete_tunnel_q(
&self,
msg: RPCMessage,

View File

@ -1,7 +1,7 @@
use super::*;
impl RPCProcessor {
#[instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err)]
#[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err))]
pub(crate) async fn process_find_block_q(
&self,
msg: RPCMessage,

View File

@ -7,7 +7,10 @@ impl RPCProcessor {
/// Because this leaks information about the identity of the node itself,
/// replying to this request received over a private route will leak
/// the identity of the node and defeat the private route.
#[instrument(level = "trace", skip(self), ret, err)]
#[cfg_attr(
feature = "verbose-tracing",
instrument(level = "trace", skip(self), err)
)]
pub async fn rpc_call_find_node(
self,
dest: Destination,
@ -32,11 +35,13 @@ impl RPCProcessor {
find_node_q_detail,
);
let debug_string = format!("FindNode(node_id={}) => {}", node_id, dest);
// Send the find_node request
let waitable_reply = network_result_try!(self.question(dest, find_node_q, None).await?);
// Wait for reply
let (msg, latency) = match self.wait_for_reply(waitable_reply).await? {
let (msg, latency) = match self.wait_for_reply(waitable_reply, debug_string).await? {
TimeoutOr::Timeout => return Ok(NetworkResult::Timeout),
TimeoutOr::Value(v) => v,
};
@ -65,7 +70,7 @@ impl RPCProcessor {
Ok(NetworkResult::value(Answer::new(latency, peers)))
}
#[instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err)]
#[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err))]
pub(crate) async fn process_find_node_q(
&self,
msg: RPCMessage,

View File

@ -15,7 +15,17 @@ impl RPCProcessor {
/// Because this leaks information about the identity of the node itself,
/// replying to this request received over a private route will leak
/// the identity of the node and defeat the private route.
#[instrument(level = "trace", skip(self), ret, err)]
#[cfg_attr(
feature = "verbose-tracing",
instrument(level = "trace", skip(self, last_descriptor),
fields(ret.value.data.len,
ret.value.data.seq,
ret.value.data.writer,
ret.peers.len,
ret.latency
),err)
)]
pub async fn rpc_call_get_value(
self,
dest: Destination,
@ -39,6 +49,18 @@ impl RPCProcessor {
return Err(RPCError::internal("No node id for crypto kind"));
};
let debug_string = format!(
"OUT ==> GetValueQ({} #{}{}) => {}",
key,
subkey,
if last_descriptor.is_some() {
" +lastdesc"
} else {
""
},
dest
);
// Send the getvalue question
let get_value_q = RPCOperationGetValueQ::new(key, subkey, last_descriptor.is_none());
let question = RPCQuestion::new(
@ -52,13 +74,15 @@ impl RPCProcessor {
vcrypto: vcrypto.clone(),
});
log_rpc!(debug "{}", debug_string);
let waitable_reply = network_result_try!(
self.question(dest, question, Some(question_context))
.await?
);
// Wait for reply
let (msg, latency) = match self.wait_for_reply(waitable_reply).await? {
let (msg, latency) = match self.wait_for_reply(waitable_reply, debug_string).await? {
TimeoutOr::Timeout => return Ok(NetworkResult::Timeout),
TimeoutOr::Value(v) => v,
};
@ -75,6 +99,28 @@ impl RPCProcessor {
let (value, peers, descriptor) = get_value_a.destructure();
let debug_string_value = value.as_ref().map(|v| {
format!(" len={} writer={}",
v.value_data().data().len(),
v.value_data().writer(),
)
}).unwrap_or_default();
let debug_string_answer = format!(
"OUT <== GetValueA({} #{}{}{} peers={})",
key,
subkey,
debug_string_value,
if descriptor.is_some() {
" +desc"
} else {
""
},
peers.len(),
);
log_rpc!(debug "{}", debug_string_answer);
// Validate peers returned are, in fact, closer to the key than the node we sent this to
let valid = match RoutingTable::verify_peers_closer(vcrypto, target_node_id, key, &peers) {
Ok(v) => v,
@ -89,6 +135,17 @@ impl RPCProcessor {
return Ok(NetworkResult::invalid_message("non-closer peers returned"));
}
#[cfg(feature = "verbose-tracing")]
tracing::Span::current().record("ret.latency", latency.as_u64());
#[cfg(feature = "verbose-tracing")]
if let Some(value) = &value {
tracing::Span::current().record("ret.value.data.len", value.value_data().data().len());
tracing::Span::current().record("ret.value.data.seq", value.value_data().seq());
tracing::Span::current().record("ret.value.data.writer", value.value_data().writer().to_string());
}
#[cfg(feature = "verbose-tracing")]
tracing::Span::current().record("ret.peers.len", peers.len());
Ok(NetworkResult::value(Answer::new(
latency,
GetValueAnswer {
@ -99,7 +156,7 @@ impl RPCProcessor {
)))
}
#[instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err)]
#[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err))]
pub(crate) async fn process_get_value_q(
&self,
msg: RPCMessage,
@ -131,6 +188,20 @@ impl RPCProcessor {
let routing_table = self.routing_table();
let closer_to_key_peers = network_result_try!(routing_table.find_peers_closer_to_key(key));
let debug_string = format!(
"IN <=== GetValueQ({} #{}{}) <== {}",
key,
subkey,
if want_descriptor {
" +wantdesc"
} else {
""
},
msg.header.direct_sender_node_id()
);
log_rpc!(debug "{}", debug_string);
// See if we have this record ourselves
let storage_manager = self.storage_manager();
let subkey_result = network_result_try!(storage_manager
@ -138,6 +209,29 @@ impl RPCProcessor {
.await
.map_err(RPCError::internal)?);
let debug_string_value = subkey_result.value.as_ref().map(|v| {
format!(" len={} writer={}",
v.value_data().data().len(),
v.value_data().writer(),
)
}).unwrap_or_default();
let debug_string_answer = format!(
"IN ===> GetValueA({} #{}{}{} peers={}) ==> {}",
key,
subkey,
debug_string_value,
if subkey_result.descriptor.is_some() {
" +desc"
} else {
""
},
closer_to_key_peers.len(),
msg.header.direct_sender_node_id()
);
log_rpc!(debug "{}", debug_string_answer);
// Make GetValue answer
let get_value_a = RPCOperationGetValueA::new(
subkey_result.value,

View File

@ -3,7 +3,10 @@ use super::*;
impl RPCProcessor {
// Sends a unidirectional in-band return receipt
// Can be sent via all methods including relays and routes
#[instrument(level = "trace", skip(self, receipt), ret, err)]
#[cfg_attr(
feature = "verbose-tracing",
instrument(level = "trace", skip(self, receipt), ret, err)
)]
pub async fn rpc_call_return_receipt<D: AsRef<[u8]>>(
self,
dest: Destination,
@ -20,7 +23,7 @@ impl RPCProcessor {
Ok(NetworkResult::value(()))
}
#[instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err)]
#[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err))]
pub(crate) async fn process_return_receipt(
&self,
msg: RPCMessage,

View File

@ -1,7 +1,7 @@
use super::*;
impl RPCProcessor {
#[instrument(level = "trace", skip_all, err)]
#[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip_all, err))]
async fn process_route_safety_route_hop(
&self,
routed_operation: RoutedOperation,
@ -52,7 +52,7 @@ impl RPCProcessor {
.await
}
#[instrument(level = "trace", skip_all, err)]
#[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip_all, err))]
async fn process_route_private_route_hop(
&self,
routed_operation: RoutedOperation,
@ -99,7 +99,7 @@ impl RPCProcessor {
/// Note: it is important that we never respond with a safety route to questions that come
/// in without a private route. Giving away a safety route when the node id is known is
/// a privacy violation!
#[instrument(level = "trace", skip_all, err)]
#[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip_all, err))]
fn process_safety_routed_operation(
&self,
detail: RPCMessageHeaderDetailDirect,
@ -141,7 +141,7 @@ impl RPCProcessor {
}
/// Process a routed operation that came in over both a safety route and a private route
#[instrument(level = "trace", skip_all, err)]
#[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip_all, err))]
fn process_private_routed_operation(
&self,
detail: RPCMessageHeaderDetailDirect,
@ -209,7 +209,7 @@ impl RPCProcessor {
Ok(NetworkResult::value(()))
}
#[instrument(level = "trace", skip_all, err)]
#[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip_all, err))]
fn process_routed_operation(
&self,
detail: RPCMessageHeaderDetailDirect,
@ -239,7 +239,7 @@ impl RPCProcessor {
)
}
}
#[instrument(level = "trace", skip_all, err)]
#[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip_all, err))]
pub(crate) async fn process_private_route_first_hop(
&self,
mut routed_operation: RoutedOperation,
@ -360,7 +360,7 @@ impl RPCProcessor {
Ok(NetworkResult::value(route_hop))
}
#[instrument(level = "trace", skip(self, msg), ret, err)]
#[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self), ret, err))]
pub(crate) async fn process_route(
&self,
msg: RPCMessage,

View File

@ -14,6 +14,20 @@ impl RPCProcessor {
/// Because this leaks information about the identity of the node itself,
/// replying to this request received over a private route will leak
/// the identity of the node and defeat the private route.
#[cfg_attr(
feature = "verbose-tracing",
instrument(level = "trace", skip(self, value, descriptor),
fields(value.data.len = value.value_data().data().len(),
value.data.seq = value.value_data().seq(),
value.data.writer = value.value_data().writer().to_string(),
ret.set,
ret.value.data.len,
ret.value.data.seq,
ret.value.data.writer,
ret.peers.len,
ret.latency
), err)
)]
pub async fn rpc_call_set_value(
self,
dest: Destination,
@ -39,6 +53,20 @@ impl RPCProcessor {
return Err(RPCError::internal("No node id for crypto kind"));
};
let debug_string = format!(
"OUT ==> SetValueQ({} #{} len={} writer={}{}) => {}",
key,
subkey,
value.value_data().data().len(),
value.value_data().writer(),
if send_descriptor {
" +senddesc"
} else {
""
},
dest
);
// Send the setvalue question
let set_value_q = RPCOperationSetValueQ::new(
key,
@ -59,13 +87,17 @@ impl RPCProcessor {
subkey,
vcrypto: vcrypto.clone(),
});
log_rpc!(debug "{}", debug_string);
let waitable_reply = network_result_try!(
self.question(dest, question, Some(question_context))
.await?
);
// Wait for reply
let (msg, latency) = match self.wait_for_reply(waitable_reply).await? {
let (msg, latency) = match self.wait_for_reply(waitable_reply, debug_string).await? {
TimeoutOr::Timeout => return Ok(NetworkResult::Timeout),
TimeoutOr::Value(v) => v,
};
@ -82,6 +114,28 @@ impl RPCProcessor {
let (set, value, peers) = set_value_a.destructure();
let debug_string_value = value.as_ref().map(|v| {
format!(" len={} writer={}",
v.value_data().data().len(),
v.value_data().writer(),
)
}).unwrap_or_default();
let debug_string_answer = format!(
"OUT <== SetValueA({} #{}{}{} peers={})",
key,
subkey,
if set {
" +set"
} else {
""
},
debug_string_value,
peers.len(),
);
log_rpc!(debug "{}", debug_string_answer);
// Validate peers returned are, in fact, closer to the key than the node we sent this to
let valid = match RoutingTable::verify_peers_closer(vcrypto, target_node_id, key, &peers) {
Ok(v) => v,
@ -96,13 +150,26 @@ impl RPCProcessor {
return Ok(NetworkResult::invalid_message("non-closer peers returned"));
}
#[cfg(feature = "verbose-tracing")]
tracing::Span::current().record("ret.latency", latency.as_u64());
#[cfg(feature = "verbose-tracing")]
tracing::Span::current().record("ret.set", set);
#[cfg(feature = "verbose-tracing")]
if let Some(value) = &value {
tracing::Span::current().record("ret.value.data.len", value.value_data().data().len());
tracing::Span::current().record("ret.value.data.seq", value.value_data().seq());
tracing::Span::current().record("ret.value.data.writer", value.value_data().writer().to_string());
}
#[cfg(feature = "verbose-tracing")]
tracing::Span::current().record("ret.peers.len", peers.len());
Ok(NetworkResult::value(Answer::new(
latency,
SetValueAnswer { set, value, peers },
)))
}
#[instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err)]
#[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err))]
pub(crate) async fn process_set_value_q(
&self,
msg: RPCMessage,
@ -134,6 +201,22 @@ impl RPCProcessor {
let routing_table = self.routing_table();
let closer_to_key_peers = network_result_try!(routing_table.find_peers_closer_to_key(key));
let debug_string = format!(
"IN <=== SetValueQ({} #{} len={} writer={}{}) <== {}",
key,
subkey,
value.value_data().data().len(),
value.value_data().writer(),
if descriptor.is_some() {
" +desc"
} else {
""
},
msg.header.direct_sender_node_id()
);
log_rpc!(debug "{}", debug_string);
// If there are less than 'set_value_count' peers that are closer, then store here too
let set_value_count = {
let c = self.config.get();
@ -155,6 +238,29 @@ impl RPCProcessor {
(true, new_value)
};
let debug_string_value = new_value.as_ref().map(|v| {
format!(" len={} writer={}",
v.value_data().data().len(),
v.value_data().writer(),
)
}).unwrap_or_default();
let debug_string_answer = format!(
"IN ===> SetValueA({} #{}{}{} peers={}) ==> {}",
key,
subkey,
if set {
" +set"
} else {
""
},
debug_string_value,
closer_to_key_peers.len(),
msg.header.direct_sender_node_id()
);
log_rpc!(debug "{}", debug_string_answer);
// Make SetValue answer
let set_value_a = RPCOperationSetValueA::new(set, new_value, closer_to_key_peers)?;

View File

@ -3,7 +3,10 @@ use super::*;
impl RPCProcessor {
// Sends a unidirectional signal to a node
// Can be sent via relays but not routes. For routed 'signal' like capabilities, use AppMessage.
#[instrument(level = "trace", skip(self), ret, err)]
#[cfg_attr(
feature = "verbose-tracing",
instrument(level = "trace", skip(self), ret, err)
)]
pub async fn rpc_call_signal(
self,
dest: Destination,
@ -29,7 +32,7 @@ impl RPCProcessor {
self.statement(dest, statement).await
}
#[instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err)]
#[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err))]
pub(crate) async fn process_signal(
&self,
msg: RPCMessage,

View File

@ -1,7 +1,7 @@
use super::*;
impl RPCProcessor {
#[instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err)]
#[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err))]
pub(crate) async fn process_start_tunnel_q(
&self,
msg: RPCMessage,

View File

@ -15,7 +15,10 @@ impl RPCProcessor {
// direct -> node status + sender info
// safety -> node status
// private -> nothing
#[instrument(level = "trace", skip(self), ret, err)]
#[cfg_attr(
feature = "verbose-tracing",
instrument(level = "trace", skip(self), ret, err)
)]
pub async fn rpc_call_status(
self,
dest: Destination,
@ -101,6 +104,8 @@ impl RPCProcessor {
RPCQuestionDetail::StatusQ(status_q),
);
let debug_string = format!("Status => {}", dest);
// Send the info request
let waitable_reply =
network_result_try!(self.question(dest.clone(), question, None).await?);
@ -109,7 +114,7 @@ impl RPCProcessor {
let send_data_kind = waitable_reply.send_data_kind;
// Wait for reply
let (msg, latency) = match self.wait_for_reply(waitable_reply).await? {
let (msg, latency) = match self.wait_for_reply(waitable_reply, debug_string).await? {
TimeoutOr::Timeout => return Ok(NetworkResult::Timeout),
TimeoutOr::Value(v) => v,
};
@ -208,7 +213,7 @@ impl RPCProcessor {
Ok(NetworkResult::value(Answer::new(latency, opt_sender_info)))
}
#[instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err)]
#[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err))]
pub(crate) async fn process_status_q(
&self,
msg: RPCMessage,

View File

@ -1,7 +1,7 @@
use super::*;
impl RPCProcessor {
#[instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err)]
#[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err))]
pub(crate) async fn process_supply_block_q(
&self,
msg: RPCMessage,

View File

@ -2,7 +2,10 @@ use super::*;
impl RPCProcessor {
// Can only be sent directly, not via relays or routes
#[instrument(level = "trace", skip(self), ret, err)]
#[cfg_attr(
feature = "verbose-tracing",
instrument(level = "trace", skip(self), ret, err)
)]
pub async fn rpc_call_validate_dial_info(
self,
peer: NodeRef,
@ -23,7 +26,7 @@ impl RPCProcessor {
// Send the validate_dial_info request
// This can only be sent directly, as relays can not validate dial info
network_result_value_or_log!(self.statement(Destination::direct(peer), statement)
.await? => {
.await? => [ format!(": peer={} statement={:?}", peer, statement) ] {
return Ok(false);
}
);
@ -33,7 +36,7 @@ impl RPCProcessor {
ReceiptEvent::ReturnedPrivate { private_route: _ }
| ReceiptEvent::ReturnedInBand { inbound_noderef: _ }
| ReceiptEvent::ReturnedSafety => {
log_net!(debug "validate_dial_info receipt should be returned out-of-band".green());
log_net!(debug "validate_dial_info receipt should be returned out-of-band");
Ok(false)
}
ReceiptEvent::ReturnedOutOfBand => {
@ -41,7 +44,7 @@ impl RPCProcessor {
Ok(true)
}
ReceiptEvent::Expired => {
log_net!(debug "validate_dial_info receipt expired".green());
log_net!(debug "validate_dial_info receipt expired");
Ok(false)
}
ReceiptEvent::Cancelled => {
@ -50,7 +53,7 @@ impl RPCProcessor {
}
}
#[instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err)]
#[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err))]
pub(crate) async fn process_validate_dial_info(
&self,
msg: RPCMessage,
@ -138,7 +141,7 @@ impl RPCProcessor {
// Send the validate_dial_info request
// This can only be sent directly, as relays can not validate dial info
network_result_value_or_log!(self.statement(Destination::direct(peer), statement)
.await? => {
.await? => [ format!(": peer={} statement={:?}", peer, statement) ] {
continue;
}
);

View File

@ -1,7 +1,7 @@
use super::*;
impl RPCProcessor {
#[instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), err)]
#[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), err))]
pub(crate) async fn process_value_changed(
&self,
msg: RPCMessage,

View File

@ -1,7 +1,7 @@
use super::*;
impl RPCProcessor {
#[instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err)]
#[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err))]
pub(crate) async fn process_watch_value_q(
&self,
msg: RPCMessage,

View File

@ -15,4 +15,24 @@ impl StorageManager {
};
remote_record_store.debug_records()
}
/// Purge local DHT records to reclaim storage space.
///
/// * `reclaim` — maximum number of bytes to reclaim; `None` means
///   reclaim as much as possible (`usize::MAX`).
///
/// Returns a human-readable summary string suitable for debug output.
/// If the storage manager has not been initialized yet, returns
/// "not initialized" without touching any records.
pub(crate) async fn purge_local_records(&self, reclaim: Option<usize>) -> String {
    let mut inner = self.inner.lock().await;
    let Some(local_record_store) = &mut inner.local_record_store else {
        return "not initialized".to_owned();
    };
    let reclaimed = local_record_store
        .reclaim_space(reclaim.unwrap_or(usize::MAX))
        .await;
    // Tail expression instead of `return expr;` per Rust idiom
    format!("Local records purged: reclaimed {} bytes", reclaimed)
}
/// Purge remotely-stored DHT records to reclaim storage space.
///
/// * `reclaim` — maximum number of bytes to reclaim; `None` means
///   reclaim as much as possible (`usize::MAX`).
///
/// Returns a human-readable summary string suitable for debug output.
/// If the storage manager has not been initialized yet, returns
/// "not initialized" without touching any records.
pub(crate) async fn purge_remote_records(&self, reclaim: Option<usize>) -> String {
    let mut inner = self.inner.lock().await;
    let Some(remote_record_store) = &mut inner.remote_record_store else {
        return "not initialized".to_owned();
    };
    let reclaimed = remote_record_store
        .reclaim_space(reclaim.unwrap_or(usize::MAX))
        .await;
    // Tail expression instead of `return expr;` per Rust idiom
    format!("Remote records purged: reclaimed {} bytes", reclaimed)
}
}

View File

@ -58,13 +58,13 @@ impl StorageManager {
let vres = rpc_processor
.clone()
.rpc_call_get_value(
Destination::direct(next_node).with_safety(safety_selection),
Destination::direct(next_node.clone()).with_safety(safety_selection),
key,
subkey,
last_descriptor,
)
.await?;
let gva = network_result_value_or_log!(vres => {
let gva = network_result_value_or_log!(vres => [ format!(": next_node={} safety_selection={:?} key={} subkey={}", next_node, safety_selection, key, subkey) ] {
// Any other failures, just try the next node
return Ok(None);
});

View File

@ -0,0 +1,111 @@
use super::*;
use num_traits::{PrimInt, Unsigned};
/// Error returned by [`LimitedSize::commit`] when the pending value
/// exceeds the configured limit.
#[derive(ThisError, Debug, Clone, Copy, Eq, PartialEq)]
pub enum LimitError {
    /// The uncommitted value was greater than the configured limit.
    #[error("limit overflow")]
    OverLimit,
}
/// Arithmetic error returned by [`LimitedSize::add`] / [`LimitedSize::sub`]
/// when the operation would leave the numeric range of `T`.
#[derive(ThisError, Debug, Clone, Copy, Eq, PartialEq)]
pub enum NumericError {
    /// Addition would exceed `T::max_value()`.
    #[error("numeric overflow")]
    Overflow,
    /// Subtraction would go below `T::min_value()`.
    #[error("numeric underflow")]
    Underflow,
}
/// An unsigned numeric value with an optional upper limit and
/// two-phase update semantics: mutations (`set`/`add`/`sub`) stage an
/// uncommitted value, which is only made durable by `commit` (where the
/// limit is enforced) or discarded by `rollback`.
#[derive(Debug, Clone)]
pub struct LimitedSize<T: PrimInt + Unsigned + fmt::Display + fmt::Debug> {
    // Human-readable name, used only in log messages
    description: String,
    // The last committed value
    value: T,
    // Optional inclusive upper bound, enforced at commit time only
    limit: Option<T>,
    // Staged value awaiting commit/rollback; None when nothing is pending
    uncommitted_value: Option<T>,
}
impl<T: PrimInt + Unsigned + fmt::Display + fmt::Debug> LimitedSize<T> {
    /// Create a new limited-size tracker.
    ///
    /// * `description` — name used in log messages.
    /// * `value` — initial committed value.
    /// * `limit` — optional inclusive upper bound, enforced only at commit.
    pub fn new(description: &str, value: T, limit: Option<T>) -> Self {
        Self {
            description: description.to_owned(),
            value,
            limit,
            uncommitted_value: None,
        }
    }

    /// The effective value: the staged value if one is pending, else the
    /// committed value.
    fn current_value(&self) -> T {
        self.uncommitted_value.unwrap_or(self.value)
    }

    /// Stage `new_value` for the next commit, replacing any pending value.
    pub fn set(&mut self, new_value: T) {
        self.uncommitted_value = Some(new_value);
    }

    /// Stage `current + v`. Returns the new staged value, or
    /// [`NumericError::Overflow`] if it would exceed `T::max_value()`
    /// (nothing is staged in that case).
    pub fn add(&mut self, v: T) -> Result<T, NumericError> {
        let current_value = self.current_value();
        // Largest addend that does not overflow from current_value
        let max_v = T::max_value() - current_value;
        if v > max_v {
            return Err(NumericError::Overflow);
        }
        let new_value = current_value + v;
        self.uncommitted_value = Some(new_value);
        Ok(new_value)
    }

    /// Stage `current - v`. Returns the new staged value, or
    /// [`NumericError::Underflow`] if it would go below `T::min_value()`
    /// (nothing is staged in that case).
    pub fn sub(&mut self, v: T) -> Result<T, NumericError> {
        let current_value = self.current_value();
        // Largest subtrahend that does not underflow (min_value is 0 for Unsigned)
        let max_v = current_value - T::min_value();
        if v > max_v {
            return Err(NumericError::Underflow);
        }
        let new_value = current_value - v;
        self.uncommitted_value = Some(new_value);
        Ok(new_value)
    }

    /// Stage `current - v`, clamping at `T::min_value()` instead of
    /// failing. Underflow is logged. Returns the new staged value.
    pub fn saturating_sub(&mut self, mut v: T) -> T {
        let current_value = self.current_value();
        let max_v = current_value - T::min_value();
        if v > max_v {
            log_stor!(debug "Numeric underflow ({})", self.description);
            v = max_v;
        }
        let new_value = current_value - v;
        self.uncommitted_value = Some(new_value);
        new_value
    }

    /// Returns `false` iff a pending value exists and exceeds the limit.
    /// (With no pending value or no limit, there is nothing to violate.)
    pub fn check_limit(&self) -> bool {
        if let Some(uncommitted_value) = self.uncommitted_value {
            if let Some(limit) = self.limit {
                if uncommitted_value > limit {
                    return false;
                }
            }
        }
        true
    }

    /// Commit the pending value, enforcing the limit.
    ///
    /// On success the staged value becomes the committed value and the
    /// pending slot is cleared; the committed value is returned. If the
    /// pending value exceeds the limit, [`LimitError::OverLimit`] is
    /// returned and the pending value is left in place so the caller can
    /// still `rollback`.
    pub fn commit(&mut self) -> Result<T, LimitError> {
        if let Some(uncommitted_value) = self.uncommitted_value {
            if let Some(limit) = self.limit {
                if uncommitted_value > limit {
                    log_stor!(debug "Commit over limit failed ({}): {} > {}", self.description, uncommitted_value, limit);
                    return Err(LimitError::OverLimit);
                }
            }
            log_stor!(debug "Commit ({}): {} => {}", self.description, self.value, uncommitted_value);
            self.value = uncommitted_value;
            // Clear the staged slot so later reads/mutations start from
            // the committed value rather than a stale Some(value)
            self.uncommitted_value = None;
        }
        Ok(self.value)
    }

    /// Discard any pending value and return the committed value.
    ///
    /// BUGFIX: previously the pending value was logged as dropped but not
    /// actually cleared, so subsequent `add`/`sub`/`commit` silently kept
    /// operating on the rolled-back value. `Option::take` both logs and
    /// clears it.
    pub fn rollback(&mut self) -> T {
        if let Some(uv) = self.uncommitted_value.take() {
            log_stor!(debug "Rollback ({}): {} (drop {})", self.description, self.value, uv);
        }
        self.value
    }

    /// The committed value (ignores any pending, uncommitted value).
    pub fn get(&self) -> T {
        self.value
    }
}

View File

@ -1,6 +1,7 @@
mod debug;
mod get_value;
mod keys;
mod limited_size;
mod record_store;
mod record_store_limits;
mod set_value;
@ -9,6 +10,7 @@ mod tasks;
mod types;
use keys::*;
use limited_size::*;
use record_store::*;
use record_store_limits::*;
use storage_manager_inner::*;
@ -95,6 +97,7 @@ impl StorageManager {
Ok(())
}
#[instrument(level = "debug", skip_all)]
pub async fn terminate(&self) {
debug!("starting storage manager shutdown");

View File

@ -21,8 +21,8 @@ where
subkey_table: Option<TableDB>,
record_index: LruCache<RecordTableKey, Record<D>>,
subkey_cache: LruCache<SubkeyTableKey, RecordData>,
subkey_cache_total_size: usize,
total_storage_space: usize,
subkey_cache_total_size: LimitedSize<usize>,
total_storage_space: LimitedSize<u64>,
dead_records: Vec<(RecordTableKey, Record<D>)>,
changed_records: HashSet<RecordTableKey>,
@ -47,6 +47,13 @@ where
{
pub fn new(table_store: TableStore, name: &str, limits: RecordStoreLimits) -> Self {
let subkey_cache_size = limits.subkey_cache_size as usize;
let limit_subkey_cache_total_size = limits
.max_subkey_cache_memory_mb
.map(|mb| mb * 1_048_576usize);
let limit_max_storage_space = limits
.max_storage_space_mb
.map(|mb| mb as u64 * 1_048_576u64);
Self {
table_store,
name: name.to_owned(),
@ -55,8 +62,16 @@ where
subkey_table: None,
record_index: LruCache::new(limits.max_records.unwrap_or(usize::MAX)),
subkey_cache: LruCache::new(subkey_cache_size),
subkey_cache_total_size: 0,
total_storage_space: 0,
subkey_cache_total_size: LimitedSize::new(
"subkey_cache_total_size",
0,
limit_subkey_cache_total_size,
),
total_storage_space: LimitedSize::new(
"total_storage_space",
0,
limit_max_storage_space,
),
dead_records: Vec::new(),
changed_records: HashSet::new(),
purge_dead_records_mutex: Arc::new(AsyncMutex::new(())),
@ -89,8 +104,17 @@ where
let mut dead_records = Vec::new();
for ri in record_index_saved {
// total the storage space
self.total_storage_space += mem::size_of::<RecordTableKey>();
self.total_storage_space += ri.1.total_size();
self.total_storage_space
.add(mem::size_of::<RecordTableKey>() as u64)
.unwrap();
self.total_storage_space
.add(ri.1.total_size() as u64)
.unwrap();
if let Err(_) = self.total_storage_space.commit() {
// If we overflow the limit, kill off the record
dead_records.push((ri.0, ri.1));
continue;
}
// add to index and ensure we deduplicate in the case of an error
if let Some(v) = self.record_index.insert(ri.0, ri.1, |k, v| {
@ -130,24 +154,32 @@ where
// Old data
dead_size += old_record_data.total_size();
}
self.subkey_cache_total_size -= dead_size;
self.subkey_cache_total_size += record_data_total_size;
self.subkey_cache_total_size.sub(dead_size).unwrap();
self.subkey_cache_total_size
.add(record_data_total_size)
.unwrap();
// Purge over size limit
if let Some(max_subkey_cache_memory_mb) = self.limits.max_subkey_cache_memory_mb {
while self.subkey_cache_total_size > (max_subkey_cache_memory_mb * 1_048_576usize) {
if let Some((_, v)) = self.subkey_cache.remove_lru() {
self.subkey_cache_total_size -= v.total_size();
} else {
break;
}
while self.subkey_cache_total_size.commit().is_err() {
if let Some((_, v)) = self.subkey_cache.remove_lru() {
self.subkey_cache_total_size.saturating_sub(v.total_size());
} else {
self.subkey_cache_total_size.rollback();
log_stor!(error "subkey cache should not be empty, has {} bytes unaccounted for",self.subkey_cache_total_size.get());
self.subkey_cache_total_size.set(0);
self.subkey_cache_total_size.commit().unwrap();
break;
}
}
}
fn remove_from_subkey_cache(&mut self, key: SubkeyTableKey) {
if let Some(dead_record_data) = self.subkey_cache.remove(&key) {
self.subkey_cache_total_size -= dead_record_data.total_size();
self.subkey_cache_total_size
.saturating_sub(dead_record_data.total_size());
self.subkey_cache_total_size.commit().unwrap();
}
}
@ -206,8 +238,11 @@ where
}
// Remove from total size
self.total_storage_space -= mem::size_of::<RecordTableKey>();
self.total_storage_space -= v.total_size();
self.total_storage_space
.saturating_sub(mem::size_of::<RecordTableKey>() as u64);
self.total_storage_space
.saturating_sub(v.total_size() as u64);
self.total_storage_space.commit().unwrap();
}
if let Err(e) = rt_xact.commit().await {
log_stor!(error "failed to commit record table transaction: {}", e);
@ -258,12 +293,12 @@ where
};
// If over size limit, dont create record
let new_total_storage_space =
self.total_storage_space + mem::size_of::<RecordTableKey>() + record.total_size();
if let Some(max_storage_space_mb) = &self.limits.max_storage_space_mb {
if new_total_storage_space > (max_storage_space_mb * 1_048_576usize) {
apibail_try_again!();
}
self.total_storage_space
.add((mem::size_of::<RecordTableKey>() + record.total_size()) as u64)
.unwrap();
if !self.total_storage_space.check_limit() {
self.total_storage_space.rollback();
apibail_try_again!();
}
// Save to record table
@ -286,7 +321,7 @@ where
}
// Update storage space
self.total_storage_space = new_total_storage_space;
self.total_storage_space.commit().unwrap();
Ok(())
}
@ -482,12 +517,14 @@ where
}
// Check new total storage space
let new_total_storage_space =
self.total_storage_space + new_record_data_size - prior_record_data_size;
if let Some(max_storage_space_mb) = self.limits.max_storage_space_mb {
if new_total_storage_space > (max_storage_space_mb * 1_048_576usize) {
apibail_try_again!();
}
self.total_storage_space
.sub(prior_record_data_size as u64)
.unwrap();
self.total_storage_space
.add(new_record_data_size as u64)
.unwrap();
if !self.total_storage_space.check_limit() {
apibail_try_again!();
}
// Write subkey
@ -506,6 +543,9 @@ where
})
.expect("record should still be here");
// Update storage space
self.total_storage_space.commit().unwrap();
Ok(())
}
@ -513,16 +553,19 @@ where
/// This will force a garbage collection of the space immediately
/// If zero is passed in here, only dead records are garbage collected,
/// without removing any live records
pub async fn reclaim_space(&mut self, space: usize) {
pub async fn reclaim_space(&mut self, space: usize) -> usize {
let mut reclaimed = 0usize;
while reclaimed < space {
if let Some((k, v)) = self.record_index.remove_lru() {
reclaimed += mem::size_of::<RecordTableKey>();
reclaimed += v.total_size();
self.add_dead_record(k, v);
} else {
break;
}
}
self.purge_dead_records(false).await;
reclaimed
}
pub(super) fn debug_records(&self) -> String {
@ -532,9 +575,9 @@ where
out += "Record Index:\n";
for (rik, rec) in &self.record_index {
out += &format!(
" {} @ {} len={} subkeys={}\n",
" {} age={} len={} subkeys={}\n",
rik.key.to_string(),
rec.last_touched().as_u64(),
debug_duration(get_timestamp() - rec.last_touched().as_u64()),
rec.record_data_size(),
rec.stored_subkeys(),
);
@ -542,9 +585,9 @@ where
out += &format!("Subkey Cache Count: {}\n", self.subkey_cache.len());
out += &format!(
"Subkey Cache Total Size: {}\n",
self.subkey_cache_total_size
self.subkey_cache_total_size.get()
);
out += &format!("Total Storage Space: {}\n", self.total_storage_space);
out += &format!("Total Storage Space: {}\n", self.total_storage_space.get());
out += &format!("Dead Records: {}\n", self.dead_records.len());
for dr in &self.dead_records {
out += &format!(" {}\n", dr.0.key.to_string());

View File

@ -62,7 +62,7 @@ impl StorageManager {
let vres = rpc_processor
.clone()
.rpc_call_set_value(
Destination::direct(next_node).with_safety(safety_selection),
Destination::direct(next_node.clone()).with_safety(safety_selection),
key,
subkey,
value,
@ -70,7 +70,7 @@ impl StorageManager {
send_descriptor,
)
.await?;
let sva = network_result_value_or_log!(vres => {
let sva = network_result_value_or_log!(vres => [ format!(": next_node={} safety_selection={:?} key={} subkey={} send_descriptor={}", next_node, safety_selection, key, subkey, send_descriptor) ] {
// Any other failures, just try the next node and pretend this one never happened
return Ok(None);
});

View File

@ -196,6 +196,7 @@ fn config_callback(key: String) -> ConfigCallbackReturn {
"network.client_whitelist_timeout_ms" => Ok(Box::new(300_000u32)),
"network.reverse_connection_receipt_time_ms" => Ok(Box::new(5_000u32)),
"network.hole_punch_receipt_time_ms" => Ok(Box::new(5_000u32)),
"network.network_key_password" => Ok(Box::new(Option::<String>::None)),
"network.routing_table.node_id" => Ok(Box::new(TypedKeySet::new())),
"network.routing_table.node_id_secret" => Ok(Box::new(TypedSecretSet::new())),
"network.routing_table.bootstrap" => Ok(Box::new(Vec::<String>::new())),
@ -330,6 +331,7 @@ pub async fn test_config() {
assert_eq!(inner.network.client_whitelist_timeout_ms, 300_000u32);
assert_eq!(inner.network.reverse_connection_receipt_time_ms, 5_000u32);
assert_eq!(inner.network.hole_punch_receipt_time_ms, 5_000u32);
assert_eq!(inner.network.network_key_password, Option::<String>::None);
assert_eq!(inner.network.rpc.concurrency, 2u32);
assert_eq!(inner.network.rpc.queue_size, 1024u32);
assert_eq!(inner.network.rpc.timeout_ms, 5_000u32);

View File

@ -142,7 +142,6 @@ impl VeilidAPI {
}
/// Connect to the network
#[instrument(level = "debug", err, skip_all)]
pub async fn attach(&self) -> VeilidAPIResult<()> {
let attachment_manager = self.attachment_manager()?;
if !attachment_manager.attach().await {
@ -152,7 +151,6 @@ impl VeilidAPI {
}
/// Disconnect from the network
#[instrument(level = "debug", err, skip_all)]
pub async fn detach(&self) -> VeilidAPIResult<()> {
let attachment_manager = self.attachment_manager()?;
if !attachment_manager.detach().await {
@ -164,7 +162,6 @@ impl VeilidAPI {
////////////////////////////////////////////////////////////////
// Routing Context
#[instrument(level = "debug", skip(self))]
pub fn routing_context(&self) -> RoutingContext {
RoutingContext::new(self.clone())
}
@ -175,7 +172,6 @@ impl VeilidAPI {
/// Allocate a new private route set with default cryptography and network options
/// Returns a route id and a publishable 'blob' with the route encrypted with each crypto kind
/// Those nodes importing the blob will have their choice of which crypto kind to use
#[instrument(level = "debug", skip(self))]
pub async fn new_private_route(&self) -> VeilidAPIResult<(RouteId, Vec<u8>)> {
self.new_custom_private_route(
&VALID_CRYPTO_KINDS,
@ -186,7 +182,6 @@ impl VeilidAPI {
}
///
#[instrument(level = "debug", skip(self))]
pub async fn new_custom_private_route(
&self,
crypto_kinds: &[CryptoKind],
@ -238,14 +233,12 @@ impl VeilidAPI {
Ok((route_id, blob))
}
#[instrument(level = "debug", skip(self))]
pub fn import_remote_private_route(&self, blob: Vec<u8>) -> VeilidAPIResult<RouteId> {
let rss = self.routing_table()?.route_spec_store();
rss.import_remote_private_route(blob)
.map_err(|e| VeilidAPIError::invalid_argument(e, "blob", "private route blob"))
}
#[instrument(level = "debug", skip(self))]
pub fn release_private_route(&self, route_id: RouteId) -> VeilidAPIResult<()> {
let rss = self.routing_table()?.route_spec_store();
if !rss.release_route(route_id) {
@ -257,7 +250,6 @@ impl VeilidAPI {
////////////////////////////////////////////////////////////////
// App Calls
#[instrument(level = "debug", skip(self))]
pub async fn app_call_reply(
&self,
call_id: OperationId,
@ -274,7 +266,6 @@ impl VeilidAPI {
// Tunnel Building
#[cfg(feature = "unstable-tunnels")]
#[instrument(level = "debug", err, skip(self))]
pub async fn start_tunnel(
&self,
_endpoint_mode: TunnelMode,
@ -284,7 +275,6 @@ impl VeilidAPI {
}
#[cfg(feature = "unstable-tunnels")]
#[instrument(level = "debug", err, skip(self))]
pub async fn complete_tunnel(
&self,
_endpoint_mode: TunnelMode,
@ -295,7 +285,6 @@ impl VeilidAPI {
}
#[cfg(feature = "unstable-tunnels")]
#[instrument(level = "debug", err, skip(self))]
pub async fn cancel_tunnel(&self, _tunnel_id: TunnelId) -> VeilidAPIResult<bool> {
panic!("unimplemented");
}

View File

@ -31,6 +31,14 @@ fn get_string(text: &str) -> Option<String> {
Some(text.to_owned())
}
/// Parse a subkey specification: either a single number ("2") or a
/// range-set string ("1..=3,5..=8").
fn get_subkeys(text: &str) -> Option<ValueSubkeyRangeSet> {
    match get_number(text) {
        Some(n) => {
            let subkey = n.try_into().ok()?;
            Some(ValueSubkeyRangeSet::single(subkey))
        }
        None => ValueSubkeyRangeSet::from_str(text).ok(),
    }
}
fn get_route_id(
rss: RouteSpecStore,
allow_allocated: bool,
@ -901,6 +909,31 @@ impl VeilidAPI {
return Ok(out);
}
/// Debug command handler for `record purge <local|remote> [bytes]`.
async fn debug_record_purge(&self, args: Vec<String>) -> VeilidAPIResult<String> {
    let storage_manager = self.storage_manager()?;
    let scope = get_debug_argument_at(&args, 1, "debug_record_purge", "scope", get_string)?;
    // Optional byte count; absence means "purge everything reclaimable"
    let bytes = get_debug_argument_at(&args, 2, "debug_record_purge", "bytes", get_number).ok();
    Ok(match scope.as_str() {
        "local" => storage_manager.purge_local_records(bytes).await,
        "remote" => storage_manager.purge_remote_records(bytes).await,
        _ => "Invalid scope\n".to_owned(),
    })
}
/// Debug command handler for `record get <key> <subkeys>` (not yet implemented).
async fn debug_record_get(&self, args: Vec<String>) -> VeilidAPIResult<String> {
    let _storage_manager = self.storage_manager()?;

    // Validate arguments even though retrieval is not implemented yet.
    // BUGFIX: the context label for the subkeys argument previously read
    // "debug_record_subkeys", which produced a misleading error message.
    let _key = get_debug_argument_at(&args, 1, "debug_record_get", "key", get_typed_key)?;
    let _subkeys = get_debug_argument_at(&args, 2, "debug_record_get", "subkeys", get_string)?;

    // let rc = self.routing_context();

    Ok("TODO".to_owned())
}
async fn debug_record(&self, args: String) -> VeilidAPIResult<String> {
let args: Vec<String> = args.split_whitespace().map(|s| s.to_owned()).collect();
@ -908,6 +941,10 @@ impl VeilidAPI {
if command == "list" {
self.debug_record_list(args).await
} else if command == "purge" {
self.debug_record_purge(args).await
} else if command == "get" {
self.debug_record_get(args).await
} else {
Ok(">>> Unknown command\n".to_owned())
}
@ -937,6 +974,8 @@ impl VeilidAPI {
import <blob>
test <route>
record list <local|remote>
purge <local|remote> [bytes]
get <key> <subkeys>
<destination> is:
* direct: <node>[+<safety>][<modifiers>]
@ -949,6 +988,9 @@ impl VeilidAPI {
<protocoltype> is: udp|tcp|ws|wss
<addresstype> is: ipv4|ipv6
<routingdomain> is: public|local
<subkeys> is:
* a number: 2
* a comma-separated inclusive range list: 1..=3,5..=8
"#
.to_owned())
}

View File

@ -259,11 +259,11 @@ impl JsonRequestProcessor {
.add_routing_context(routing_context.clone().with_sequencing(sequencing)),
}
}
RoutingContextRequestOp::AppCall { target, request } => {
RoutingContextRequestOp::AppCall { target, message } => {
RoutingContextResponseOp::AppCall {
result: to_json_api_result_with_vec_u8(
self.parse_target(target)
.then(|tr| async { routing_context.app_call(tr?, request).await })
.then(|tr| async { routing_context.app_call(tr?, message).await })
.await,
),
}

View File

@ -29,7 +29,7 @@ pub enum RoutingContextRequestOp {
target: String,
#[serde(with = "json_as_base64")]
#[schemars(with = "String")]
request: Vec<u8>,
message: Vec<u8>,
},
AppMessage {
target: String,

View File

@ -137,15 +137,14 @@ impl RoutingContext {
////////////////////////////////////////////////////////////////
// App-level Messaging
#[instrument(level = "debug", err, skip(self))]
pub async fn app_call(&self, target: Target, request: Vec<u8>) -> VeilidAPIResult<Vec<u8>> {
pub async fn app_call(&self, target: Target, message: Vec<u8>) -> VeilidAPIResult<Vec<u8>> {
let rpc_processor = self.api.rpc_processor()?;
// Get destination
let dest = self.get_destination(target).await?;
// Send app message
let answer = match rpc_processor.rpc_call_app_call(dest, request).await {
let answer = match rpc_processor.rpc_call_app_call(dest, message).await {
Ok(NetworkResult::Value(v)) => v,
Ok(NetworkResult::Timeout) => apibail_timeout!(),
Ok(NetworkResult::ServiceUnavailable(e)) => {
@ -165,7 +164,6 @@ impl RoutingContext {
Ok(answer.answer)
}
#[instrument(level = "debug", err, skip(self))]
pub async fn app_message(&self, target: Target, message: Vec<u8>) -> VeilidAPIResult<()> {
let rpc_processor = self.api.rpc_processor()?;

View File

@ -110,6 +110,7 @@ pub fn fix_veilidconfiginner() -> VeilidConfigInner {
client_whitelist_timeout_ms: 7000,
reverse_connection_receipt_time_ms: 8000,
hole_punch_receipt_time_ms: 9000,
network_key_password: None,
routing_table: VeilidConfigRoutingTable {
node_id: TypedKeySet::new(),
node_id_secret: TypedSecretSet::new(),

View File

@ -12,7 +12,5 @@ pub use value_subkey_range_set::*;
/// Value subkey
pub type ValueSubkey = u32;
/// Value subkey range
pub type ValueSubkeyRange = (u32, u32);
/// Value sequence number
pub type ValueSeqNum = u32;

View File

@ -41,6 +41,28 @@ impl ValueSubkeyRangeSet {
}
}
impl FromStr for ValueSubkeyRangeSet {
    type Err = VeilidAPIError;
    /// Parse a comma-separated list of subkey entries.
    ///
    /// Each entry is either an inclusive range like `1..=3` or a single
    /// subkey like `5` (generalized from the original range-only grammar,
    /// matching the debug help text that allows plain numbers).
    fn from_str(value: &str) -> Result<Self, Self::Err> {
        let mut data = RangeSetBlaze::<ValueSubkey>::new();

        for r in value.split(',') {
            let r = r.trim();
            if let Some((ss, es)) = r.split_once("..=") {
                // Inclusive range entry
                let sn = ValueSubkey::from_str(ss)
                    .map_err(|e| VeilidAPIError::parse_error("could not parse ValueSubkey", e))?;
                let en = ValueSubkey::from_str(es)
                    .map_err(|e| VeilidAPIError::parse_error("could not parse ValueSubkey", e))?;
                data.ranges_insert(sn..=en);
            } else {
                // Single subkey entry, stored as a degenerate range
                let n = ValueSubkey::from_str(r)
                    .map_err(|e| VeilidAPIError::parse_error("could not parse ValueSubkey", e))?;
                data.ranges_insert(n..=n);
            }
        }

        Ok(ValueSubkeyRangeSet { data })
    }
}
impl Deref for ValueSubkeyRangeSet {
type Target = RangeSetBlaze<ValueSubkey>;

View File

@ -384,6 +384,7 @@ pub struct VeilidConfigNetwork {
pub client_whitelist_timeout_ms: u32,
pub reverse_connection_receipt_time_ms: u32,
pub hole_punch_receipt_time_ms: u32,
pub network_key_password: Option<String>,
pub routing_table: VeilidConfigRoutingTable,
pub rpc: VeilidConfigRPC,
pub dht: VeilidConfigDHT,
@ -695,6 +696,7 @@ impl VeilidConfig {
get_config!(inner.network.client_whitelist_timeout_ms);
get_config!(inner.network.reverse_connection_receipt_time_ms);
get_config!(inner.network.hole_punch_receipt_time_ms);
get_config!(inner.network.network_key_password);
get_config!(inner.network.routing_table.node_id);
get_config!(inner.network.routing_table.node_id_secret);
get_config!(inner.network.routing_table.bootstrap);

View File

@ -266,7 +266,7 @@ abstract class VeilidRoutingContext {
Future<void> deleteDHTRecord(TypedKey key);
Future<ValueData?> getDHTValue(TypedKey key, int subkey, bool forceRefresh);
Future<ValueData?> setDHTValue(TypedKey key, int subkey, Uint8List data);
Future<Timestamp> watchDHTValues(
TypedKey key, ValueSubkeyRange subkeys, Timestamp expiration, int count);
Future<bool> cancelDHTWatch(TypedKey key, ValueSubkeyRange subkeys);
Future<Timestamp> watchDHTValues(TypedKey key, List<ValueSubkeyRange> subkeys,
Timestamp expiration, int count);
Future<bool> cancelDHTWatch(TypedKey key, List<ValueSubkeyRange> subkeys);
}

View File

@ -703,6 +703,7 @@ class VeilidConfigNetwork {
int clientWhitelistTimeoutMs;
int reverseConnectionReceiptTimeMs;
int holePunchReceiptTimeMs;
String? networkKeyPassword;
VeilidConfigRoutingTable routingTable;
VeilidConfigRPC rpc;
VeilidConfigDHT dht;
@ -723,6 +724,7 @@ class VeilidConfigNetwork {
required this.clientWhitelistTimeoutMs,
required this.reverseConnectionReceiptTimeMs,
required this.holePunchReceiptTimeMs,
this.networkKeyPassword,
required this.routingTable,
required this.rpc,
required this.dht,
@ -745,6 +747,7 @@ class VeilidConfigNetwork {
'client_whitelist_timeout_ms': clientWhitelistTimeoutMs,
'reverse_connection_receipt_time_ms': reverseConnectionReceiptTimeMs,
'hole_punch_receipt_time_ms': holePunchReceiptTimeMs,
'network_key_password': networkKeyPassword,
'routing_table': routingTable.toJson(),
'rpc': rpc.toJson(),
'dht': dht.toJson(),
@ -770,6 +773,7 @@ class VeilidConfigNetwork {
reverseConnectionReceiptTimeMs =
json['reverse_connection_receipt_time_ms'],
holePunchReceiptTimeMs = json['hole_punch_receipt_time_ms'],
networkKeyPassword = json['network_key_password'],
routingTable = VeilidConfigRoutingTable.fromJson(json['routing_table']),
rpc = VeilidConfigRPC.fromJson(json['rpc']),
dht = VeilidConfigDHT.fromJson(json['dht']),

View File

@ -710,7 +710,7 @@ class VeilidRoutingContextFFI implements VeilidRoutingContext {
}
@override
Future<Timestamp> watchDHTValues(TypedKey key, ValueSubkeyRange subkeys,
Future<Timestamp> watchDHTValues(TypedKey key, List<ValueSubkeyRange> subkeys,
Timestamp expiration, int count) async {
final nativeKey = jsonEncode(key).toNativeUtf8();
final nativeSubkeys = jsonEncode(subkeys).toNativeUtf8();
@ -726,7 +726,8 @@ class VeilidRoutingContextFFI implements VeilidRoutingContext {
}
@override
Future<bool> cancelDHTWatch(TypedKey key, ValueSubkeyRange subkeys) async {
Future<bool> cancelDHTWatch(
TypedKey key, List<ValueSubkeyRange> subkeys) async {
final nativeKey = jsonEncode(key).toNativeUtf8();
final nativeSubkeys = jsonEncode(subkeys).toNativeUtf8();

View File

@ -129,7 +129,7 @@ class VeilidRoutingContextJS implements VeilidRoutingContext {
}
@override
Future<Timestamp> watchDHTValues(TypedKey key, ValueSubkeyRange subkeys,
Future<Timestamp> watchDHTValues(TypedKey key, List<ValueSubkeyRange> subkeys,
Timestamp expiration, int count) async {
final ts = await _wrapApiPromise(js_util.callMethod(
wasm, "routing_context_watch_dht_values", [
@ -143,7 +143,7 @@ class VeilidRoutingContextJS implements VeilidRoutingContext {
}
@override
Future<bool> cancelDHTWatch(TypedKey key, ValueSubkeyRange subkeys) {
Future<bool> cancelDHTWatch(TypedKey key, List<ValueSubkeyRange> subkeys) {
return _wrapApiPromise(js_util.callMethod(
wasm,
"routing_context_cancel_dht_watch",

View File

@ -264,7 +264,6 @@ pub extern "C" fn initialize_veilid_core(platform_config: FfiStr) {
}
#[no_mangle]
#[instrument(level = "debug")]
pub extern "C" fn change_log_level(layer: FfiStr, log_level: FfiStr) {
// get layer to change level on
let layer = layer.into_opt_string().unwrap_or("all".to_owned());
@ -1502,7 +1501,6 @@ pub struct VeilidVersion {
}
#[no_mangle]
#[instrument]
pub extern "C" fn veilid_version() -> VeilidVersion {
let (major, minor, patch) = veilid_core::veilid_version();
VeilidVersion {

View File

@ -1,3 +1,4 @@
#!/bin/bash
poetry config --local virtualenvs.in-project true
export PYTHON_KEYRING_BACKEND=keyring.backends.null.Keyring
poetry install

View File

@ -67,10 +67,15 @@ async def test_set_get_dht_value(api_connection: veilid.VeilidAPI):
vd2 = await rc.get_dht_value(rec.key, 0, False)
assert vd2 != None
vd3 = await rc.get_dht_value(rec.key, 0, True)
assert vd3 != None
print("vd: {}", vd.__dict__)
print("vd2: {}", vd2.__dict__)
print("vd3: {}", vd3.__dict__)
assert vd == vd2
assert vd2 == vd3
await rc.close_dht_record(rec.key)
await rc.delete_dht_record(rec.key)

View File

@ -1,6 +1,9 @@
# Routing context veilid tests
import asyncio
import random
import sys
import os
import pytest
import veilid
@ -15,11 +18,11 @@ from .conftest import server_info
async def test_routing_contexts(api_connection: veilid.VeilidAPI):
rc = await api_connection.new_routing_context()
async with rc:
rcp = await rc.with_privacy(release = False)
rcp = await rc.with_privacy(release=False)
async with rcp:
rcps = await rcp.with_sequencing(veilid.Sequencing.ENSURE_ORDERED, release = False)
rcps = await rcp.with_sequencing(veilid.Sequencing.ENSURE_ORDERED, release=False)
async with rcps:
rcpscp = await rcps.with_custom_privacy(veilid.Stability.RELIABLE, release = False)
rcpscp = await rcps.with_custom_privacy(veilid.Stability.RELIABLE, release=False)
await rcpscp.release()
@ -109,3 +112,158 @@ async def test_routing_context_app_call_loopback():
# now we should get the reply from the call
result = await app_call_task
assert result == reply
@pytest.mark.asyncio
async def test_routing_context_app_message_loopback_big_packets():
    # Sends 1000 random-sized (0..32768 byte) app messages to our own imported
    # private route and verifies each delivered message was one we sent.
    app_message_queue: asyncio.Queue = asyncio.Queue()

    global got_message
    got_message = 0

    async def app_message_queue_update_callback(update: veilid.VeilidUpdate):
        # Count and enqueue every APP_MESSAGE update for later verification
        if update.kind == veilid.VeilidUpdateKind.APP_MESSAGE:
            global got_message
            got_message += 1
            sys.stdout.write("{} ".format(got_message))
            await app_message_queue.put(update)

    sent_messages: set[bytes] = set()

    hostname, port = server_info()
    api = await veilid.json_api_connect(
        hostname, port, app_message_queue_update_callback
    )
    async with api:
        # purge routes to ensure we start fresh
        await api.debug("purge routes")

        # make a routing context that uses a safety route
        rc = await (await (await api.new_routing_context()).with_privacy()).with_sequencing(veilid.Sequencing.ENSURE_ORDERED)
        async with rc:

            # make a new local private route
            prl, blob = await api.new_private_route()

            # import it as a remote route as well so we can send to it
            prr = await api.import_remote_private_route(blob)

            # do this test 1000 times
            for _ in range(1000):
                # send a random sized random app message to our own private route
                message = random.randbytes(random.randint(0, 32768))
                await rc.app_message(prr, message)
                sent_messages.add(message)

            # we should get the same messages back
            # NOTE(review): sent_messages is a set, so duplicate random payloads
            # collapse and only len(sent_messages) deliveries are awaited here
            print(len(sent_messages))
            for n in range(len(sent_messages)):
                print(n)
                update: veilid.VeilidUpdate = await asyncio.wait_for(
                    app_message_queue.get(), timeout=10
                )
                assert isinstance(update.detail, veilid.VeilidAppMessage)
                assert update.detail.message in sent_messages
@pytest.mark.asyncio
async def test_routing_context_app_call_loopback_big_packets():
    # Makes 10 app calls with random-sized payloads over our own private route;
    # a background task echoes each call back, and the reply must match the request.
    global got_message
    got_message = 0

    app_call_queue: asyncio.Queue = asyncio.Queue()

    async def app_call_queue_update_callback(update: veilid.VeilidUpdate):
        # Enqueue every APP_CALL update for the echo task to answer
        if update.kind == veilid.VeilidUpdateKind.APP_CALL:
            await app_call_queue.put(update)

    async def app_call_queue_task_handler(api: veilid.VeilidAPI):
        # Echo server: reply to each incoming app call with its own message
        while True:
            update = await app_call_queue.get()
            global got_message
            got_message += 1
            sys.stdout.write("{} ".format(got_message))
            sys.stdout.flush()
            await api.app_call_reply(update.detail.call_id, update.detail.message)

    hostname, port = server_info()
    api = await veilid.json_api_connect(
        hostname, port, app_call_queue_update_callback
    )
    async with api:
        # purge routes to ensure we start fresh
        await api.debug("purge routes")

        app_call_task = asyncio.create_task(
            app_call_queue_task_handler(api), name="app call task"
        )

        # make a routing context that uses a safety route
        rc = await (await (await api.new_routing_context()).with_privacy()).with_sequencing(veilid.Sequencing.ENSURE_ORDERED)
        async with rc:
            # make a new local private route
            prl, blob = await api.new_private_route()

            # import it as a remote route as well so we can send to it
            prr = await api.import_remote_private_route(blob)

            # do this test 10 times
            for _ in range(10):
                # send a random sized random app message to our own private route
                message = random.randbytes(random.randint(0, 32768))
                out_message = await rc.app_call(prr, message)
                assert message == out_message

        app_call_task.cancel()
@pytest.mark.skipif(os.getenv("NOSKIP")!="1", reason="unneeded test, only for performance check")
@pytest.mark.asyncio
async def test_routing_context_app_message_loopback_bandwidth():
    # Performance-only test (skipped unless NOSKIP=1): floods 10000 fixed-size
    # messages over a loopback private route and waits for the same count back.
    app_message_queue: asyncio.Queue = asyncio.Queue()

    async def app_message_queue_update_callback(update: veilid.VeilidUpdate):
        # Only the arrival matters for bandwidth measurement, not the payload
        if update.kind == veilid.VeilidUpdateKind.APP_MESSAGE:
            await app_message_queue.put(True)

    hostname, port = server_info()
    api = await veilid.json_api_connect(
        hostname, port, app_message_queue_update_callback
    )
    async with api:
        # purge routes to ensure we start fresh
        await api.debug("purge routes")

        # make a routing context that uses a safety route
        #rc = await (await (await api.new_routing_context()).with_privacy()).with_sequencing(veilid.Sequencing.ENSURE_ORDERED)
        #rc = await (await api.new_routing_context()).with_privacy()
        rc = await api.new_routing_context()
        async with rc:
            # make a new local private route
            prl, blob = await api.new_private_route()

            # import it as a remote route as well so we can send to it
            prr = await api.import_remote_private_route(blob)

            # do this test 1000 times
            message = random.randbytes(16384)
            for _ in range(10000):
                # send a random sized random app message to our own private route
                await rc.app_message(prr, message)

            # we should get the same number of messages back (not storing all that data)
            for _ in range(10000):
                await asyncio.wait_for(
                    app_message_queue.get(), timeout=10
                )

View File

@ -203,6 +203,7 @@ class VeilidConfigNetwork(ConfigBase):
client_whitelist_timeout_ms: int
reverse_connection_receipt_time_ms: int
hole_punch_receipt_time_ms: int
network_key_password: Optional[str]
routing_table: VeilidConfigRoutingTable
rpc: VeilidConfigRPC
dht: VeilidConfigDHT

View File

@ -135,6 +135,8 @@ class _JsonVeilidAPI(VeilidAPI):
# Resolve the request's future to the response json
if reqfuture is not None:
reqfuture.set_result(j)
else:
print("Missing id: {}", id)
async def handle_recv_messages(self):
# Read lines until we're done
@ -485,7 +487,7 @@ class _JsonRoutingContext(RoutingContext):
await self.release()
return self.__class__(self.api, new_rc_id)
async def app_call(self, target: TypedKey | RouteId, request: bytes) -> bytes:
async def app_call(self, target: TypedKey | RouteId, message: bytes) -> bytes:
return urlsafe_b64decode_no_pad(
raise_api_result(
await self.api.send_ndjson_request(
@ -494,7 +496,7 @@ class _JsonRoutingContext(RoutingContext):
rc_id=self.rc_id,
rc_op=RoutingContextOperation.APP_CALL,
target=target,
request=request,
message=message,
)
)
)

View File

@ -3516,6 +3516,12 @@
"format": "uint32",
"minimum": 0.0
},
"network_key_password": {
"type": [
"string",
"null"
]
},
"protocol": {
"$ref": "#/definitions/VeilidConfigProtocol"
},

View File

@ -259,20 +259,20 @@
{
"type": "object",
"required": [
"message",
"rc_op",
"request",
"target"
],
"properties": {
"message": {
"type": "string"
},
"rc_op": {
"type": "string",
"enum": [
"AppCall"
]
},
"request": {
"type": "string"
},
"target": {
"type": "string"
}

View File

@ -17,6 +17,7 @@ crypto-test-none = ["rt-tokio", "veilid-core/crypto-test-none"]
rt-async-std = ["veilid-core/rt-async-std", "async-std", "opentelemetry/rt-async-std", "opentelemetry-otlp/grpc-sys"]
rt-tokio = ["veilid-core/rt-tokio", "tokio", "tokio-stream", "tokio-util", "opentelemetry/rt-tokio", "console-subscriber"]
tracking = ["veilid-core/tracking"]
network-result-extra = ["veilid-core/network-result-extra"]
[dependencies]
veilid-core = { path = "../veilid-core", default-features = false }

View File

@ -76,7 +76,6 @@ impl ClientApi {
crate::server::shutdown();
}
#[instrument(level = "trace", skip_all)]
fn change_log_level(
&self,
layer: String,
@ -107,7 +106,6 @@ impl ClientApi {
trace!("ClientApi::stop: stopped");
}
#[instrument(level = "trace", skip(self), err)]
async fn handle_incoming(self, bind_addr: SocketAddr) -> std::io::Result<()> {
let listener = TcpListener::bind(bind_addr).await?;
debug!("Client API listening on: {:?}", bind_addr);
@ -402,7 +400,6 @@ impl ClientApi {
awg.done();
}
#[instrument(level = "trace", skip(self))]
pub fn handle_update(&self, veilid_update: veilid_core::VeilidUpdate) {
// serialize update to NDJSON
let veilid_update = serialize_json(json_api::RecvMessage::Update(veilid_update)) + "\n";

Some files were not shown because too many files have changed in this diff Show More