Merge branch 'fanout-preempt' into 'main'

Add fanout preempt

See merge request veilid/veilid!280
This commit is contained in:
Christien Rioux 2024-05-21 23:07:55 +00:00
commit 8e8ee06fe9
33 changed files with 1740 additions and 1006 deletions

541
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -237,7 +237,7 @@ impl AttachmentManager {
}
// see if we need to restart the network
if netman.needs_restart() {
if netman.network_needs_restart() {
info!("Restarting network");
restart = true;
break;

View File

@ -105,6 +105,8 @@ impl AddressFilter {
/// Called when the network restarts: drops the portion of the address filter
/// state that can be cleared at that point, namely the per-IPv4 and
/// per-IPv6-prefix connection counts and the recorded dial info failures.
pub fn restart(&self) {
    let mut state = self.inner.lock();
    state.conn_count_by_ip4.clear();
    state.conn_count_by_ip6_prefix.clear();
    state.dial_info_failures.clear();
}
@ -272,6 +274,7 @@ impl AddressFilter {
let mut inner = self.inner.lock();
inner.punishments_by_ip4.clear();
inner.punishments_by_ip6_prefix.clear();
self.unlocked_inner.routing_table.clear_punishments();
inner.punishments_by_node_id.clear();
}

View File

@ -117,8 +117,9 @@ pub(crate) enum NodeContactMethod {
/// Must use outbound relay to reach the node
OutboundRelay(NodeRef),
}
#[derive(Copy, Clone, Debug, PartialEq, Eq, Ord, PartialOrd, Hash)]
#[derive(Clone, Debug, PartialEq, Eq, Ord, PartialOrd, Hash)]
struct NodeContactMethodCacheKey {
node_ids: TypedKeyGroup,
own_node_info_ts: Timestamp,
target_node_info_ts: Timestamp,
target_node_ref_filter: Option<NodeRefFilter>,
@ -305,6 +306,13 @@ impl NetworkManager {
.net
.clone()
}
/// Non-panicking accessor for the low-level network component.
///
/// Returns a clone of the `net` component, or `None` when the component set
/// is currently empty.
fn opt_net(&self) -> Option<Network> {
    let components = self.unlocked_inner.components.read();
    components.as_ref().map(|c| c.net.clone())
}
fn receipt_manager(&self) -> ReceiptManager {
self.unlocked_inner
.components
@ -323,6 +331,14 @@ impl NetworkManager {
.rpc_processor
.clone()
}
/// Non-panicking accessor for the RPC processor component.
///
/// Returns a clone of the `rpc_processor` component, or `None` when the
/// component set is currently empty.
pub fn opt_rpc_processor(&self) -> Option<RPCProcessor> {
    let components = self.unlocked_inner.components.read();
    components.as_ref().map(|c| c.rpc_processor.clone())
}
pub fn connection_manager(&self) -> ConnectionManager {
self.unlocked_inner
.components
@ -332,6 +348,14 @@ impl NetworkManager {
.connection_manager
.clone()
}
/// Non-panicking accessor for the connection manager component.
///
/// Returns a clone of the `connection_manager` component, or `None` when the
/// component set is currently empty.
pub fn opt_connection_manager(&self) -> Option<ConnectionManager> {
    let components = self.unlocked_inner.components.read();
    components.as_ref().map(|c| c.connection_manager.clone())
}
pub fn update_callback(&self) -> UpdateCallback {
self.unlocked_inner
.update_callback
@ -496,9 +520,16 @@ impl NetworkManager {
}
}
pub fn needs_restart(&self) -> bool {
let net = self.net();
net.needs_restart()
/// Reports whether the low-level network has flagged itself as needing a
/// restart.
///
/// Returns `false` when no network component is available.
pub fn network_needs_restart(&self) -> bool {
    match self.opt_net() {
        Some(net) => net.needs_restart(),
        None => false,
    }
}
/// Reports whether the low-level network is fully started.
///
/// Returns `true` only when a network component exists and its
/// `is_started()` is `Some(true)`; a missing component, `Some(false)` and
/// `None` all yield `false`.
pub fn network_is_started(&self) -> bool {
    let Some(net) = self.opt_net() else {
        return false;
    };
    net.is_started() == Some(true)
}
pub fn generate_node_status(&self, _routing_domain: RoutingDomain) -> NodeStatus {
@ -1074,7 +1105,7 @@ impl NetworkManager {
};
// Cache the envelope information in the routing table
let source_noderef = match routing_table.register_node_with_existing_connection(
let mut source_noderef = match routing_table.register_node_with_existing_connection(
envelope.get_sender_typed_id(),
flow,
ts,
@ -1088,6 +1119,9 @@ impl NetworkManager {
};
source_noderef.add_envelope_version(envelope.get_version());
// Enforce routing domain
source_noderef.merge_filter(NodeRefFilter::new().with_routing_domain(routing_domain));
// Pass message to RPC system
rpc.enqueue_direct_message(envelope, source_noderef, flow, routing_domain, body)?;

View File

@ -72,8 +72,8 @@ pub const MAX_CAPABILITIES: usize = 64;
/////////////////////////////////////////////////////////////////
struct NetworkInner {
/// true if the low-level network is running
network_started: bool,
/// Some(true) if the low-level network is running, Some(false) if it is not, None if it is in transit
network_started: Option<bool>,
/// set if the network needs to be restarted due to a low level configuration change
/// such as dhcp release or change of address or interfaces being added or removed
network_needs_restart: bool,
@ -109,6 +109,8 @@ struct NetworkInner {
listener_states: BTreeMap<SocketAddr, Arc<RwLock<ListenerState>>>,
/// Preferred local addresses for protocols/address combinations for outgoing connections
preferred_local_addresses: BTreeMap<(ProtocolType, AddressType), SocketAddr>,
/// The list of stable interface addresses we have last seen
stable_interface_addresses_at_startup: Vec<IpAddr>,
}
struct NetworkUnlockedInner {
@ -137,7 +139,7 @@ pub(in crate::network_manager) struct Network {
impl Network {
fn new_inner() -> NetworkInner {
NetworkInner {
network_started: false,
network_started: Some(false),
network_needs_restart: false,
needs_public_dial_info_check: false,
network_already_cleared: false,
@ -155,6 +157,7 @@ impl Network {
tls_acceptor: None,
listener_states: BTreeMap::new(),
preferred_local_addresses: BTreeMap::new(),
stable_interface_addresses_at_startup: Vec::new(),
}
}
@ -170,7 +173,7 @@ impl Network {
connection_manager,
interfaces: NetworkInterfaces::new(),
update_network_class_task: TickTask::new(1),
network_interfaces_task: TickTask::new(5),
network_interfaces_task: TickTask::new(1),
upnp_task: TickTask::new(1),
igd_manager: igd_manager::IGDManager::new(config.clone()),
}
@ -339,13 +342,14 @@ impl Network {
pub fn get_stable_interface_addresses(&self) -> Vec<IpAddr> {
let addrs = self.unlocked_inner.interfaces.stable_addresses();
let addrs: Vec<IpAddr> = addrs
let mut addrs: Vec<IpAddr> = addrs
.into_iter()
.filter(|addr| {
let address = Address::from_ip_addr(*addr);
address.is_local() || address.is_global()
})
.collect();
addrs.sort();
addrs
}
@ -361,7 +365,11 @@ impl Network {
return Ok(false);
}
self.inner.lock().needs_public_dial_info_check = true;
let mut inner = self.inner.lock();
let new_stable_interface_addresses = self.get_stable_interface_addresses();
if new_stable_interface_addresses != inner.stable_interface_addresses_at_startup {
inner.network_needs_restart = true;
}
Ok(true)
}
@ -675,196 +683,213 @@ impl Network {
#[instrument(level = "debug", err, skip_all)]
pub async fn startup(&self) -> EyreResult<()> {
// initialize interfaces
self.unlocked_inner.interfaces.refresh().await?;
self.inner.lock().network_started = None;
let startup_func = async {
// initialize interfaces
self.unlocked_inner.interfaces.refresh().await?;
// build the set of networks we should consider for the 'LocalNetwork' routing domain
let mut local_networks: HashSet<(IpAddr, IpAddr)> = HashSet::new();
self.unlocked_inner
.interfaces
.with_interfaces(|interfaces| {
log_net!(debug "interfaces: {:#?}", interfaces);
// build the set of networks we should consider for the 'LocalNetwork' routing domain
let mut local_networks: HashSet<(IpAddr, IpAddr)> = HashSet::new();
self.unlocked_inner
.interfaces
.with_interfaces(|interfaces| {
log_net!(debug "interfaces: {:#?}", interfaces);
for intf in interfaces.values() {
// Skip networks that we should never encounter
if intf.is_loopback() || !intf.is_running() {
continue;
for intf in interfaces.values() {
// Skip networks that we should never encounter
if intf.is_loopback() || !intf.is_running() {
continue;
}
// Add network to local networks table
for addr in &intf.addrs {
let netmask = addr.if_addr().netmask();
let network_ip = ipaddr_apply_netmask(addr.if_addr().ip(), netmask);
local_networks.insert((network_ip, netmask));
}
}
// Add network to local networks table
for addr in &intf.addrs {
let netmask = addr.if_addr().netmask();
let network_ip = ipaddr_apply_netmask(addr.if_addr().ip(), netmask);
local_networks.insert((network_ip, netmask));
}
}
});
let local_networks: Vec<(IpAddr, IpAddr)> = local_networks.into_iter().collect();
self.unlocked_inner
.routing_table
.configure_local_network_routing_domain(local_networks);
// determine if we have ipv4/ipv6 addresses
{
let mut inner = self.inner.lock();
inner.enable_ipv4 = false;
for addr in self.get_stable_interface_addresses() {
if addr.is_ipv4() {
log_net!(debug "enable address {:?} as ipv4", addr);
inner.enable_ipv4 = true;
} else if addr.is_ipv6() {
let address = Address::from_ip_addr(addr);
if address.is_global() {
log_net!(debug "enable address {:?} as ipv6 global", address);
inner.enable_ipv6_global = true;
} else if address.is_local() {
log_net!(debug "enable address {:?} as ipv6 local", address);
inner.enable_ipv6_local = true;
});
let local_networks: Vec<(IpAddr, IpAddr)> = local_networks.into_iter().collect();
self.unlocked_inner
.routing_table
.configure_local_network_routing_domain(local_networks);
// determine if we have ipv4/ipv6 addresses
{
let mut inner = self.inner.lock();
let stable_interface_addresses = self.get_stable_interface_addresses();
inner.enable_ipv4 = false;
for addr in stable_interface_addresses.iter().copied() {
if addr.is_ipv4() {
log_net!(debug "enable address {:?} as ipv4", addr);
inner.enable_ipv4 = true;
} else if addr.is_ipv6() {
let address = Address::from_ip_addr(addr);
if address.is_global() {
log_net!(debug "enable address {:?} as ipv6 global", address);
inner.enable_ipv6_global = true;
} else if address.is_local() {
log_net!(debug "enable address {:?} as ipv6 local", address);
inner.enable_ipv6_local = true;
}
}
}
inner.stable_interface_addresses_at_startup = stable_interface_addresses;
}
}
// Build our protocol config to share it with other nodes
let protocol_config = {
let mut inner = self.inner.lock();
// Create stop source
inner.stop_source = Some(StopSource::new());
// get protocol config
// Build our protocol config to share it with other nodes
let protocol_config = {
let c = self.config.get();
let mut inbound = ProtocolTypeSet::new();
let mut inner = self.inner.lock();
if c.network.protocol.udp.enabled {
inbound.insert(ProtocolType::UDP);
}
if c.network.protocol.tcp.listen {
inbound.insert(ProtocolType::TCP);
}
if c.network.protocol.ws.listen {
inbound.insert(ProtocolType::WS);
}
if c.network.protocol.wss.listen {
inbound.insert(ProtocolType::WSS);
}
// Create stop source
inner.stop_source = Some(StopSource::new());
let mut outbound = ProtocolTypeSet::new();
if c.network.protocol.udp.enabled {
outbound.insert(ProtocolType::UDP);
}
if c.network.protocol.tcp.connect {
outbound.insert(ProtocolType::TCP);
}
if c.network.protocol.ws.connect {
outbound.insert(ProtocolType::WS);
}
if c.network.protocol.wss.connect {
outbound.insert(ProtocolType::WSS);
}
// get protocol config
let protocol_config = {
let c = self.config.get();
let mut inbound = ProtocolTypeSet::new();
let mut family_global = AddressTypeSet::new();
let mut family_local = AddressTypeSet::new();
if inner.enable_ipv4 {
family_global.insert(AddressType::IPV4);
family_local.insert(AddressType::IPV4);
}
if inner.enable_ipv6_global {
family_global.insert(AddressType::IPV6);
}
if inner.enable_ipv6_local {
family_local.insert(AddressType::IPV6);
}
if c.network.protocol.udp.enabled {
inbound.insert(ProtocolType::UDP);
}
if c.network.protocol.tcp.listen {
inbound.insert(ProtocolType::TCP);
}
if c.network.protocol.ws.listen {
inbound.insert(ProtocolType::WS);
}
if c.network.protocol.wss.listen {
inbound.insert(ProtocolType::WSS);
}
// set up the routing table's network config
// if we have static public dialinfo, upgrade our network class
let public_internet_capabilities = {
PUBLIC_INTERNET_CAPABILITIES
.iter()
.copied()
.filter(|cap| !c.capabilities.disable.contains(cap))
.collect::<Vec<Capability>>()
};
let local_network_capabilities = {
LOCAL_NETWORK_CAPABILITIES
.iter()
.copied()
.filter(|cap| !c.capabilities.disable.contains(cap))
.collect::<Vec<Capability>>()
let mut outbound = ProtocolTypeSet::new();
if c.network.protocol.udp.enabled {
outbound.insert(ProtocolType::UDP);
}
if c.network.protocol.tcp.connect {
outbound.insert(ProtocolType::TCP);
}
if c.network.protocol.ws.connect {
outbound.insert(ProtocolType::WS);
}
if c.network.protocol.wss.connect {
outbound.insert(ProtocolType::WSS);
}
let mut family_global = AddressTypeSet::new();
let mut family_local = AddressTypeSet::new();
if inner.enable_ipv4 {
family_global.insert(AddressType::IPV4);
family_local.insert(AddressType::IPV4);
}
if inner.enable_ipv6_global {
family_global.insert(AddressType::IPV6);
}
if inner.enable_ipv6_local {
family_local.insert(AddressType::IPV6);
}
// set up the routing table's network config
// if we have static public dialinfo, upgrade our network class
let public_internet_capabilities = {
PUBLIC_INTERNET_CAPABILITIES
.iter()
.copied()
.filter(|cap| !c.capabilities.disable.contains(cap))
.collect::<Vec<Capability>>()
};
let local_network_capabilities = {
LOCAL_NETWORK_CAPABILITIES
.iter()
.copied()
.filter(|cap| !c.capabilities.disable.contains(cap))
.collect::<Vec<Capability>>()
};
ProtocolConfig {
outbound,
inbound,
family_global,
family_local,
public_internet_capabilities,
local_network_capabilities,
}
};
inner.protocol_config = protocol_config.clone();
ProtocolConfig {
outbound,
inbound,
family_global,
family_local,
public_internet_capabilities,
local_network_capabilities,
}
protocol_config
};
inner.protocol_config = protocol_config.clone();
protocol_config
};
// Start editing routing table
let mut editor_public_internet = self
.unlocked_inner
.routing_table
.edit_routing_domain(RoutingDomain::PublicInternet);
let mut editor_local_network = self
.unlocked_inner
.routing_table
.edit_routing_domain(RoutingDomain::LocalNetwork);
// Start editing routing table
let mut editor_public_internet = self
.unlocked_inner
.routing_table
.edit_routing_domain(RoutingDomain::PublicInternet);
let mut editor_local_network = self
.unlocked_inner
.routing_table
.edit_routing_domain(RoutingDomain::LocalNetwork);
// start listeners
if protocol_config.inbound.contains(ProtocolType::UDP) {
self.bind_udp_protocol_handlers(&mut editor_public_internet, &mut editor_local_network)
// start listeners
if protocol_config.inbound.contains(ProtocolType::UDP) {
self.bind_udp_protocol_handlers(
&mut editor_public_internet,
&mut editor_local_network,
)
.await?;
}
if protocol_config.inbound.contains(ProtocolType::WS) {
self.start_ws_listeners(&mut editor_public_internet, &mut editor_local_network)
.await?;
}
if protocol_config.inbound.contains(ProtocolType::WSS) {
self.start_wss_listeners(&mut editor_public_internet, &mut editor_local_network)
.await?;
}
if protocol_config.inbound.contains(ProtocolType::TCP) {
self.start_tcp_listeners(&mut editor_public_internet, &mut editor_local_network)
.await?;
}
editor_public_internet.setup_network(
protocol_config.outbound,
protocol_config.inbound,
protocol_config.family_global,
protocol_config.public_internet_capabilities,
);
editor_local_network.setup_network(
protocol_config.outbound,
protocol_config.inbound,
protocol_config.family_local,
protocol_config.local_network_capabilities,
);
let detect_address_changes = {
let c = self.config.get();
c.network.detect_address_changes
};
if !detect_address_changes {
let inner = self.inner.lock();
if !inner.static_public_dialinfo.is_empty() {
editor_public_internet.set_network_class(Some(NetworkClass::InboundCapable));
}
}
if protocol_config.inbound.contains(ProtocolType::WS) {
self.start_ws_listeners(&mut editor_public_internet, &mut editor_local_network)
.await?;
}
if protocol_config.inbound.contains(ProtocolType::WSS) {
self.start_wss_listeners(&mut editor_public_internet, &mut editor_local_network)
.await?;
}
if protocol_config.inbound.contains(ProtocolType::TCP) {
self.start_tcp_listeners(&mut editor_public_internet, &mut editor_local_network)
.await?;
}
// commit routing table edits
editor_public_internet.commit(true).await;
editor_local_network.commit(true).await;
editor_public_internet.setup_network(
protocol_config.outbound,
protocol_config.inbound,
protocol_config.family_global,
protocol_config.public_internet_capabilities,
);
editor_local_network.setup_network(
protocol_config.outbound,
protocol_config.inbound,
protocol_config.family_local,
protocol_config.local_network_capabilities,
);
let detect_address_changes = {
let c = self.config.get();
c.network.detect_address_changes
};
if !detect_address_changes {
let inner = self.inner.lock();
if !inner.static_public_dialinfo.is_empty() {
editor_public_internet.set_network_class(Some(NetworkClass::InboundCapable));
}
}
// commit routing table edits
editor_public_internet.commit(true).await;
editor_local_network.commit(true).await;
Ok(())
};
let res = startup_func.await;
if res.is_err() {
info!("network failed to start");
self.inner.lock().network_started = Some(false);
return res;
}
info!("network started");
self.inner.lock().network_started = true;
self.inner.lock().network_started = Some(true);
Ok(())
}
@ -872,7 +897,7 @@ impl Network {
self.inner.lock().network_needs_restart
}
pub fn is_started(&self) -> bool {
/// Current start state of the low-level network: `Some(true)` if it is
/// running, `Some(false)` if it is not, and `None` while it is in transit
/// between the two (startup or shutdown in progress).
pub fn is_started(&self) -> Option<bool> {
    let inner = self.inner.lock();
    inner.network_started
}
@ -885,6 +910,8 @@ impl Network {
pub async fn shutdown(&self) {
log_net!(debug "starting low level network shutdown");
self.inner.lock().network_started = None;
let routing_table = self.routing_table();
// Stop all tasks

View File

@ -394,6 +394,7 @@ impl NetworkManager {
// Get cache key
let ncm_key = NodeContactMethodCacheKey {
node_ids: target_node_ref.node_ids(),
own_node_info_ts: routing_table.get_own_node_info_ts(routing_domain),
target_node_info_ts: target_node_ref.node_info_ts(routing_domain),
target_node_ref_filter: target_node_ref.filter_ref().cloned(),

View File

@ -76,15 +76,7 @@ impl NetworkManager {
}
pub fn get_veilid_state(&self) -> Box<VeilidStateNetwork> {
let has_state = self
.unlocked_inner
.components
.read()
.as_ref()
.map(|c| c.net.is_started())
.unwrap_or(false);
if !has_state {
if !self.network_is_started() {
return Box::new(VeilidStateNetwork {
started: false,
bps_down: 0.into(),

View File

@ -52,7 +52,7 @@ pub const MAX_CAPABILITIES: usize = 64;
/////////////////////////////////////////////////////////////////
struct NetworkInner {
network_started: bool,
network_started: Option<bool>,
network_needs_restart: bool,
protocol_config: ProtocolConfig,
}
@ -74,7 +74,7 @@ pub(in crate::network_manager) struct Network {
impl Network {
fn new_inner() -> NetworkInner {
NetworkInner {
network_started: false,
network_started: Some(false),
network_needs_restart: false,
protocol_config: Default::default(),
}
@ -334,70 +334,81 @@ impl Network {
/////////////////////////////////////////////////////////////////
pub async fn startup(&self) -> EyreResult<()> {
log_net!(debug "starting network");
// get protocol config
let protocol_config = {
let c = self.config.get();
let inbound = ProtocolTypeSet::new();
let mut outbound = ProtocolTypeSet::new();
self.inner.lock().network_started = None;
let startup_func = async {
log_net!(debug "starting network");
// get protocol config
let protocol_config = {
let c = self.config.get();
let inbound = ProtocolTypeSet::new();
let mut outbound = ProtocolTypeSet::new();
if c.network.protocol.ws.connect {
outbound.insert(ProtocolType::WS);
}
if c.network.protocol.wss.connect {
outbound.insert(ProtocolType::WSS);
}
if c.network.protocol.ws.connect {
outbound.insert(ProtocolType::WS);
}
if c.network.protocol.wss.connect {
outbound.insert(ProtocolType::WSS);
}
let supported_address_types: AddressTypeSet = if is_ipv6_supported() {
AddressType::IPV4 | AddressType::IPV6
} else {
AddressType::IPV4.into()
let supported_address_types: AddressTypeSet = if is_ipv6_supported() {
AddressType::IPV4 | AddressType::IPV6
} else {
AddressType::IPV4.into()
};
let family_global = supported_address_types;
let family_local = supported_address_types;
let public_internet_capabilities = {
PUBLIC_INTERNET_CAPABILITIES
.iter()
.copied()
.filter(|cap| !c.capabilities.disable.contains(cap))
.collect::<Vec<Capability>>()
};
ProtocolConfig {
outbound,
inbound,
family_global,
family_local,
local_network_capabilities: vec![],
public_internet_capabilities,
}
};
self.inner.lock().protocol_config = protocol_config.clone();
let family_global = supported_address_types;
let family_local = supported_address_types;
// Start editing routing table
let mut editor_public_internet = self
.unlocked_inner
.routing_table
.edit_routing_domain(RoutingDomain::PublicInternet);
let public_internet_capabilities = {
PUBLIC_INTERNET_CAPABILITIES
.iter()
.copied()
.filter(|cap| !c.capabilities.disable.contains(cap))
.collect::<Vec<Capability>>()
};
// set up the routing table's network config
// if we have static public dialinfo, upgrade our network class
ProtocolConfig {
outbound,
inbound,
family_global,
family_local,
local_network_capabilities: vec![],
public_internet_capabilities,
}
editor_public_internet.setup_network(
protocol_config.outbound,
protocol_config.inbound,
protocol_config.family_global,
protocol_config.public_internet_capabilities.clone(),
);
editor_public_internet.set_network_class(Some(NetworkClass::WebApp));
// commit routing table edits
editor_public_internet.commit(true).await;
Ok(())
};
self.inner.lock().protocol_config = protocol_config.clone();
// Start editing routing table
let mut editor_public_internet = self
.unlocked_inner
.routing_table
.edit_routing_domain(RoutingDomain::PublicInternet);
let res = startup_func.await;
if res.is_err() {
info!("network failed to start");
self.inner.lock().network_started = Some(false);
return res;
}
// set up the routing table's network config
// if we have static public dialinfo, upgrade our network class
editor_public_internet.setup_network(
protocol_config.outbound,
protocol_config.inbound,
protocol_config.family_global,
protocol_config.public_internet_capabilities.clone(),
);
editor_public_internet.set_network_class(Some(NetworkClass::WebApp));
// commit routing table edits
editor_public_internet.commit(true).await;
self.inner.lock().network_started = true;
log_net!(debug "network started");
info!("network started");
self.inner.lock().network_started = Some(true);
Ok(())
}
@ -405,7 +416,7 @@ impl Network {
self.inner.lock().network_needs_restart
}
pub fn is_started(&self) -> bool {
/// Current start state of the network: `Some(true)` once startup has
/// succeeded, `Some(false)` when it is not started (including after a failed
/// startup), and `None` while a startup is in progress.
pub fn is_started(&self) -> Option<bool> {
    let inner = self.inner.lock();
    inner.network_started
}

View File

@ -275,9 +275,9 @@ impl BucketEntryInner {
&& signed_node_info.timestamp() == current_sni.timestamp()
{
// No need to update the signednodeinfo though since the timestamp is the same
// Touch the node and let it try to live again
// Let the node try to live again but don't mark it as seen yet
self.updated_since_last_network_change = true;
self.touch_last_seen(get_aligned_timestamp());
self.make_not_dead(get_aligned_timestamp());
}
return;
}
@ -293,10 +293,11 @@ impl BucketEntryInner {
let envelope_support = signed_node_info.node_info().envelope_support().to_vec();
// Update the signed node info
// Let the node try to live again but don't mark it as seen yet
*opt_current_sni = Some(Box::new(signed_node_info));
self.set_envelope_support(envelope_support);
self.updated_since_last_network_change = true;
self.touch_last_seen(get_aligned_timestamp());
self.make_not_dead(get_aligned_timestamp());
// If we're updating an entry's node info, purge all
// but the last connection in our last connections list
@ -760,6 +761,13 @@ impl BucketEntryInner {
self.peer_stats.rpc_stats.last_seen_ts = Some(ts);
}
/// Lets the node try to live again without marking it as seen.
///
/// Clears the last-seen timestamp and zeroes the `failed_to_send` and
/// `recent_lost_answers` counters in the RPC stats.
///
/// # Panics
///
/// Panics if `check_dead(cur_ts)` still reports the entry as dead after the
/// reset.
pub(super) fn make_not_dead(&mut self, cur_ts: Timestamp) {
    let rpc_stats = &mut self.peer_stats.rpc_stats;
    rpc_stats.last_seen_ts = None;
    rpc_stats.failed_to_send = 0;
    rpc_stats.recent_lost_answers = 0;

    // Invariant: after the reset above the entry must no longer be dead
    assert!(!self.check_dead(cur_ts));
}
pub(super) fn _state_debug_info(&self, cur_ts: Timestamp) -> String {
let first_consecutive_seen_ts = if let Some(first_consecutive_seen_ts) =
self.peer_stats.rpc_stats.first_consecutive_seen_ts

View File

@ -743,6 +743,16 @@ impl RoutingTable {
out
}
/// Marks every visited routing table entry as not punished.
///
/// The walk is started from `BucketEntryState::Dead`.
/// NOTE(review): presumably that is the minimum state, so all entries are
/// visited — confirm against `with_entries_mut`.
pub fn clear_punishments(&self) {
    let now = get_aligned_timestamp();
    self.inner
        .write()
        .with_entries_mut(now, BucketEntryState::Dead, |table_inner, entry| {
            entry.with_mut(table_inner, |_, entry_inner| entry_inner.set_punished(false));
            // No result is produced for any entry
            None::<()>
        });
}
//////////////////////////////////////////////////////////////////////
// Find Nodes

View File

@ -65,13 +65,13 @@ pub(crate) trait NodeRefBase: Sized {
}
}
fn is_filter_dead(&self) -> bool {
if let Some(filter) = &self.common().filter {
filter.is_dead()
} else {
false
}
}
// fn is_filter_dead(&self) -> bool {
// if let Some(filter) = &self.common().filter {
// filter.is_dead()
// } else {
// false
// }
// }
fn routing_domain_set(&self) -> RoutingDomainSet {
self.common()
@ -117,15 +117,15 @@ pub(crate) trait NodeRefBase: Sized {
e.update_node_status(routing_domain, node_status);
});
}
fn envelope_support(&self) -> Vec<u8> {
self.operate(|_rti, e| e.envelope_support())
}
// fn envelope_support(&self) -> Vec<u8> {
// self.operate(|_rti, e| e.envelope_support())
// }
fn add_envelope_version(&self, envelope_version: u8) {
self.operate_mut(|_rti, e| e.add_envelope_version(envelope_version))
}
fn set_envelope_support(&self, envelope_support: Vec<u8>) {
self.operate_mut(|_rti, e| e.set_envelope_support(envelope_support))
}
// fn set_envelope_support(&self, envelope_support: Vec<u8>) {
// self.operate_mut(|_rti, e| e.set_envelope_support(envelope_support))
// }
fn best_envelope_version(&self) -> Option<u8> {
self.operate(|_rti, e| e.best_envelope_version())
}
@ -167,25 +167,25 @@ pub(crate) trait NodeRefBase: Sized {
fn set_seen_our_node_info_ts(&self, routing_domain: RoutingDomain, seen_ts: Timestamp) {
self.operate_mut(|_rti, e| e.set_seen_our_node_info_ts(routing_domain, seen_ts));
}
fn network_class(&self, routing_domain: RoutingDomain) -> Option<NetworkClass> {
self.operate(|_rt, e| e.node_info(routing_domain).map(|n| n.network_class()))
}
fn outbound_protocols(&self, routing_domain: RoutingDomain) -> Option<ProtocolTypeSet> {
self.operate(|_rt, e| e.node_info(routing_domain).map(|n| n.outbound_protocols()))
}
fn address_types(&self, routing_domain: RoutingDomain) -> Option<AddressTypeSet> {
self.operate(|_rt, e| e.node_info(routing_domain).map(|n| n.address_types()))
}
fn node_info_outbound_filter(&self, routing_domain: RoutingDomain) -> DialInfoFilter {
let mut dif = DialInfoFilter::all();
if let Some(outbound_protocols) = self.outbound_protocols(routing_domain) {
dif = dif.with_protocol_type_set(outbound_protocols);
}
if let Some(address_types) = self.address_types(routing_domain) {
dif = dif.with_address_type_set(address_types);
}
dif
}
// fn network_class(&self, routing_domain: RoutingDomain) -> Option<NetworkClass> {
// self.operate(|_rt, e| e.node_info(routing_domain).map(|n| n.network_class()))
// }
// fn outbound_protocols(&self, routing_domain: RoutingDomain) -> Option<ProtocolTypeSet> {
// self.operate(|_rt, e| e.node_info(routing_domain).map(|n| n.outbound_protocols()))
// }
// fn address_types(&self, routing_domain: RoutingDomain) -> Option<AddressTypeSet> {
// self.operate(|_rt, e| e.node_info(routing_domain).map(|n| n.address_types()))
// }
// fn node_info_outbound_filter(&self, routing_domain: RoutingDomain) -> DialInfoFilter {
// let mut dif = DialInfoFilter::all();
// if let Some(outbound_protocols) = self.outbound_protocols(routing_domain) {
// dif = dif.with_protocol_type_set(outbound_protocols);
// }
// if let Some(address_types) = self.address_types(routing_domain) {
// dif = dif.with_address_type_set(address_types);
// }
// dif
// }
fn relay(&self, routing_domain: RoutingDomain) -> EyreResult<Option<NodeRef>> {
self.operate_mut(|rti, e| {
let Some(sni) = e.signed_node_info(routing_domain) else {

View File

@ -1056,6 +1056,11 @@ impl RouteSpecStore {
// Set sequencing requirement
first_hop.set_sequencing(sequencing);
// Enforce the routing domain
first_hop.merge_filter(
NodeRefFilter::new().with_routing_domain(RoutingDomain::PublicInternet),
);
// Return the compiled safety route
//info!("compile_safety_route profile (stub): {} us", (get_timestamp() - profile_start_ts));
return Ok(CompiledRoute {
@ -1113,6 +1118,10 @@ impl RouteSpecStore {
// Ensure sequencing requirement is set on first hop
first_hop.set_sequencing(safety_spec.sequencing);
// Enforce the routing domain
first_hop
.merge_filter(NodeRefFilter::new().with_routing_domain(RoutingDomain::PublicInternet));
// Get the safety route secret key
let secret = safety_rsd.secret_key;

View File

@ -860,7 +860,7 @@ impl RoutingTableInner {
timestamp: Timestamp,
) -> EyreResult<NodeRef> {
let nr = self.create_node_ref(outer_self, &TypedKeyGroup::from(node_id), |_rti, e| {
// this node is live because it literally just connected to us
//e.make_not_dead(timestamp);
e.touch_last_seen(timestamp);
})?;
// set the most recent node address for connection finding and udp replies

View File

@ -286,7 +286,10 @@ impl RoutingTable {
{
Ok(NodeContactMethod::Direct(v)) => v,
Ok(v) => {
log_rtab!(warn "invalid contact method for bootstrap, ignoring peer: {:?}", v);
log_rtab!(debug "invalid contact method for bootstrap, ignoring peer: {:?}", v);
// let _ = routing_table
// .network_manager()
// .get_node_contact_method(nr.clone());
return;
}
Err(e) => {

View File

@ -149,6 +149,11 @@ impl RoutingTable {
inner.refresh_cached_entry_counts()
};
// Only do the rest if the network has started
if !self.network_manager().network_is_started() {
return Ok(());
}
let min_peer_count = self.with_config(|c| c.network.dht.min_peer_count as usize);
// Figure out which tables need bootstrap or peer minimum refresh

View File

@ -28,6 +28,14 @@ pub(crate) enum Destination {
},
}
/// Routing configuration for destination
///
/// Produced by `Destination::get_unsafe_routing_info` for destinations that
/// are not using a safety route. Every field is optional.
#[derive(Debug, Clone)]
pub struct UnsafeRoutingInfo {
/// The target node; set for direct and relayed destinations, `None` for a
/// private route destination
pub opt_node: Option<NodeRef>,
/// The relay used to reach the target; only set for relayed destinations
pub opt_relay: Option<NodeRef>,
/// The routing domain being sent to, when it could be determined
pub opt_routing_domain: Option<RoutingDomain>,
}
impl Destination {
pub fn node(&self) -> Option<NodeRef> {
match self {
@ -138,6 +146,81 @@ impl Destination {
}
}
}
/// Determines the routing details for this destination when no safety route
/// is in use.
///
/// Returns `None` when the safety selection is `SafetySelection::Safe`,
/// because the safety route is then responsible for the routing. Otherwise
/// returns an `UnsafeRoutingInfo` built as follows:
///
/// * `Direct`: the target node and its best routing domain (which may be
///   absent).
/// * `Relay`: the target node, the relay, and the first routing domain of the
///   target for which the relay is either our own relay node in that domain
///   or the relay published by the target. If neither matches, the routing
///   domain is left unset.
/// * `PrivateRoute`: no node and no relay, with the routing domain fixed to
///   `RoutingDomain::PublicInternet`.
pub fn get_unsafe_routing_info(
    &self,
    routing_table: RoutingTable,
) -> Option<UnsafeRoutingInfo> {
    // If there's a safety route in use, the safety route will be responsible for the routing
    if matches!(self.get_safety_selection(), SafetySelection::Safe(_)) {
        return None;
    }

    let info = match self {
        Destination::Direct { node, .. } => {
            let opt_routing_domain = node.best_routing_domain();
            if opt_routing_domain.is_none() {
                // No routing domain for target, no node info
                // Only a stale connection or no connection exists
                log_rpc!(debug "No routing domain for node: node={}", node);
            }
            UnsafeRoutingInfo {
                opt_node: Some(node.clone()),
                opt_relay: None,
                opt_routing_domain,
            }
        }
        Destination::Relay { relay, node, .. } => {
            // Outbound relays are defined as routing to and from PublicInternet only right now
            // Pick the first of the target's routing domains whose relay matches this relay
            let opt_routing_domain =
                node.routing_domain_set().into_iter().find(|&target_rd| {
                    // Check our inbound/outbound relay for this routing domain first
                    if let Some(our_relay) = routing_table.relay_node(target_rd) {
                        if relay.same_entry(&our_relay) {
                            return true;
                        }
                    }
                    // Otherwise check whether the remote node's published relay is the one relaying
                    matches!(
                        node.relay(target_rd),
                        Ok(Some(published_relay)) if relay.same_entry(&published_relay)
                    )
                });
            if opt_routing_domain.is_none() {
                // In the case of an unexpected relay, log it and don't pass any sender peer info into an unexpected relay
                log_rpc!(debug "Unexpected relay used for node: relay={}, node={}", relay, node);
            }
            UnsafeRoutingInfo {
                opt_node: Some(node.clone()),
                opt_relay: Some(relay.clone()),
                opt_routing_domain,
            }
        }
        Destination::PrivateRoute { .. } => UnsafeRoutingInfo {
            opt_node: None,
            opt_relay: None,
            opt_routing_domain: Some(RoutingDomain::PublicInternet),
        },
    };

    Some(info)
}
}
impl fmt::Display for Destination {

View File

@ -10,10 +10,16 @@ where
/// The kind of outcome carried by a fanout result.
// NOTE(review): the variant names suggest how the fanout ended (still in
// progress, timed out, finished, ran out of candidates) — confirm against the
// fanout implementation before relying on that reading.
#[derive(Debug, Copy, Clone)]
pub(crate) enum FanoutResultKind {
    Partial,
    Timeout,
    Finished,
    Exhausted,
}

impl FanoutResultKind {
    /// Returns `true` only for the `Partial` variant.
    pub fn is_partial(&self) -> bool {
        // Exhaustive on purpose: adding a variant forces this to be revisited
        match self {
            Self::Partial => true,
            Self::Timeout | Self::Finished | Self::Exhausted => false,
        }
    }
}
#[derive(Debug, Clone)]
pub(crate) struct FanoutResult {
@ -23,6 +29,7 @@ pub(crate) struct FanoutResult {
pub(crate) fn debug_fanout_result(result: &FanoutResult) -> String {
let kc = match result.kind {
FanoutResultKind::Partial => "P",
FanoutResultKind::Timeout => "T",
FanoutResultKind::Finished => "F",
FanoutResultKind::Exhausted => "E",

View File

@ -53,11 +53,13 @@ use storage_manager::*;
/// Header detail for an RPC message: the decoded envelope together with the
/// peer, flow and routing domain the message was received from
struct RPCMessageHeaderDetailDirect {
/// The decoded header of the envelope
envelope: Envelope,
/// The noderef of the peer that sent the message (not the original sender).
/// Ensures node doesn't get evicted from routing table until we're done with it
/// Should be filtered to the routing domain of the peer that we received from
peer_noderef: NodeRef,
/// The flow from the peer that sent the message (not the original sender)
flow: Flow,
/// The routing domain of the peer that we received the message from
routing_domain: RoutingDomain,
}
@ -869,51 +871,36 @@ impl RPCProcessor {
// Don't do this if the sender is to remain private
// Otherwise we would be attaching the original sender's identity to the final destination,
// thus defeating the purpose of the safety route entirely :P
match dest.get_safety_selection() {
SafetySelection::Unsafe(_) => {}
SafetySelection::Safe(_) => {
return SenderPeerInfo::default();
}
}
// Get the target we're sending to
let routing_table = self.routing_table();
let target = match dest {
Destination::Direct {
node: target,
safety_selection: _,
} => target.clone(),
Destination::Relay {
relay: _,
node: target,
safety_selection: _,
} => target.clone(),
Destination::PrivateRoute {
private_route: _,
safety_selection: _,
} => {
return SenderPeerInfo::default();
}
let Some(UnsafeRoutingInfo {
opt_node, opt_relay: _, opt_routing_domain
}) = dest.get_unsafe_routing_info(self.routing_table.clone()) else {
return SenderPeerInfo::default();
};
let Some(routing_domain) = target.best_routing_domain() else {
let Some(node) = opt_node else {
// If this is going over a private route, don't bother sending any sender peer info
// The other side won't accept it because peer info sent over a private route
// could be used to deanonymize the private route's endpoint
return SenderPeerInfo::default();
};
let Some(routing_domain) = opt_routing_domain else {
// No routing domain for target, no node info
// Only a stale connection or no connection exists
return SenderPeerInfo::default();
};
// Get the target's node info timestamp
let target_node_info_ts = target.node_info_ts(routing_domain);
let target_node_info_ts = node.node_info_ts(routing_domain);
// Return whatever peer info we have even if the network class is not yet valid
// That way we overwrite any prior existing valid-network-class nodeinfo in the remote routing table
let routing_table = self.routing_table();
let own_peer_info = routing_table.get_own_peer_info(routing_domain);
// Get our node info timestamp
let our_node_info_ts = own_peer_info.signed_node_info().timestamp();
// If the target has seen our node info already don't send it again
if target.has_seen_our_node_info_ts(routing_domain, our_node_info_ts) {
if node.has_seen_our_node_info_ts(routing_domain, our_node_info_ts) {
return SenderPeerInfo::new_no_peer_info(target_node_info_ts);
}
@ -1358,6 +1345,7 @@ impl RPCProcessor {
request: RPCMessage,
answer: RPCAnswer,
) ->RPCNetworkResult<()> {
// Extract destination from respond_to
let dest = network_result_try!(self.get_respond_to_destination(&request));

View File

@ -23,81 +23,38 @@ impl RPCProcessor {
self,
dest: Destination,
) -> RPCNetworkResult<Answer<Option<SenderInfo>>> {
let (opt_target_nr, routing_domain, node_status) = match dest.get_safety_selection() {
SafetySelection::Unsafe(_) => {
let (opt_target_nr, routing_domain) = match &dest {
Destination::Direct {
node: target,
safety_selection: _,
} => {
let routing_domain = match target.best_routing_domain() {
Some(rd) => rd,
None => {
// Because this exits before calling 'question()',
// a failure to find a routing domain constitutes a send failure
let send_ts = get_aligned_timestamp();
self.record_send_failure(
RPCKind::Question,
send_ts,
target.clone(),
None,
None,
);
return Ok(NetworkResult::no_connection_other(
"no routing domain for target",
));
}
};
(Some(target.clone()), routing_domain)
}
Destination::Relay {
relay,
node: target,
safety_selection: _,
} => {
let routing_domain = match relay.best_routing_domain() {
Some(rd) => rd,
None => {
// Because this exits before calling 'question()',
// a failure to find a routing domain constitutes a send failure for both the target and its relay
let send_ts = get_aligned_timestamp();
self.record_send_failure(
RPCKind::Question,
send_ts,
relay.clone(),
None,
None,
);
self.record_send_failure(
RPCKind::Question,
send_ts,
target.clone(),
None,
None,
);
return Ok(NetworkResult::no_connection_other(
"no routing domain for peer",
));
}
};
(Some(target.clone()), routing_domain)
}
Destination::PrivateRoute {
private_route: _,
safety_selection: _,
} => (None, RoutingDomain::PublicInternet),
};
// Determine routing domain and node status to send
let (opt_target_nr, routing_domain, node_status) = if let Some(UnsafeRoutingInfo {
opt_node,
opt_relay,
opt_routing_domain,
}) =
dest.get_unsafe_routing_info(self.routing_table.clone())
{
let Some(routing_domain) = opt_routing_domain else {
// Because this exits before calling 'question()',
// a failure to find a routing domain constitutes a send failure
// Record the send failure on both the node and its relay
let send_ts = get_aligned_timestamp();
if let Some(node) = &opt_node {
self.record_send_failure(RPCKind::Question, send_ts, node.clone(), None, None);
}
if let Some(relay) = &opt_relay {
self.record_send_failure(RPCKind::Question, send_ts, relay.clone(), None, None);
}
return Ok(NetworkResult::no_connection_other(
"no routing domain for target",
));
};
let node_status = Some(self.network_manager().generate_node_status(routing_domain));
(opt_target_nr, routing_domain, node_status)
}
SafetySelection::Safe(_) => {
let routing_domain = RoutingDomain::PublicInternet;
let node_status = None;
(None, routing_domain, node_status)
}
let node_status = Some(self.network_manager().generate_node_status(routing_domain));
(opt_node, routing_domain, node_status)
} else {
// Safety route means we don't exchange node status and things are all PublicInternet RoutingDomain
(None, RoutingDomain::PublicInternet, None)
};
// Create status rpc question
let status_q = RPCOperationStatusQ::new(node_status);
let question = RPCQuestion::new(
network_result_try!(self.get_destination_respond_to(&dest)?),

View File

@ -10,9 +10,12 @@ struct OutboundGetValueContext {
pub descriptor: Option<Arc<SignedValueDescriptor>>,
/// The parsed schema from the descriptor if we have one
pub schema: Option<DHTSchema>,
/// If we should send a partial update with the current context
pub send_partial_update: bool,
}
/// The result of the outbound_get_value operation
#[derive(Clone, Debug)]
pub(super) struct OutboundGetValueResult {
/// Fanout result
pub fanout_result: FanoutResult,
@ -29,7 +32,7 @@ impl StorageManager {
subkey: ValueSubkey,
safety_selection: SafetySelection,
last_get_result: GetResult,
) -> VeilidAPIResult<OutboundGetValueResult> {
) -> VeilidAPIResult<flume::Receiver<VeilidAPIResult<OutboundGetValueResult>>> {
let routing_table = rpc_processor.routing_table();
// Get the DHT parameters for 'GetValue'
@ -49,60 +52,76 @@ impl StorageManager {
inner.get_value_nodes(key)?.unwrap_or_default()
};
// Make do-get-value answer context
// Parse the schema
let schema = if let Some(d) = &last_get_result.opt_descriptor {
Some(d.schema()?)
} else {
None
};
// Make the return channel
let (out_tx, out_rx) = flume::unbounded::<VeilidAPIResult<OutboundGetValueResult>>();
// Make do-get-value answer context
let context = Arc::new(Mutex::new(OutboundGetValueContext {
value: last_get_result.opt_value,
value_nodes: vec![],
descriptor: last_get_result.opt_descriptor.clone(),
schema,
send_partial_update: false,
}));
// Routine to call to generate fanout
let call_routine = |next_node: NodeRef| {
let rpc_processor = rpc_processor.clone();
let call_routine = {
let context = context.clone();
let last_descriptor = last_get_result.opt_descriptor.clone();
async move {
let gva = network_result_try!(
rpc_processor
.clone()
.rpc_call_get_value(
Destination::direct(next_node.clone()).with_safety(safety_selection),
key,
subkey,
last_descriptor.map(|x| (*x).clone()),
)
.await?
);
// Keep the descriptor if we got one. If we had a last_descriptor it will
// already be validated by rpc_call_get_value
if let Some(descriptor) = gva.answer.descriptor {
let rpc_processor = rpc_processor.clone();
move |next_node: NodeRef| {
let context = context.clone();
let rpc_processor = rpc_processor.clone();
let last_descriptor = last_get_result.opt_descriptor.clone();
async move {
let gva = network_result_try!(
rpc_processor
.clone()
.rpc_call_get_value(
Destination::direct(next_node.clone())
.with_safety(safety_selection),
key,
subkey,
last_descriptor.map(|x| (*x).clone()),
)
.await?
);
let mut ctx = context.lock();
if ctx.descriptor.is_none() && ctx.schema.is_none() {
let schema = match descriptor.schema() {
Ok(v) => v,
Err(e) => {
return Ok(NetworkResult::invalid_message(e));
}
};
ctx.schema = Some(schema);
ctx.descriptor = Some(Arc::new(descriptor));
// Keep the descriptor if we got one. If we had a last_descriptor it will
// already be validated by rpc_call_get_value
if let Some(descriptor) = gva.answer.descriptor {
if ctx.descriptor.is_none() && ctx.schema.is_none() {
let schema = match descriptor.schema() {
Ok(v) => v,
Err(e) => {
return Ok(NetworkResult::invalid_message(e));
}
};
ctx.schema = Some(schema);
ctx.descriptor = Some(Arc::new(descriptor));
}
}
}
// Keep the value if we got one and it is newer and it passes schema validation
if let Some(value) = gva.answer.value {
log_dht!(debug "Got value back: len={} seq={}", value.value_data().data().len(), value.value_data().seq());
let mut ctx = context.lock();
// Keep the value if we got one and it is newer and it passes schema validation
let Some(value) = gva.answer.value else {
// Return peers if we have some
log_network_result!(debug "GetValue returned no value, fanout call returned peers {}", gva.answer.peers.len());
return Ok(NetworkResult::value(gva.answer.peers))
};
log_dht!(debug "GetValue got value back: len={} seq={}", value.value_data().data().len(), value.value_data().seq());
// Ensure we have a schema and descriptor
let (Some(descriptor), Some(schema)) = (&ctx.descriptor, &ctx.schema) else {
let (Some(descriptor), Some(schema)) = (&ctx.descriptor, &ctx.schema)
else {
// Got a value but no descriptor for it
// Move to the next node
return Ok(NetworkResult::invalid_message(
@ -133,7 +152,9 @@ impl StorageManager {
// If sequence number is the same, the data should be the same
if prior_value.value_data() != value.value_data() {
// Move to the next node
return Ok(NetworkResult::invalid_message("value data mismatch"));
return Ok(NetworkResult::invalid_message(
"value data mismatch",
));
}
// Increase the consensus count for the existing value
ctx.value_nodes.push(next_node);
@ -142,6 +163,8 @@ impl StorageManager {
ctx.value = Some(Arc::new(value));
// One node has shown us this value so far
ctx.value_nodes = vec![next_node];
// Send an update since the value changed
ctx.send_partial_update = true;
} else {
// If the sequence number is older, ignore it
}
@ -150,70 +173,184 @@ impl StorageManager {
ctx.value = Some(Arc::new(value));
// One node has shown us this value so far
ctx.value_nodes = vec![next_node];
// Send an update since the value changed
ctx.send_partial_update = true;
}
// Return peers if we have some
log_network_result!(debug "GetValue fanout call returned peers {}", gva.answer.peers.len());
Ok(NetworkResult::value(gva.answer.peers))
}
// Return peers if we have some
log_network_result!(debug "GetValue fanout call returned peers {}", gva.answer.peers.len());
Ok(NetworkResult::value(gva.answer.peers))
}
};
// Routine to call to check if we're done at each step
let check_done = |_closest_nodes: &[NodeRef]| {
// If we have reached sufficient consensus, return done
let ctx = context.lock();
if ctx.value.is_some()
&& ctx.descriptor.is_some()
&& ctx.value_nodes.len() >= consensus_count
{
return Some(());
let check_done = {
let context = context.clone();
let out_tx = out_tx.clone();
move |_closest_nodes: &[NodeRef]| {
let mut ctx = context.lock();
// send partial update if desired
if ctx.send_partial_update {
ctx.send_partial_update=false;
// return partial result
let fanout_result = FanoutResult {
kind: FanoutResultKind::Partial,
value_nodes: ctx.value_nodes.clone(),
};
if let Err(e) = out_tx.send(Ok(OutboundGetValueResult {
fanout_result,
get_result: GetResult {
opt_value: ctx.value.clone(),
opt_descriptor: ctx.descriptor.clone(),
},
})) {
log_dht!(debug "Sending partial GetValue result failed: {}", e);
}
}
// If we have reached sufficient consensus, return done
if ctx.value.is_some()
&& ctx.descriptor.is_some()
&& ctx.value_nodes.len() >= consensus_count
{
return Some(());
}
None
}
None
};
// Call the fanout
let fanout_call = FanoutCall::new(
routing_table.clone(),
// Call the fanout in a spawned task
spawn(Box::pin(async move {
let fanout_call = FanoutCall::new(
routing_table.clone(),
key,
key_count,
fanout,
timeout_us,
capability_fanout_node_info_filter(vec![CAP_DHT]),
call_routine,
check_done,
);
let kind = match fanout_call.run(init_fanout_queue).await {
// If we don't finish in the timeout (too much time passed checking for consensus)
TimeoutOr::Timeout => FanoutResultKind::Timeout,
// If we finished with or without consensus (enough nodes returning the same value)
TimeoutOr::Value(Ok(Some(()))) => FanoutResultKind::Finished,
// If we ran out of nodes before getting consensus
TimeoutOr::Value(Ok(None)) => FanoutResultKind::Exhausted,
// Failed
TimeoutOr::Value(Err(e)) => {
// If we finished with an error, return that
log_dht!(debug "GetValue fanout error: {}", e);
if let Err(e) = out_tx.send(Err(e.into())) {
log_dht!(debug "Sending GetValue fanout error failed: {}", e);
}
return;
}
};
let ctx = context.lock();
let fanout_result = FanoutResult {
kind,
value_nodes: ctx.value_nodes.clone(),
};
log_network_result!(debug "GetValue Fanout: {:?}", fanout_result);
if let Err(e) = out_tx.send(Ok(OutboundGetValueResult {
fanout_result,
get_result: GetResult {
opt_value: ctx.value.clone(),
opt_descriptor: ctx.descriptor.clone(),
},
})) {
log_dht!(debug "Sending GetValue result failed: {}", e);
}
}))
.detach();
Ok(out_rx)
}
pub(super) fn process_deferred_outbound_get_value_result_inner(&self, inner: &mut StorageManagerInner, res_rx: flume::Receiver<Result<get_value::OutboundGetValueResult, VeilidAPIError>>, key: TypedKey, subkey: ValueSubkey, last_seq: ValueSeqNum) {
let this = self.clone();
inner.process_deferred_results(
res_rx,
Box::new(
move |result: VeilidAPIResult<get_value::OutboundGetValueResult>| -> SendPinBoxFuture<bool> {
let this = this.clone();
Box::pin(async move {
let result = match result {
Ok(v) => v,
Err(e) => {
log_rtab!(debug "Deferred fanout error: {}", e);
return false;
}
};
let is_partial = result.fanout_result.kind.is_partial();
let value_data = match this.process_outbound_get_value_result(key, subkey, Some(last_seq), result).await {
Ok(Some(v)) => v,
Ok(None) => {
return is_partial;
}
Err(e) => {
log_rtab!(debug "Deferred fanout error: {}", e);
return false;
}
};
if is_partial {
// If more partial results show up, don't send an update until we're done
return true;
}
// If we processed the final result, possibly send an update
// if the sequence number changed since our first partial update
// Send with a max count as this is not attached to any watch
if last_seq != value_data.seq() {
if let Err(e) = this.update_callback_value_change(key,ValueSubkeyRangeSet::single(subkey), u32::MAX, Some(value_data)).await {
log_rtab!(debug "Failed sending deferred fanout value change: {}", e);
}
}
// Return done
false
})
},
),
);
}
/// Process one result of an outbound get-value fanout for `key`/`subkey`.
///
/// Returns `Ok(None)` when the result carries no value. Otherwise the fanout result is
/// handed to `process_fanout_results`, the value is written to the local record via
/// `handle_set_local_value` when its sequence number differs from `opt_last_seq`, and the
/// value's data is returned.
pub(super) async fn process_outbound_get_value_result(&self, key: TypedKey, subkey: ValueSubkey, opt_last_seq: Option<u32>, result: get_value::OutboundGetValueResult) -> Result<Option<ValueData>, VeilidAPIError> {
// NOTE(review): several lines below reference names that are not defined in this function
// (`key_count`, `fanout`, `timeout_us`, `call_routine`, `check_done`, `fanout_call`,
// `init_fanout_queue`, `context`); they look like residue of an earlier implementation
// interleaved with this body — confirm against the real file before relying on them.
// See if we got a value back
let Some(get_result_value) = result.get_result.opt_value else {
// If we got nothing back then we also had nothing beforehand, return nothing
return Ok(None);
};
// Keep the list of nodes that returned a value for later reference
let mut inner = self.lock().await?;
inner.process_fanout_results(
key,
key_count,
fanout,
timeout_us,
capability_fanout_node_info_filter(vec![CAP_DHT]),
call_routine,
check_done,
core::iter::once((subkey, &result.fanout_result)),
false,
);
let kind = match fanout_call.run(init_fanout_queue).await {
// If we don't finish in the timeout (too much time passed checking for consensus)
TimeoutOr::Timeout => FanoutResultKind::Timeout,
// If we finished with or without consensus (enough nodes returning the same value)
TimeoutOr::Value(Ok(Some(()))) => FanoutResultKind::Finished,
// If we ran out of nodes before getting consensus
TimeoutOr::Value(Ok(None)) => FanoutResultKind::Exhausted,
// Failed
TimeoutOr::Value(Err(e)) => {
// If we finished with an error, return that
log_dht!(debug "GetValue Fanout Error: {}", e);
return Err(e.into());
}
};
let ctx = context.lock();
let fanout_result = FanoutResult {
kind,
value_nodes: ctx.value_nodes.clone(),
};
log_network_result!(debug "GetValue Fanout: {:?}", fanout_result);
Ok(OutboundGetValueResult {
fanout_result,
get_result: GetResult {
opt_value: ctx.value.clone(),
opt_descriptor: ctx.descriptor.clone(),
},
})
// If we got a new value back then write it to the opened record
if Some(get_result_value.value_data().seq()) != opt_last_seq {
inner
.handle_set_local_value(
key,
subkey,
get_result_value.clone(),
WatchUpdateMode::UpdateAll,
)
.await?;
}
Ok(Some(get_result_value.value_data().clone()))
}
/// Handle a received 'Get Value' query

View File

@ -264,7 +264,7 @@ impl StorageManager {
// No last descriptor, no last value
// Use the safety selection we opened the record with
let subkey: ValueSubkey = 0;
let result = self
let res_rx = self
.outbound_get_value(
rpc_processor,
key,
@ -273,12 +273,24 @@ impl StorageManager {
GetResult::default(),
)
.await?;
// Wait for the first result
let Ok(result) = res_rx.recv_async().await else {
apibail_internal!("failed to receive results");
};
let result = result?;
// If we got nothing back, the key wasn't found
if result.get_result.opt_value.is_none() && result.get_result.opt_descriptor.is_none() {
// No result
apibail_key_not_found!(key);
};
let last_seq = result
.get_result
.opt_value
.as_ref()
.unwrap()
.value_data()
.seq();
// Reopen inner to store value we just got
let mut inner = self.lock().await?;
@ -295,9 +307,16 @@ impl StorageManager {
}
// Open the new record
inner
let out = inner
.open_new_record(key, writer, subkey, result.get_result, safety_selection)
.await
.await;
if out.is_ok() {
self.process_deferred_outbound_get_value_result_inner(
&mut inner, res_rx, key, subkey, last_seq,
);
}
out
}
/// Close an opened local record
@ -402,7 +421,7 @@ impl StorageManager {
.opt_value
.as_ref()
.map(|v| v.value_data().seq());
let result = self
let res_rx = self
.outbound_get_value(
rpc_processor,
key,
@ -412,32 +431,33 @@ impl StorageManager {
)
.await?;
// See if we got a value back
let Some(get_result_value) = result.get_result.opt_value else {
// If we got nothing back then we also had nothing beforehand, return nothing
return Ok(None);
// Wait for the first result
let Ok(result) = res_rx.recv_async().await else {
apibail_internal!("failed to receive results");
};
let result = result?;
let partial = result.fanout_result.kind.is_partial();
// Keep the list of nodes that returned a value for later reference
let mut inner = self.lock().await?;
inner.process_fanout_results(
key,
core::iter::once((subkey, &result.fanout_result)),
false,
);
// Process the returned result
let out = self
.process_outbound_get_value_result(key, subkey, opt_last_seq, result)
.await?;
// If we got a new value back then write it to the opened record
if Some(get_result_value.value_data().seq()) != opt_last_seq {
inner
.handle_set_local_value(
if let Some(out) = &out {
// If there's more to process, do it in the background
if partial {
let mut inner = self.lock().await?;
self.process_deferred_outbound_get_value_result_inner(
&mut inner,
res_rx,
key,
subkey,
get_result_value.clone(),
WatchUpdateMode::UpdateAll,
)
.await?;
out.seq(),
);
}
}
Ok(Some(get_result_value.value_data().clone()))
Ok(out)
}
/// Set the value of a subkey on an opened local record
@ -537,7 +557,7 @@ impl StorageManager {
log_stor!(debug "Writing subkey to the network: {}:{} len={}", key, subkey, signed_value_data.value_data().data().len() );
// Use the safety selection we opened the record with
let result = match self
let res_rx = match self
.outbound_set_value(
rpc_processor,
key,
@ -557,36 +577,39 @@ impl StorageManager {
}
};
// Regain the lock after network access
let mut inner = self.lock().await?;
// Wait for the first result
let Ok(result) = res_rx.recv_async().await else {
apibail_internal!("failed to receive results");
};
let result = result?;
let partial = result.fanout_result.kind.is_partial();
// Report on fanout result offline
let was_offline = self.check_fanout_set_offline(key, subkey, &result.fanout_result);
if was_offline {
// Failed to write, try again later
inner.add_offline_subkey_write(key, subkey, safety_selection);
// Process the returned result
let out = self
.process_outbound_set_value_result(
key,
subkey,
signed_value_data.value_data().clone(),
safety_selection,
result,
)
.await?;
// If there's more to process, do it in the background
if partial {
let mut inner = self.lock().await?;
self.process_deferred_outbound_set_value_result_inner(
&mut inner,
res_rx,
key,
subkey,
out.clone()
.unwrap_or_else(|| signed_value_data.value_data().clone()),
safety_selection,
);
}
// Keep the list of nodes that returned a value for later reference
inner.process_fanout_results(key, core::iter::once((subkey, &result.fanout_result)), true);
// Return the new value if it differs from what was asked to set
if result.signed_value_data.value_data() != signed_value_data.value_data() {
// Record the newer value and send and update since it is different than what we just set
inner
.handle_set_local_value(
key,
subkey,
result.signed_value_data.clone(),
WatchUpdateMode::UpdateAll,
)
.await?;
return Ok(Some(result.signed_value_data.value_data().clone()));
}
// If the original value was set, return None
Ok(None)
Ok(out)
}
/// Create,update or cancel an outbound watch to a DHT value
@ -920,6 +943,31 @@ impl StorageManager {
Ok(())
}
/// Send a value change up through the update callback, if one is set.
///
/// Builds a `VeilidValueChange` from `key`, `subkeys`, `count` and `value` and passes it
/// to the callback wrapped in `VeilidUpdate::ValueChange`. When no callback is set this
/// does nothing.
///
/// # Errors
/// Fails only if `self.lock()` fails.
#[instrument(level = "trace", skip(self), err)]
async fn update_callback_value_change(
    &self,
    key: TypedKey,
    subkeys: ValueSubkeyRangeSet,
    count: u32,
    value: Option<ValueData>,
) -> Result<(), VeilidAPIError> {
    // The guard is a temporary of this statement, so it is dropped before the callback runs
    let registered_callback = self.lock().await?.update_callback.clone();
    let Some(notify) = registered_callback else {
        return Ok(());
    };

    let change = VeilidValueChange {
        key,
        subkeys,
        count,
        value,
    };
    notify(VeilidUpdate::ValueChange(Box::new(change)));
    Ok(())
}
fn check_fanout_set_offline(
&self,
key: TypedKey,
@ -927,6 +975,7 @@ impl StorageManager {
fanout_result: &FanoutResult,
) -> bool {
match fanout_result.kind {
FanoutResultKind::Partial => false,
FanoutResultKind::Timeout => {
log_stor!(debug "timeout in set_value, adding offline subkey: {}:{}", key, subkey);
true

View File

@ -10,9 +10,12 @@ struct OutboundSetValueContext {
pub missed_since_last_set: usize,
/// The parsed schema from the descriptor if we have one
pub schema: DHTSchema,
/// If we should send a partial update with the current context
pub send_partial_update: bool,
}
/// The result of the outbound_set_value operation
#[derive(Clone, Debug)]
pub(super) struct OutboundSetValueResult {
/// Fanout result
pub fanout_result: FanoutResult,
@ -30,7 +33,7 @@ impl StorageManager {
safety_selection: SafetySelection,
value: Arc<SignedValueData>,
descriptor: Arc<SignedValueDescriptor>,
) -> VeilidAPIResult<OutboundSetValueResult> {
) -> VeilidAPIResult<flume::Receiver<VeilidAPIResult<OutboundSetValueResult>>> {
let routing_table = rpc_processor.routing_table();
// Get the DHT parameters for 'SetValue'
@ -50,6 +53,9 @@ impl StorageManager {
inner.get_value_nodes(key)?.unwrap_or_default()
};
// Make the return channel
let (out_tx, out_rx) = flume::unbounded::<VeilidAPIResult<OutboundSetValueResult>>();
// Make do-set-value answer context
let schema = descriptor.schema()?;
let context = Arc::new(Mutex::new(OutboundSetValueContext {
@ -57,151 +63,330 @@ impl StorageManager {
value_nodes: vec![],
missed_since_last_set: 0,
schema,
send_partial_update: false,
}));
// Routine to call to generate fanout
let call_routine = |next_node: NodeRef| {
let rpc_processor = rpc_processor.clone();
let call_routine = {
let context = context.clone();
let descriptor = descriptor.clone();
async move {
let send_descriptor = true; // xxx check if next_node needs the descriptor or not
let rpc_processor = rpc_processor.clone();
// get most recent value to send
let value = {
let ctx = context.lock();
ctx.value.clone()
};
move |next_node: NodeRef| {
let rpc_processor = rpc_processor.clone();
let context = context.clone();
let descriptor = descriptor.clone();
async move {
let send_descriptor = true; // xxx check if next_node needs the descriptor or not
// send across the wire
let sva = network_result_try!(
rpc_processor
.clone()
.rpc_call_set_value(
Destination::direct(next_node.clone()).with_safety(safety_selection),
key,
subkey,
(*value).clone(),
(*descriptor).clone(),
send_descriptor,
)
.await?
);
// get most recent value to send
let value = {
let ctx = context.lock();
ctx.value.clone()
};
// If the node was close enough to possibly set the value
if sva.answer.set {
// send across the wire
let sva = network_result_try!(
rpc_processor
.clone()
.rpc_call_set_value(
Destination::direct(next_node.clone())
.with_safety(safety_selection),
key,
subkey,
(*value).clone(),
(*descriptor).clone(),
send_descriptor,
)
.await?
);
// If the node was close enough to possibly set the value
let mut ctx = context.lock();
if !sva.answer.set {
ctx.missed_since_last_set += 1;
// Keep the value if we got one and it is newer and it passes schema validation
if let Some(value) = sva.answer.value {
log_dht!(debug "Got value back: len={} seq={}", value.value_data().data().len(), value.value_data().seq());
// Return peers if we have some
log_network_result!(debug "SetValue missed: {}, fanout call returned peers {}", ctx.missed_since_last_set, sva.answer.peers.len());
return Ok(NetworkResult::value(sva.answer.peers));
}
// Validate with schema
if !ctx.schema.check_subkey_value_data(
descriptor.owner(),
subkey,
value.value_data(),
) {
// Validation failed, ignore this value and pretend we never saw this node
return Ok(NetworkResult::invalid_message("Schema validation failed"));
}
// If we got a value back it should be different than the one we are setting
if ctx.value.value_data() == value.value_data() {
// Move to the next node
return Ok(NetworkResult::invalid_message("same value returned"));
}
// We have a prior value, ensure this is a newer sequence number
let prior_seq = ctx.value.value_data().seq();
let new_seq = value.value_data().seq();
if new_seq >= prior_seq {
// If the sequence number is greater or equal, keep it
ctx.value = Arc::new(value);
// One node has shown us this value so far
ctx.value_nodes = vec![next_node];
ctx.missed_since_last_set = 0;
} else {
// If the sequence number is older, or an equal sequence number,
// node should have not returned a value here.
// Skip this node and its closer list because it is misbehaving
return Ok(NetworkResult::invalid_message("Sequence number is older"));
}
} else {
// It was set on this node and no newer value was found and returned,
// so increase our consensus count
// See if we got a value back
let Some(value) = sva.answer.value else {
// No newer value was found and returned, so increase our consensus count
ctx.value_nodes.push(next_node);
ctx.missed_since_last_set = 0;
// Send an update since it was set
if ctx.value_nodes.len() == 1 {
ctx.send_partial_update = true;
}
// Return peers if we have some
log_network_result!(debug "SetValue returned no value, fanout call returned peers {}", sva.answer.peers.len());
return Ok(NetworkResult::value(sva.answer.peers));
};
// Keep the value if we got one and it is newer and it passes schema validation
log_dht!(debug "SetValue got value back: len={} seq={}", value.value_data().data().len(), value.value_data().seq());
// Validate with schema
if !ctx.schema.check_subkey_value_data(
descriptor.owner(),
subkey,
value.value_data(),
) {
// Validation failed, ignore this value and pretend we never saw this node
return Ok(NetworkResult::invalid_message(format!(
"Schema validation failed on subkey {}",
subkey
)));
}
} else {
let mut ctx = context.lock();
ctx.missed_since_last_set += 1;
// If we got a value back it should be different than the one we are setting
// But in the case of a benign bug, we can just move to the next node
if ctx.value.value_data() == value.value_data() {
ctx.value_nodes.push(next_node);
ctx.missed_since_last_set = 0;
// Send an update since it was set
if ctx.value_nodes.len() == 1 {
ctx.send_partial_update = true;
}
return Ok(NetworkResult::value(sva.answer.peers));
}
// We have a prior value, ensure this is a newer sequence number
let prior_seq = ctx.value.value_data().seq();
let new_seq = value.value_data().seq();
if new_seq < prior_seq {
// If the sequence number is older node should have not returned a value here.
// Skip this node and its closer list because it is misbehaving
// Ignore this value and pretend we never saw this node
return Ok(NetworkResult::invalid_message("Sequence number is older"));
}
// If the sequence number is greater or equal, keep it
// even if the sequence number is the same, accept all conflicts in an attempt to resolve them
ctx.value = Arc::new(value);
// One node has shown us this value so far
ctx.value_nodes = vec![next_node];
ctx.missed_since_last_set = 0;
// Send an update since the value changed
ctx.send_partial_update = true;
Ok(NetworkResult::value(sva.answer.peers))
}
// Return peers if we have some
log_network_result!(debug "SetValue fanout call returned peers {}", sva.answer.peers.len());
Ok(NetworkResult::value(sva.answer.peers))
}
};
// Routine to call to check if we're done at each step
let check_done = |_closest_nodes: &[NodeRef]| {
let check_done = {
let context = context.clone();
let out_tx = out_tx.clone();
move |_closest_nodes: &[NodeRef]| {
let mut ctx = context.lock();
// send partial update if desired
if ctx.send_partial_update {
ctx.send_partial_update = false;
// return partial result
let fanout_result = FanoutResult {
kind: FanoutResultKind::Partial,
value_nodes: ctx.value_nodes.clone(),
};
let out=OutboundSetValueResult {
fanout_result,
signed_value_data: ctx.value.clone()};
log_dht!(debug "Sending partial SetValue result: {:?}", out);
if let Err(e) = out_tx.send(Ok(out)) {
log_dht!(debug "Sending partial SetValue result failed: {}", e);
}
}
// If we have reached sufficient consensus, return done
if ctx.value_nodes.len() >= consensus_count {
return Some(());
}
// If we have missed more than our consensus count since our last set, return done
// This keeps the traversal from searching too many nodes when we aren't converging
// Only do this if we have gotten at least half our desired sets.
if ctx.value_nodes.len() >= ((consensus_count + 1) / 2)
&& ctx.missed_since_last_set >= consensus_count
{
return Some(());
}
None
}
};
// Call the fanout in a spawned task
spawn(Box::pin(async move {
let fanout_call = FanoutCall::new(
routing_table.clone(),
key,
key_count,
fanout,
timeout_us,
capability_fanout_node_info_filter(vec![CAP_DHT]),
call_routine,
check_done,
);
let kind = match fanout_call.run(init_fanout_queue).await {
// If we don't finish in the timeout (too much time passed checking for consensus)
TimeoutOr::Timeout => FanoutResultKind::Timeout,
// If we finished with or without consensus (enough nodes returning the same value)
TimeoutOr::Value(Ok(Some(()))) => FanoutResultKind::Finished,
// If we ran out of nodes before getting consensus
TimeoutOr::Value(Ok(None)) => FanoutResultKind::Exhausted,
// Failed
TimeoutOr::Value(Err(e)) => {
// If we finished with an error, return that
log_dht!(debug "SetValue fanout error: {}", e);
if let Err(e) = out_tx.send(Err(e.into())) {
log_dht!(debug "Sending SetValue fanout error failed: {}", e);
}
return;
}
};
let ctx = context.lock();
let fanout_result = FanoutResult {
kind,
value_nodes: ctx.value_nodes.clone(),
};
log_network_result!(debug "SetValue Fanout: {:?}", fanout_result);
// If we have reached sufficient consensus, return done
if ctx.value_nodes.len() >= consensus_count {
return Some(());
if let Err(e) = out_tx.send(Ok(OutboundSetValueResult {
fanout_result,
signed_value_data: ctx.value.clone(),
})) {
log_dht!(debug "Sending SetValue result failed: {}", e);
}
// If we have missed more than our consensus count since our last set, return done
// This keeps the traversal from searching too many nodes when we aren't converging
// Only do this if we have gotten at least half our desired sets.
if ctx.value_nodes.len() >= ((consensus_count + 1) / 2)
&& ctx.missed_since_last_set >= consensus_count
{
return Some(());
}
None
};
}))
.detach();
// Call the fanout
let fanout_call = FanoutCall::new(
routing_table.clone(),
key,
key_count,
fanout,
timeout_us,
capability_fanout_node_info_filter(vec![CAP_DHT]),
call_routine,
check_done,
);
let kind = match fanout_call.run(init_fanout_queue).await {
// If we don't finish in the timeout (too much time passed checking for consensus)
TimeoutOr::Timeout => FanoutResultKind::Timeout,
// If we finished with or without consensus (enough nodes returning the same value)
TimeoutOr::Value(Ok(Some(()))) => FanoutResultKind::Finished,
// If we ran out of nodes before getting consensus)
TimeoutOr::Value(Ok(None)) => FanoutResultKind::Exhausted,
// Failed
TimeoutOr::Value(Err(e)) => {
// If we finished with an error, return that
log_dht!(debug "SetValue Fanout Error: {}", e);
return Err(e.into());
}
};
let ctx = context.lock();
let fanout_result = FanoutResult {
kind,
value_nodes: ctx.value_nodes.clone(),
};
log_network_result!(debug "SetValue Fanout: {:?}", fanout_result);
Ok(OutboundSetValueResult {
fanout_result,
signed_value_data: ctx.value.clone(),
})
Ok(out_rx)
}
pub(super) fn process_deferred_outbound_set_value_result_inner(&self, inner: &mut StorageManagerInner,
res_rx: flume::Receiver<Result<set_value::OutboundSetValueResult, VeilidAPIError>>,
key: TypedKey, subkey: ValueSubkey, last_value_data: ValueData, safety_selection: SafetySelection, ) {
let this = self.clone();
let last_value_data = Arc::new(Mutex::new(last_value_data));
inner.process_deferred_results(
res_rx,
Box::new(
move |result: VeilidAPIResult<set_value::OutboundSetValueResult>| -> SendPinBoxFuture<bool> {
let this = this.clone();
let last_value_data = last_value_data.clone();
Box::pin(async move {
let result = match result {
Ok(v) => v,
Err(e) => {
log_rtab!(debug "Deferred fanout error: {}", e);
return false;
}
};
let is_partial = result.fanout_result.kind.is_partial();
let lvd = last_value_data.lock().clone();
let value_data = match this.process_outbound_set_value_result(key, subkey, lvd, safety_selection, result).await {
Ok(Some(v)) => v,
Ok(None) => {
return is_partial;
}
Err(e) => {
log_rtab!(debug "Deferred fanout error: {}", e);
return false;
}
};
if is_partial {
// If more partial results show up, don't send an update until we're done
return true;
}
// If we processed the final result, possibly send an update
// if the sequence number changed since our first partial update
// Send with a max count as this is not attached to any watch
let changed = {
let mut lvd = last_value_data.lock();
if lvd.seq() != value_data.seq() {
*lvd = value_data.clone();
true
} else {
false
}
};
if changed {
if let Err(e) = this.update_callback_value_change(key,ValueSubkeyRangeSet::single(subkey), u32::MAX, Some(value_data)).await {
log_rtab!(debug "Failed sending deferred fanout value change: {}", e);
}
}
// Return done
false
})
},
),
);
}
/// Apply one outbound SetValue fanout result to the local storage state.
///
/// Schedules an offline subkey write when `check_fanout_set_offline` reports the
/// write as offline, records the fanout result for the subkey, and stores the value
/// returned by the network when it is not equal to `last_value_data`.
///
/// Returns `Ok(Some(value_data))` when the network value differs from
/// `last_value_data`, and `Ok(None)` when the value that was asked to be set is the
/// one that came back.
pub(super) async fn process_outbound_set_value_result(
    &self,
    key: TypedKey,
    subkey: ValueSubkey,
    last_value_data: ValueData,
    safety_selection: SafetySelection,
    result: set_value::OutboundSetValueResult,
) -> Result<Option<ValueData>, VeilidAPIError> {
    // Regain the lock after network access
    let mut inner = self.lock().await?;

    // If the fanout counts as offline the write failed, so try again later
    if self.check_fanout_set_offline(key, subkey, &result.fanout_result) {
        inner.add_offline_subkey_write(key, subkey, safety_selection);
    }

    // Keep the list of nodes that returned a value for later reference
    let subkey_results = core::iter::once((subkey, &result.fanout_result));
    inner.process_fanout_results(key, subkey_results, true);

    // If the original value was set, there is nothing new to report
    let network_value = &result.signed_value_data;
    if network_value.value_data() == &last_value_data {
        return Ok(None);
    }

    // Record the newer value and send an update since it is different than what we just set
    inner
        .handle_set_local_value(
            key,
            subkey,
            network_value.clone(),
            WatchUpdateMode::UpdateAll,
        )
        .await?;
    Ok(Some(network_value.value_data().clone()))
}
/// Handle a received 'Set Value' query
/// Returns a None if the value passed in was set
@ -232,8 +417,17 @@ impl StorageManager {
// Make sure this value would actually be newer
if let Some(last_value) = &last_get_result.opt_value {
if value.value_data().seq() <= last_value.value_data().seq() {
// inbound value is older than or equal to the sequence number that we have, just return the one we have
if value.value_data().seq() < last_value.value_data().seq() {
// inbound value is older than the sequence number that we have, just return the one we have
return Ok(NetworkResult::value(Some(last_value.clone())));
} else if value.value_data().seq() == last_value.value_data().seq() {
// inbound value is equal to the sequence number that we have
// if the value is the same including the writer, return nothing,
// otherwise return the existing value because it was here first
if value.value_data() == last_value.value_data() {
return Ok(NetworkResult::value(None));
}
// sequence number is the same but there's a value conflict, return what we have
return Ok(NetworkResult::value(Some(last_value.clone())));
}
}

View File

@ -32,6 +32,8 @@ pub(super) struct StorageManagerInner {
pub tick_future: Option<SendPinBoxFuture<()>>,
/// Update callback to send ValueChanged notification to
pub update_callback: Option<UpdateCallback>,
/// Deferred result processor
pub deferred_result_processor: DeferredStreamProcessor,
/// The maximum consensus count
set_consensus_count: usize,
@ -88,6 +90,7 @@ impl StorageManagerInner {
opt_routing_table: Default::default(),
tick_future: Default::default(),
update_callback: None,
deferred_result_processor: DeferredStreamProcessor::default(),
set_consensus_count,
}
}
@ -126,6 +129,9 @@ impl StorageManagerInner {
self.load_metadata().await?;
// Start deferred results processors
self.deferred_result_processor.init().await;
// Schedule tick
let tick_future = interval(1000, move || {
let this = outer_self.clone();
@ -151,6 +157,9 @@ impl StorageManagerInner {
f.await;
}
// Stop deferred result processor
self.deferred_result_processor.terminate().await;
// Final flush on record stores
if let Some(mut local_record_store) = self.local_record_store.take() {
if let Err(e) = local_record_store.flush().await {
@ -708,4 +717,12 @@ impl StorageManagerInner {
subkeys: ValueSubkeyRangeSet::single(subkey),
});
}
/// Hand a stream of results to the deferred result processor.
///
/// * `receiver` - the channel the results arrive on
/// * `handler` - callback invoked with each received item
///
/// Returns whatever `DeferredStreamProcessor::add` returns for the stream.
pub fn process_deferred_results<T: Send + 'static>(
    &mut self,
    receiver: flume::Receiver<T>,
    handler: impl FnMut(T) -> SendPinBoxFuture<bool> + Send + 'static,
) -> bool {
    let processor = &mut self.deferred_result_processor;
    processor.add(receiver, handler)
}
}

View File

@ -68,24 +68,50 @@ impl StorageManager {
)
.await;
match osvres {
Ok(result) => {
let was_offline =
self.check_fanout_set_offline(*key, subkey, &result.fanout_result);
if !was_offline {
if let Some(update_callback) = opt_update_callback.clone() {
// Send valuechange with dead count and no subkeys
update_callback(VeilidUpdate::ValueChange(Box::new(
VeilidValueChange {
key: *key,
subkeys: ValueSubkeyRangeSet::single(subkey),
count: u32::MAX,
value: Some(result.signed_value_data.value_data().clone()),
},
)));
Ok(res_rx) => {
while let Ok(res) = res_rx.recv_async().await {
match res {
Ok(result) => {
let partial = result.fanout_result.kind.is_partial();
// Skip partial results in offline subkey write mode
if partial {
continue;
}
// Process non-partial setvalue result
let was_offline = self.check_fanout_set_offline(
*key,
subkey,
&result.fanout_result,
);
if !was_offline {
if let Some(update_callback) = opt_update_callback.clone() {
// Send valuechange with dead count and no subkeys
update_callback(VeilidUpdate::ValueChange(Box::new(
VeilidValueChange {
key: *key,
subkeys: ValueSubkeyRangeSet::single(subkey),
count: u32::MAX,
value: Some(
result
.signed_value_data
.value_data()
.clone(),
),
},
)));
}
written_subkeys.insert(subkey);
};
fanout_results.push((subkey, result.fanout_result));
break;
}
Err(e) => {
log_stor!(debug "failed to get offline subkey write result: {}:{} {}", key, subkey, e);
break;
}
}
written_subkeys.insert(subkey);
};
fanout_results.push((subkey, result.fanout_result));
}
}
Err(e) => {
log_stor!(debug "failed to write offline subkey: {}:{} {}", key, subkey, e);

View File

@ -417,7 +417,7 @@ impl StorageManager {
watch_id: u64,
) -> VeilidAPIResult<NetworkResult<()>> {
// Update local record store with new value
let (is_value_seq_newer, opt_update_callback, value) = {
let (is_value_seq_newer, value) = {
let mut inner = self.lock().await?;
// Don't process update if the record is closed
@ -516,7 +516,7 @@ impl StorageManager {
}
}
(is_value_seq_newer, inner.update_callback.clone(), value)
(is_value_seq_newer, value)
};
// Announce ValueChanged VeilidUpdate
@ -526,18 +526,13 @@ impl StorageManager {
let do_update = is_value_seq_newer || subkeys.len() > 1 || count == 0;
if do_update {
if let Some(update_callback) = opt_update_callback {
update_callback(VeilidUpdate::ValueChange(Box::new(VeilidValueChange {
key,
subkeys,
count,
value: if is_value_seq_newer {
Some(value.unwrap().value_data().clone())
} else {
None
},
})));
}
let value = if is_value_seq_newer {
Some(value.unwrap().value_data().clone())
} else {
None
};
self.update_callback_value_change(key, subkeys, count, value)
.await?;
}
Ok(NetworkResult::value(()))

View File

@ -853,15 +853,20 @@ impl VeilidAPI {
Ok("Buckets purged".to_owned())
} else if args[0] == "connections" {
// Purge connection table
let connection_manager = self.network_manager()?.connection_manager();
connection_manager.shutdown().await;
let opt_connection_manager = self.network_manager()?.opt_connection_manager();
if let Some(connection_manager) = &opt_connection_manager {
connection_manager.shutdown().await;
}
// Eliminate last_connections from routing table entries
self.network_manager()?
.routing_table()
.purge_last_connections();
connection_manager.startup().await;
if let Some(connection_manager) = &opt_connection_manager {
connection_manager.startup().await;
}
Ok("Connections purged".to_owned())
} else if args[0] == "routes" {
@ -940,6 +945,9 @@ impl VeilidAPI {
async fn debug_resolve(&self, args: String) -> VeilidAPIResult<String> {
let netman = self.network_manager()?;
let routing_table = netman.routing_table();
let Some(_rpc) = netman.opt_rpc_processor() else {
apibail_internal!("Must be attached first");
};
let args: Vec<String> = args.split_whitespace().map(|s| s.to_owned()).collect();
@ -981,7 +989,9 @@ impl VeilidAPI {
async fn debug_ping(&self, args: String) -> VeilidAPIResult<String> {
let netman = self.network_manager()?;
let routing_table = netman.routing_table();
let rpc = netman.rpc_processor();
let Some(rpc) = netman.opt_rpc_processor() else {
apibail_internal!("Must be attached first");
};
let args: Vec<String> = args.split_whitespace().map(|s| s.to_owned()).collect();
@ -1012,7 +1022,9 @@ impl VeilidAPI {
async fn debug_app_message(&self, args: String) -> VeilidAPIResult<String> {
let netman = self.network_manager()?;
let routing_table = netman.routing_table();
let rpc = netman.rpc_processor();
let Some(rpc) = netman.opt_rpc_processor() else {
apibail_internal!("Must be attached first");
};
let (arg, rest) = args.split_once(' ').unwrap_or((&args, ""));
let rest = rest.trim_start().to_owned();
@ -1046,7 +1058,9 @@ impl VeilidAPI {
async fn debug_app_call(&self, args: String) -> VeilidAPIResult<String> {
let netman = self.network_manager()?;
let routing_table = netman.routing_table();
let rpc = netman.rpc_processor();
let Some(rpc) = netman.opt_rpc_processor() else {
apibail_internal!("Must be attached first");
};
let (arg, rest) = args.split_once(' ').unwrap_or((&args, ""));
let rest = rest.trim_start().to_owned();
@ -1083,7 +1097,9 @@ impl VeilidAPI {
async fn debug_app_reply(&self, args: String) -> VeilidAPIResult<String> {
let netman = self.network_manager()?;
let rpc = netman.rpc_processor();
let Some(rpc) = netman.opt_rpc_processor() else {
apibail_internal!("Must be attached first");
};
let (call_id, data) = if let Some(stripped_args) = args.strip_prefix('#') {
let (arg, rest) = stripped_args.split_once(' ').unwrap_or((&args, ""));

View File

@ -416,7 +416,7 @@ impl RoutingContext {
/// This is useful for checking if you should push new subkeys to the network, or retrieve the current state of a record from the network
/// to see what needs updating locally.
///
/// * `key` is the record key to watch. it must first be opened for reading or writing.
/// * `key` is the record key to inspect. it must first be opened for reading or writing.
/// * `subkeys` is the range of subkeys to inspect. The range must not exceed 512 discrete non-overlapping or adjacent subranges.
/// If no range is specified, this is equivalent to inspecting the entire range of subkeys. In total, the list of subkeys returned will be truncated at 512 elements.
/// * `scope` is what kind of range the inspection has:

View File

@ -17,6 +17,21 @@ pub enum AttachmentState {
OverAttached = 6,
Detaching = 7,
}
impl AttachmentState {
pub fn is_detached(&self) -> bool {
matches!(self, Self::Detached)
}
pub fn is_attached(&self) -> bool {
matches!(
self,
Self::AttachedWeak
| Self::AttachedGood
| Self::AttachedStrong
| Self::FullyAttached
| Self::OverAttached
)
}
}
impl fmt::Display for AttachmentState {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
@ -52,58 +67,93 @@ impl TryFrom<String> for AttachmentState {
}
}
/// Describe the attachment state of the Veilid node
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
#[cfg_attr(target_arch = "wasm32", derive(Tsify))]
pub struct VeilidStateAttachment {
/// The overall quality of the routing table if attached, or the current state of the attachment state machine.
pub state: AttachmentState,
/// True if attached and there are enough reachable nodes in the routing table to perform all the actions of the PublicInternet RoutingDomain,
/// including things like private/safety route allocation and DHT operations.
pub public_internet_ready: bool,
/// True if attached and there are enough reachable nodes in the routing table to perform all the actions of the LocalNetwork RoutingDomain.
pub local_network_ready: bool,
}
/// Describe a recently accessed peer
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
#[cfg_attr(target_arch = "wasm32", derive(Tsify))]
pub struct PeerTableData {
/// The node ids used by this peer.
/// Described as a list of strings in the generated JSON schema.
#[schemars(with = "Vec<String>")]
#[cfg_attr(target_arch = "wasm32", tsify(type = "string[]"))]
pub node_ids: Vec<TypedKey>,
/// The peer's human-readable address.
pub peer_address: String,
/// Statistics we have collected on this peer.
pub peer_stats: PeerStats,
}
/// Describe the current network state of the Veilid node
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
#[cfg_attr(target_arch = "wasm32", derive(Tsify))]
pub struct VeilidStateNetwork {
/// Whether the network has been started.
pub started: bool,
/// The total number of bytes per second currently used by Veilid in the download direction.
pub bps_down: ByteCount,
/// The total number of bytes per second currently used by Veilid in the upload direction.
pub bps_up: ByteCount,
/// The list of most recently accessed peers.
/// This is not an active connection table, nor is it representative of the entire routing table.
pub peers: Vec<PeerTableData>,
}
/// Describe a private route change that has happened
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
#[cfg_attr(target_arch = "wasm32", derive(Tsify))]
pub struct VeilidRouteChange {
/// If a private route that was allocated has died, it is listed here.
/// Described as a list of strings in the generated JSON schema.
#[schemars(with = "Vec<String>")]
pub dead_routes: Vec<RouteId>,
/// If a private route that was imported has died, it is listed here.
/// Described as a list of strings in the generated JSON schema.
#[schemars(with = "Vec<String>")]
pub dead_remote_routes: Vec<RouteId>,
}
/// Describe changes to the Veilid node configuration.
/// Currently this is only ever emitted once; however, we reserve the right to
/// add the ability to change the configuration, or have it changed by the Veilid node
/// itself, during runtime.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
#[cfg_attr(target_arch = "wasm32", derive(Tsify))]
pub struct VeilidStateConfig {
/// If the Veilid node configuration has changed, the full new config will be here.
pub config: VeilidConfigInner,
}
/// Describe when DHT records have subkey values changed
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
#[cfg_attr(target_arch = "wasm32", derive(Tsify))]
pub struct VeilidValueChange {
/// The DHT Record key that changed.
/// Described as a string in the generated JSON schema.
#[schemars(with = "String")]
pub key: TypedKey,
/// The portion of the DHT Record's subkeys that have changed.
/// If the subkey range is empty, any watch present on the value has died.
pub subkeys: ValueSubkeyRangeSet,
/// The count remaining on the watch that triggered this value change.
/// If there is no watch and this is received, it will be set to u32::MAX.
/// If this value is zero, any watch present on the value has died.
pub count: u32,
/// The (optional) value data for the first subkey in the subkeys range.
/// If 'subkeys' is not a single value, other values than the first value
/// must be retrieved with RoutingContext::get_dht_value().
pub value: Option<ValueData>,
}
/// An update from the veilid-core to the host application describing a change
/// to the internal state of the Veilid node.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
#[cfg_attr(target_arch = "wasm32", derive(Tsify), tsify(into_wasm_abi))]
#[serde(tag = "kind")]
@ -120,6 +170,7 @@ pub enum VeilidUpdate {
}
from_impl_to_jsvalue!(VeilidUpdate);
/// A queriable state of the internals of veilid-core.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
#[cfg_attr(target_arch = "wasm32", derive(Tsify), tsify(into_wasm_abi))]
pub struct VeilidState {

View File

@ -21,10 +21,10 @@ packages:
dependency: transitive
description:
name: async_tools
sha256: "972f68ab663724d86260a31e363c1355ff493308441b872bf4e7b8adc67c832c"
sha256: e783ac6ed5645c86da34240389bb3a000fc5e3ae6589c6a482eb24ece7217681
url: "https://pub.dev"
source: hosted
version: "0.1.0"
version: "0.1.1"
boolean_selector:
dependency: transitive
description:
@ -85,10 +85,10 @@ packages:
dependency: "direct main"
description:
name: cupertino_icons
sha256: d57953e10f9f8327ce64a508a355f0b1ec902193f66288e8cb5070e7c47eeb2d
sha256: ba631d1c7f7bef6b729a622b7b752645a2d076dba9976925b8f25725a30e1ee6
url: "https://pub.dev"
source: hosted
version: "1.0.6"
version: "1.0.8"
equatable:
dependency: transitive
description:
@ -187,34 +187,34 @@ packages:
dependency: transitive
description:
name: json_annotation
sha256: b10a7b2ff83d83c777edba3c6a0f97045ddadd56c944e1a23a3fdf43a1bf4467
sha256: "1ce844379ca14835a50d2f019a3099f419082cfdd231cd86a142af94dd5c6bb1"
url: "https://pub.dev"
source: hosted
version: "4.8.1"
version: "4.9.0"
leak_tracker:
dependency: transitive
description:
name: leak_tracker
sha256: "78eb209deea09858f5269f5a5b02be4049535f568c07b275096836f01ea323fa"
sha256: "7f0df31977cb2c0b88585095d168e689669a2cc9b97c309665e3386f3e9d341a"
url: "https://pub.dev"
source: hosted
version: "10.0.0"
version: "10.0.4"
leak_tracker_flutter_testing:
dependency: transitive
description:
name: leak_tracker_flutter_testing
sha256: b46c5e37c19120a8a01918cfaf293547f47269f7cb4b0058f21531c2465d6ef0
sha256: "06e98f569d004c1315b991ded39924b21af84cf14cc94791b8aea337d25b57f8"
url: "https://pub.dev"
source: hosted
version: "2.0.1"
version: "3.0.3"
leak_tracker_testing:
dependency: transitive
description:
name: leak_tracker_testing
sha256: a597f72a664dbd293f3bfc51f9ba69816f84dcd403cdac7066cb3f6003f3ab47
sha256: "6ba465d5d76e67ddf503e1161d1f4a6bc42306f9d66ca1e8f079a47290fb06d3"
url: "https://pub.dev"
source: hosted
version: "2.0.1"
version: "3.0.1"
lint_hard:
dependency: "direct dev"
description:
@ -259,10 +259,10 @@ packages:
dependency: transitive
description:
name: meta
sha256: d584fa6707a52763a52446f02cc621b077888fb63b93bbcb1143a7be5a0c0c04
sha256: "7687075e408b093f36e6bbf6c91878cc0d4cd10f409506f7bc996f68220b9136"
url: "https://pub.dev"
source: hosted
version: "1.11.0"
version: "1.12.0"
path:
dependency: "direct main"
description:
@ -275,26 +275,26 @@ packages:
dependency: "direct main"
description:
name: path_provider
sha256: b27217933eeeba8ff24845c34003b003b2b22151de3c908d0e679e8fe1aa078b
sha256: c9e7d3a4cd1410877472158bee69963a4579f78b68c65a2b7d40d1a7a88bb161
url: "https://pub.dev"
source: hosted
version: "2.1.2"
version: "2.1.3"
path_provider_android:
dependency: transitive
description:
name: path_provider_android
sha256: "477184d672607c0a3bf68fbbf601805f92ef79c82b64b4d6eb318cbca4c48668"
sha256: a248d8146ee5983446bf03ed5ea8f6533129a12b11f12057ad1b4a67a2b3b41d
url: "https://pub.dev"
source: hosted
version: "2.2.2"
version: "2.2.4"
path_provider_foundation:
dependency: transitive
description:
name: path_provider_foundation
sha256: "5a7999be66e000916500be4f15a3633ebceb8302719b47b9cc49ce924125350f"
sha256: f234384a3fdd67f989b4d54a5d73ca2a6c422fa55ae694381ae0f4375cd1ea16
url: "https://pub.dev"
source: hosted
version: "2.3.2"
version: "2.4.0"
path_provider_linux:
dependency: transitive
description:
@ -424,10 +424,10 @@ packages:
dependency: transitive
description:
name: test_api
sha256: "5c2f730018264d276c20e4f1503fd1308dfbbae39ec8ee63c5236311ac06954b"
sha256: "9955ae474176f7ac8ee4e989dadfb411a58c30415bcfb648fa04b2b8a03afa7f"
url: "https://pub.dev"
source: hosted
version: "0.6.1"
version: "0.7.0"
typed_data:
dependency: transitive
description:
@ -462,10 +462,10 @@ packages:
dependency: transitive
description:
name: vm_service
sha256: b3d56ff4341b8f182b96aceb2fa20e3dcb336b9f867bc0eafc0de10f1048e957
sha256: "3923c89304b715fb1eb6423f017651664a03bf5f4b29983627c4da791f74a4ec"
url: "https://pub.dev"
source: hosted
version: "13.0.0"
version: "14.2.1"
webdriver:
dependency: transitive
description:
@ -478,10 +478,10 @@ packages:
dependency: transitive
description:
name: win32
sha256: "8cb58b45c47dcb42ab3651533626161d6b67a2921917d8d429791f76972b3480"
sha256: a79dbe579cb51ecd6d30b17e0cae4e0ea15e2c0e66f69ad4198f22a6789e94f4
url: "https://pub.dev"
source: hosted
version: "5.3.0"
version: "5.5.1"
xdg_directories:
dependency: transitive
description:
@ -507,5 +507,5 @@ packages:
source: hosted
version: "0.0.6"
sdks:
dart: ">=3.3.4 <4.0.0"
dart: ">=3.4.0 <4.0.0"
flutter: ">=3.19.1"

View File

@ -31,9 +31,9 @@ dependencies:
# The following adds the Cupertino Icons font to your application.
# Use with the CupertinoIcons class for iOS style icons.
cupertino_icons: ^1.0.6
cupertino_icons: ^1.0.8
loggy: ^2.0.3
path_provider: ^2.1.2
path_provider: ^2.1.3
path: ^1.9.0
xterm: ^4.0.0
flutter_acrylic: ^1.1.3

View File

@ -21,18 +21,18 @@ dependencies:
flutter_web_plugins:
sdk: flutter
freezed_annotation: ^2.4.1
json_annotation: ^4.8.1
json_annotation: ^4.9.0
path: ^1.9.0
path_provider: ^2.1.2
path_provider: ^2.1.3
system_info2: ^4.0.0
system_info_plus: ^0.0.5
dev_dependencies:
build_runner: ^2.4.8
build_runner: ^2.4.10
flutter_test:
sdk: flutter
freezed: ^2.4.7
json_serializable: ^6.7.1
freezed: ^2.5.2
json_serializable: ^6.8.0
lint_hard: ^4.0.0
# The following section is specific to Flutter.

View File

@ -0,0 +1,125 @@
use futures_util::{
future::{select, Either},
stream::FuturesUnordered,
StreamExt,
};
use stop_token::future::FutureExt as _;
use super::*;
/// Background processor for streams
/// Handles streams to completion, passing each item from the stream to a callback
///
/// All fields are `None` until `init()` is called and are taken again by `terminate()`.
pub struct DeferredStreamProcessor {
/// Sender used by `add()` to hand stream-processing futures to the background task
pub opt_deferred_stream_channel: Option<flume::Sender<SendPinBoxFuture<()>>>,
/// Stop source whose tokens are given to the background task and to every queued stream
pub opt_stopper: Option<StopSource>,
/// Join handle of the spawned background task, awaited by `terminate()`
pub opt_join_handle: Option<MustJoinHandle<()>>,
}
impl DeferredStreamProcessor {
/// Create a new DeferredStreamProcessor
/// The processor is inert until `init()` is called; `add()` returns `false` before that.
pub fn new() -> Self {
Self {
opt_deferred_stream_channel: None,
opt_stopper: None,
opt_join_handle: None,
}
}
/// Initialize the processor before use
/// Creates the stop source and the unbounded channel used to submit streams,
/// then spawns the background `processor` task and keeps its join handle.
pub async fn init(&mut self) {
let stopper = StopSource::new();
let stop_token = stopper.token();
self.opt_stopper = Some(stopper);
let (dsc_tx, dsc_rx) = flume::unbounded::<SendPinBoxFuture<()>>();
self.opt_deferred_stream_channel = Some(dsc_tx);
self.opt_join_handle = Some(spawn(Self::processor(stop_token, dsc_rx)));
}
/// Terminate the processor and ensure all streams are closed
/// Drops the submission channel and the stop source first, then waits for the
/// background task to finish if one was spawned.
pub async fn terminate(&mut self) {
drop(self.opt_deferred_stream_channel.take());
// NOTE(review): dropping the StopSource presumably triggers every outstanding
// stop token (stop-token crate semantics) - confirm
drop(self.opt_stopper.take());
if let Some(jh) = self.opt_join_handle.take() {
jh.await;
}
}
/// Background task body: drives all queued stream futures and accepts new ones
/// from `dsc_rx` until the stop token fires or the channel receive fails.
async fn processor(stop_token: StopToken, dsc_rx: flume::Receiver<SendPinBoxFuture<()>>) {
let mut unord = FuturesUnordered::<SendPinBoxFuture<()>>::new();
// Ensure the unord never finishes
unord.push(Box::pin(std::future::pending()));
// Processor loop
// Two futures are raced each iteration: progress on the queued streams,
// and arrival of a newly submitted stream
let mut unord_fut = unord.next();
let mut dsc_fut = dsc_rx.recv_async();
while let Ok(res) = select(unord_fut, dsc_fut)
.timeout_at(stop_token.clone())
.await
{
match res {
// One of the queued stream futures ran to completion
Either::Left((x, old_dsc_fut)) => {
// Unord future processor should never get empty
assert!(x.is_some());
// Make another unord future to process
unord_fut = unord.next();
// put back the other future and keep going
dsc_fut = old_dsc_fut;
}
// The submission channel produced something (a new stream or an error)
Either::Right((new_proc, old_unord_fut)) => {
// Immediately drop the old unord future
// because we never care about it completing
drop(old_unord_fut);
// A receive error ends the processor loop
let Ok(new_proc) = new_proc else {
break;
};
// Add a new stream to process
unord.push(new_proc);
// Make a new unord future because we don't care about the
// completion of the last unord future, they never return
// anything.
unord_fut = unord.next();
// Make a new receiver future
dsc_fut = dsc_rx.recv_async();
}
}
}
}
/// Queue a stream to process in the background
/// * 'receiver' is the stream to process
/// * 'handler' is the callback to handle each item from the stream.
///   Processing of the stream stops as soon as the handler's future resolves to 'false'.
/// Returns 'true' if the stream was added for processing, and 'false' if the stream could not be added, possibly due to not being initialized
pub fn add<T: Send + 'static>(
&mut self,
receiver: flume::Receiver<T>,
mut handler: impl FnMut(T) -> SendPinBoxFuture<bool> + Send + 'static,
) -> bool {
// Both the stop source and the channel only exist between init() and terminate()
let Some(st) = self.opt_stopper.as_ref().map(|s| s.token()) else {
return false;
};
let Some(dsc_tx) = self.opt_deferred_stream_channel.clone() else {
return false;
};
// Future that drains the receiver: it ends when the stop token fires,
// the receive fails, or the handler asks to stop
let drp = Box::pin(async move {
while let Ok(Ok(res)) = receiver.recv_async().timeout_at(st.clone()).await {
if !handler(res).await {
break;
}
}
});
// Hand the future to the background task; failure to send means it was not queued
if dsc_tx.send(drp).is_err() {
return false;
}
true
}
}
impl Default for DeferredStreamProcessor {
fn default() -> Self {
Self::new()
}
}

View File

@ -29,6 +29,7 @@ pub mod assembly_buffer;
pub mod async_peek_stream;
pub mod async_tag_lock;
pub mod clone_stream;
pub mod deferred_stream_processor;
pub mod eventual;
pub mod eventual_base;
pub mod eventual_value;
@ -162,6 +163,8 @@ pub use async_tag_lock::*;
#[doc(inline)]
pub use clone_stream::*;
#[doc(inline)]
pub use deferred_stream_processor::*;
#[doc(inline)]
pub use eventual::*;
#[doc(inline)]
pub use eventual_base::{EventualCommon, EventualResolvedFuture};