shutdown work and perfetto logger

This commit is contained in:
Christien Rioux 2024-07-20 18:05:24 -04:00
parent fdead16fc8
commit e568c39efb
16 changed files with 439 additions and 88 deletions

105
Cargo.lock generated
View File

@ -1958,6 +1958,12 @@ dependencies = [
"windows-sys 0.52.0", "windows-sys 0.52.0",
] ]
[[package]]
name = "fixedbitset"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80"
[[package]] [[package]]
name = "flate2" name = "flate2"
version = "1.0.30" version = "1.0.30"
@ -3147,6 +3153,12 @@ dependencies = [
"windows-sys 0.48.0", "windows-sys 0.48.0",
] ]
[[package]]
name = "multimap"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "defc4c55412d89136f966bbb339008b474350e5e6e78d2714439c386b3137a03"
[[package]] [[package]]
name = "nanorand" name = "nanorand"
version = "0.7.0" version = "0.7.0"
@ -4019,6 +4031,16 @@ dependencies = [
"sha2 0.10.8", "sha2 0.10.8",
] ]
[[package]]
name = "petgraph"
version = "0.6.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db"
dependencies = [
"fixedbitset",
"indexmap 2.2.6",
]
[[package]] [[package]]
name = "pharos" name = "pharos"
version = "0.5.3" version = "0.5.3"
@ -4170,6 +4192,16 @@ version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
[[package]]
name = "prettyplease"
version = "0.2.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f12335488a2f3b0a83b14edad48dca9879ce89b2edd10e80237e4e852dd645e"
dependencies = [
"proc-macro2",
"syn 2.0.71",
]
[[package]] [[package]]
name = "proc-macro-crate" name = "proc-macro-crate"
version = "0.1.5" version = "0.1.5"
@ -4218,6 +4250,27 @@ dependencies = [
"prost-derive 0.12.6", "prost-derive 0.12.6",
] ]
[[package]]
name = "prost-build"
version = "0.12.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22505a5c94da8e3b7c2996394d1c933236c4d743e81a410bcca4e6989fc066a4"
dependencies = [
"bytes",
"heck 0.5.0",
"itertools 0.12.1",
"log",
"multimap",
"once_cell",
"petgraph",
"prettyplease",
"prost 0.12.6",
"prost-types",
"regex",
"syn 2.0.71",
"tempfile",
]
[[package]] [[package]]
name = "prost-derive" name = "prost-derive"
version = "0.11.9" version = "0.11.9"
@ -4259,6 +4312,15 @@ version = "2.28.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94" checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94"
[[package]]
name = "protobuf-src"
version = "2.0.1+26.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8ba1cfa4b9dc098926b8cce388bf434b93516db3ecf6e8b1a37eb643d733ee7"
dependencies = [
"cmake",
]
[[package]] [[package]]
name = "quick-error" name = "quick-error"
version = "1.2.3" version = "1.2.3"
@ -5252,6 +5314,18 @@ dependencies = [
"windows 0.52.0", "windows 0.52.0",
] ]
[[package]]
name = "tempfile"
version = "3.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1"
dependencies = [
"cfg-if 1.0.0",
"fastrand 2.1.0",
"rustix 0.38.34",
"windows-sys 0.52.0",
]
[[package]] [[package]]
name = "termcolor" name = "termcolor"
version = "1.4.1" version = "1.4.1"
@ -5310,6 +5384,16 @@ dependencies = [
"syn 2.0.71", "syn 2.0.71",
] ]
[[package]]
name = "thread-id"
version = "4.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cfe8f25bbdd100db7e1d34acf7fd2dc59c4bf8f7483f505eaa7d4f12f76cc0ea"
dependencies = [
"libc",
"winapi",
]
[[package]] [[package]]
name = "thread_local" name = "thread_local"
version = "1.1.8" version = "1.1.8"
@ -5744,6 +5828,24 @@ dependencies = [
"tracing-subscriber", "tracing-subscriber",
] ]
[[package]]
name = "tracing-perfetto"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cd21777b526dfcb57f11f65aa8a2024d83e1db52841993229b6e282e511978b7"
dependencies = [
"anyhow",
"bytes",
"chrono",
"prost 0.12.6",
"prost-build",
"protobuf-src",
"rand",
"thread-id",
"tracing",
"tracing-subscriber",
]
[[package]] [[package]]
name = "tracing-subscriber" name = "tracing-subscriber"
version = "0.3.18" version = "0.3.18"
@ -6233,6 +6335,7 @@ dependencies = [
"tracing-flame", "tracing-flame",
"tracing-journald", "tracing-journald",
"tracing-opentelemetry 0.24.0", "tracing-opentelemetry 0.24.0",
"tracing-perfetto",
"tracing-subscriber", "tracing-subscriber",
"url", "url",
"veilid-bugsalot", "veilid-bugsalot",
@ -6246,7 +6349,7 @@ name = "veilid-tools"
version = "0.3.3" version = "0.3.3"
dependencies = [ dependencies = [
"android_logger 0.13.3", "android_logger 0.13.3",
"async-lock 2.8.0", "async-lock 3.4.0",
"async-std", "async-std",
"async_executors", "async_executors",
"backtrace", "backtrace",

View File

@ -414,6 +414,10 @@ impl ConnectionManager {
if !allow_accept { if !allow_accept {
return; return;
} }
let Ok(_guard) = self.arc.startup_lock.enter() else {
return;
};
// Async lock on the remote address for atomicity per remote // Async lock on the remote address for atomicity per remote
let _lock_guard = self let _lock_guard = self
.arc .arc

View File

@ -64,6 +64,8 @@ pub const PUBLIC_ADDRESS_INCONSISTENCY_PUNISHMENT_TIMEOUT_US: TimestampDuration
pub const ADDRESS_FILTER_TASK_INTERVAL_SECS: u32 = 60; pub const ADDRESS_FILTER_TASK_INTERVAL_SECS: u32 = 60;
pub const BOOT_MAGIC: &[u8; 4] = b"BOOT"; pub const BOOT_MAGIC: &[u8; 4] = b"BOOT";
static FUCK: AtomicUsize = AtomicUsize::new(0);
#[derive(Clone, Debug, Default)] #[derive(Clone, Debug, Default)]
pub struct ProtocolConfig { pub struct ProtocolConfig {
pub outbound: ProtocolTypeSet, pub outbound: ProtocolTypeSet,
@ -172,6 +174,8 @@ struct NetworkManagerUnlockedInner {
address_filter_task: TickTask<EyreReport>, address_filter_task: TickTask<EyreReport>,
// Network Key // Network Key
network_key: Option<SharedSecret>, network_key: Option<SharedSecret>,
// Startup Lock
startup_lock: StartupLock,
} }
#[derive(Clone)] #[derive(Clone)]
@ -213,6 +217,7 @@ impl NetworkManager {
public_address_check_task: TickTask::new(PUBLIC_ADDRESS_CHECK_TASK_INTERVAL_SECS), public_address_check_task: TickTask::new(PUBLIC_ADDRESS_CHECK_TASK_INTERVAL_SECS),
address_filter_task: TickTask::new(ADDRESS_FILTER_TASK_INTERVAL_SECS), address_filter_task: TickTask::new(ADDRESS_FILTER_TASK_INTERVAL_SECS),
network_key, network_key,
startup_lock: StartupLock::new(),
} }
} }
@ -445,27 +450,30 @@ impl NetworkManager {
#[instrument(level = "debug", skip_all, err)] #[instrument(level = "debug", skip_all, err)]
pub async fn startup(&self) -> EyreResult<StartupDisposition> { pub async fn startup(&self) -> EyreResult<StartupDisposition> {
let guard = self.unlocked_inner.startup_lock.startup()?;
match self.internal_startup().await { match self.internal_startup().await {
Ok(StartupDisposition::Success) => { Ok(StartupDisposition::Success) => {
guard.success();
// Inform api clients that things have changed // Inform api clients that things have changed
self.send_network_update(); self.send_network_update();
Ok(StartupDisposition::Success) Ok(StartupDisposition::Success)
} }
Ok(StartupDisposition::BindRetry) => { Ok(StartupDisposition::BindRetry) => {
self.shutdown().await; self.shutdown_internal().await;
Ok(StartupDisposition::BindRetry) Ok(StartupDisposition::BindRetry)
} }
Err(e) => { Err(e) => {
self.shutdown().await; self.shutdown_internal().await;
Err(e) Err(e)
} }
} }
} }
#[instrument(level = "debug", skip_all)] #[instrument(level = "debug", skip_all)]
pub async fn shutdown(&self) { async fn shutdown_internal(&self) {
log_net!(debug "starting network manager shutdown");
// Cancel all tasks // Cancel all tasks
self.cancel_tasks().await; self.cancel_tasks().await;
@ -487,6 +495,20 @@ impl NetworkManager {
{ {
*self.inner.lock() = NetworkManager::new_inner(); *self.inner.lock() = NetworkManager::new_inner();
} }
}
#[instrument(level = "debug", skip_all)]
pub async fn shutdown(&self) {
log_net!(debug "starting network manager shutdown");
let Ok(guard) = self.unlocked_inner.startup_lock.shutdown().await else {
log_net!(debug "network manager is already shut down");
return;
};
self.shutdown_internal().await;
guard.success();
// send update // send update
log_net!(debug "sending network state update to api clients"); log_net!(debug "sending network state update to api clients");
@ -562,6 +584,9 @@ impl NetworkManager {
extra_data: D, extra_data: D,
callback: impl ReceiptCallback, callback: impl ReceiptCallback,
) -> EyreResult<Vec<u8>> { ) -> EyreResult<Vec<u8>> {
let Ok(_guard) = self.unlocked_inner.startup_lock.enter() else {
bail!("network is not started");
};
let receipt_manager = self.receipt_manager(); let receipt_manager = self.receipt_manager();
let routing_table = self.routing_table(); let routing_table = self.routing_table();
@ -597,6 +622,10 @@ impl NetworkManager {
expiration_us: u64, expiration_us: u64,
extra_data: D, extra_data: D,
) -> EyreResult<(Vec<u8>, EventualValueFuture<ReceiptEvent>)> { ) -> EyreResult<(Vec<u8>, EventualValueFuture<ReceiptEvent>)> {
let Ok(_guard) = self.unlocked_inner.startup_lock.enter() else {
bail!("network is not started");
};
let receipt_manager = self.receipt_manager(); let receipt_manager = self.receipt_manager();
let routing_table = self.routing_table(); let routing_table = self.routing_table();
@ -633,6 +662,10 @@ impl NetworkManager {
&self, &self,
receipt_data: R, receipt_data: R,
) -> NetworkResult<()> { ) -> NetworkResult<()> {
let Ok(_guard) = self.unlocked_inner.startup_lock.enter() else {
return NetworkResult::service_unavailable("network is not started");
};
let receipt_manager = self.receipt_manager(); let receipt_manager = self.receipt_manager();
let receipt = match Receipt::from_signed_data(self.crypto(), receipt_data.as_ref()) { let receipt = match Receipt::from_signed_data(self.crypto(), receipt_data.as_ref()) {
@ -654,6 +687,10 @@ impl NetworkManager {
receipt_data: R, receipt_data: R,
inbound_noderef: NodeRef, inbound_noderef: NodeRef,
) -> NetworkResult<()> { ) -> NetworkResult<()> {
let Ok(_guard) = self.unlocked_inner.startup_lock.enter() else {
return NetworkResult::service_unavailable("network is not started");
};
let receipt_manager = self.receipt_manager(); let receipt_manager = self.receipt_manager();
let receipt = match Receipt::from_signed_data(self.crypto(), receipt_data.as_ref()) { let receipt = match Receipt::from_signed_data(self.crypto(), receipt_data.as_ref()) {
@ -674,6 +711,10 @@ impl NetworkManager {
&self, &self,
receipt_data: R, receipt_data: R,
) -> NetworkResult<()> { ) -> NetworkResult<()> {
let Ok(_guard) = self.unlocked_inner.startup_lock.enter() else {
return NetworkResult::service_unavailable("network is not started");
};
let receipt_manager = self.receipt_manager(); let receipt_manager = self.receipt_manager();
let receipt = match Receipt::from_signed_data(self.crypto(), receipt_data.as_ref()) { let receipt = match Receipt::from_signed_data(self.crypto(), receipt_data.as_ref()) {
@ -695,6 +736,10 @@ impl NetworkManager {
receipt_data: R, receipt_data: R,
private_route: PublicKey, private_route: PublicKey,
) -> NetworkResult<()> { ) -> NetworkResult<()> {
let Ok(_guard) = self.unlocked_inner.startup_lock.enter() else {
return NetworkResult::service_unavailable("network is not started");
};
let receipt_manager = self.receipt_manager(); let receipt_manager = self.receipt_manager();
let receipt = match Receipt::from_signed_data(self.crypto(), receipt_data.as_ref()) { let receipt = match Receipt::from_signed_data(self.crypto(), receipt_data.as_ref()) {
@ -710,12 +755,16 @@ impl NetworkManager {
} }
// Process a received signal // Process a received signal
#[instrument(level = "trace", target = "receipt", skip_all)] #[instrument(level = "trace", target = "net", skip_all)]
pub async fn handle_signal( pub async fn handle_signal(
&self, &self,
signal_flow: Flow, signal_flow: Flow,
signal_info: SignalInfo, signal_info: SignalInfo,
) -> EyreResult<NetworkResult<()>> { ) -> EyreResult<NetworkResult<()>> {
let Ok(_guard) = self.unlocked_inner.startup_lock.enter() else {
return Ok(NetworkResult::service_unavailable("network is not started"));
};
match signal_info { match signal_info {
SignalInfo::ReverseConnect { receipt, peer_info } => { SignalInfo::ReverseConnect { receipt, peer_info } => {
let routing_table = self.routing_table(); let routing_table = self.routing_table();
@ -809,7 +858,7 @@ impl NetworkManager {
} }
/// Builds an envelope for sending over the network /// Builds an envelope for sending over the network
#[instrument(level = "trace", target = "receipt", skip_all)] #[instrument(level = "trace", target = "net", skip_all)]
fn build_envelope<B: AsRef<[u8]>>( fn build_envelope<B: AsRef<[u8]>>(
&self, &self,
dest_node_id: TypedKey, dest_node_id: TypedKey,
@ -852,13 +901,19 @@ impl NetworkManager {
/// node_ref is the direct destination to which the envelope will be sent /// node_ref is the direct destination to which the envelope will be sent
/// If 'destination_node_ref' is specified, it can be different than the node_ref being sent to /// If 'destination_node_ref' is specified, it can be different than the node_ref being sent to
/// which will cause the envelope to be relayed /// which will cause the envelope to be relayed
#[instrument(level = "trace", target = "receipt", skip_all)] #[instrument(level = "trace", target = "net", skip_all)]
pub async fn send_envelope<B: AsRef<[u8]>>( pub async fn send_envelope<B: AsRef<[u8]>>(
&self, &self,
node_ref: NodeRef, node_ref: NodeRef,
destination_node_ref: Option<NodeRef>, destination_node_ref: Option<NodeRef>,
body: B, body: B,
) -> EyreResult<NetworkResult<SendDataMethod>> { ) -> EyreResult<NetworkResult<SendDataMethod>> {
let _dg = DebugGuard::new(&FUCK);
let Ok(_guard) = self.unlocked_inner.startup_lock.enter() else {
bail!("network is not started");
};
let destination_node_ref = destination_node_ref.as_ref().unwrap_or(&node_ref).clone(); let destination_node_ref = destination_node_ref.as_ref().unwrap_or(&node_ref).clone();
let best_node_id = destination_node_ref.best_node_id(); let best_node_id = destination_node_ref.best_node_id();
@ -896,6 +951,10 @@ impl NetworkManager {
dial_info: DialInfo, dial_info: DialInfo,
rcpt_data: Vec<u8>, rcpt_data: Vec<u8>,
) -> EyreResult<()> { ) -> EyreResult<()> {
let Ok(_guard) = self.unlocked_inner.startup_lock.enter() else {
bail!("network is not started");
};
// Do we need to validate the outgoing receipt? Probably not // Do we need to validate the outgoing receipt? Probably not
// because it is supposed to be opaque and the // because it is supposed to be opaque and the
// recipient/originator does the validation // recipient/originator does the validation
@ -916,8 +975,12 @@ impl NetworkManager {
// Called when a packet potentially containing an RPC envelope is received by a low-level // Called when a packet potentially containing an RPC envelope is received by a low-level
// network protocol handler. Processes the envelope, authenticates and decrypts the RPC message // network protocol handler. Processes the envelope, authenticates and decrypts the RPC message
// and passes it to the RPC handler // and passes it to the RPC handler
#[instrument(level = "trace", target = "receipt", skip_all)] #[instrument(level = "trace", target = "net", skip_all)]
async fn on_recv_envelope(&self, data: &mut [u8], flow: Flow) -> EyreResult<bool> { async fn on_recv_envelope(&self, data: &mut [u8], flow: Flow) -> EyreResult<bool> {
let Ok(_guard) = self.unlocked_inner.startup_lock.enter() else {
return Ok(false);
};
log_net!("envelope of {} bytes received from {:?}", data.len(), flow); log_net!("envelope of {} bytes received from {:?}", data.len(), flow);
let remote_addr = flow.remote_address().ip_addr(); let remote_addr = flow.remote_address().ip_addr();

View File

@ -941,10 +941,12 @@ impl Network {
} }
Ok(StartupDisposition::BindRetry) => { Ok(StartupDisposition::BindRetry) => {
debug!("network bind retry"); debug!("network bind retry");
self.shutdown_internal().await;
Ok(StartupDisposition::BindRetry) Ok(StartupDisposition::BindRetry)
} }
Err(e) => { Err(e) => {
debug!("network failed to start"); debug!("network failed to start");
self.shutdown_internal().await;
Err(e) Err(e)
} }
} }
@ -964,13 +966,7 @@ impl Network {
} }
#[instrument(level = "debug", skip_all)] #[instrument(level = "debug", skip_all)]
pub async fn shutdown(&self) { async fn shutdown_internal(&self) {
log_net!(debug "starting low level network shutdown");
let Ok(guard) = self.unlocked_inner.startup_lock.shutdown().await else {
log_net!(debug "low level network is already shut down");
return;
};
let routing_table = self.routing_table(); let routing_table = self.routing_table();
// Stop all tasks // Stop all tasks
@ -1014,6 +1010,17 @@ impl Network {
// Reset state including network class // Reset state including network class
*self.inner.lock() = Self::new_inner(); *self.inner.lock() = Self::new_inner();
}
#[instrument(level = "debug", skip_all)]
pub async fn shutdown(&self) {
log_net!(debug "starting low level network shutdown");
let Ok(guard) = self.unlocked_inner.startup_lock.shutdown().await else {
log_net!(debug "low level network is already shut down");
return;
};
self.shutdown_internal().await;
guard.success(); guard.success();
log_net!(debug "finished low level network shutdown"); log_net!(debug "finished low level network shutdown");

View File

@ -309,10 +309,6 @@ impl NetworkConnection {
match res { match res {
Ok((_span_id, message)) => { Ok((_span_id, message)) => {
// let span = span!(Level::TRACE, "process_connection send");
// span.follows_from(span_id);
// let _enter = span.enter();
// Touch the LRU for this connection // Touch the LRU for this connection
connection_manager.touch_connection_by_id(connection_id); connection_manager.touch_connection_by_id(connection_id);

View File

@ -35,7 +35,7 @@ impl Default for NetworkManagerStats {
impl NetworkManager { impl NetworkManager {
// Callbacks from low level network for statistics gathering // Callbacks from low level network for statistics gathering
pub fn stats_packet_sent(&self, addr: IpAddr, bytes: ByteCount) { pub(crate) fn stats_packet_sent(&self, addr: IpAddr, bytes: ByteCount) {
let inner = &mut *self.inner.lock(); let inner = &mut *self.inner.lock();
inner inner
.stats .stats
@ -52,7 +52,7 @@ impl NetworkManager {
.add_up(bytes); .add_up(bytes);
} }
pub fn stats_packet_rcvd(&self, addr: IpAddr, bytes: ByteCount) { pub(crate) fn stats_packet_rcvd(&self, addr: IpAddr, bytes: ByteCount) {
let inner = &mut *self.inner.lock(); let inner = &mut *self.inner.lock();
inner inner
.stats .stats

View File

@ -114,7 +114,6 @@ impl RoutingTable {
rpc.rpc_call_status(Destination::direct(relay_nr_filtered)) rpc.rpc_call_status(Destination::direct(relay_nr_filtered))
.await .await
} }
.instrument(Span::current())
.boxed(), .boxed(),
); );
} }
@ -159,9 +158,7 @@ impl RoutingTable {
log_rtab!("--> Watch ping to {:?}", watch_nr); log_rtab!("--> Watch ping to {:?}", watch_nr);
futurequeue.push_back( futurequeue.push_back(
async move { rpc.rpc_call_status(Destination::direct(watch_nr)).await } async move { rpc.rpc_call_status(Destination::direct(watch_nr)).await }.boxed(),
.instrument(Span::current())
.boxed(),
); );
} }
Ok(()) Ok(())
@ -198,9 +195,7 @@ impl RoutingTable {
let rpc = rpc.clone(); let rpc = rpc.clone();
log_rtab!("--> Validator ping to {:?}", nr); log_rtab!("--> Validator ping to {:?}", nr);
futurequeue.push_back( futurequeue.push_back(
async move { rpc.rpc_call_status(Destination::direct(nr)).await } async move { rpc.rpc_call_status(Destination::direct(nr)).await }.boxed(),
.instrument(Span::current())
.boxed(),
); );
} }
@ -226,9 +221,7 @@ impl RoutingTable {
// Just do a single ping with the best protocol for all the nodes // Just do a single ping with the best protocol for all the nodes
futurequeue.push_back( futurequeue.push_back(
async move { rpc.rpc_call_status(Destination::direct(nr)).await } async move { rpc.rpc_call_status(Destination::direct(nr)).await }.boxed(),
.instrument(Span::current())
.boxed(),
); );
} }
@ -244,6 +237,8 @@ impl RoutingTable {
_last_ts: Timestamp, _last_ts: Timestamp,
cur_ts: Timestamp, cur_ts: Timestamp,
) -> EyreResult<()> { ) -> EyreResult<()> {
eprintln!("pv tick");
let mut futurequeue: VecDeque<PingValidatorFuture> = VecDeque::new(); let mut futurequeue: VecDeque<PingValidatorFuture> = VecDeque::new();
// PublicInternet // PublicInternet
@ -258,7 +253,6 @@ impl RoutingTable {
let mut unord = FuturesUnordered::new(); let mut unord = FuturesUnordered::new();
while !unord.is_empty() || !futurequeue.is_empty() { while !unord.is_empty() || !futurequeue.is_empty() {
#[cfg(feature = "verbose-tracing")]
log_rtab!(debug "Ping validation queue: {} remaining, {} in progress", futurequeue.len(), unord.len()); log_rtab!(debug "Ping validation queue: {} remaining, {} in progress", futurequeue.len(), unord.len());
// Process one unordered futures if we have some // Process one unordered futures if we have some
match unord.next().timeout_at(stop_token.clone()).await { match unord.next().timeout_at(stop_token.clone()).await {
@ -279,7 +273,7 @@ impl RoutingTable {
let Some(fq) = futurequeue.pop_front() else { let Some(fq) = futurequeue.pop_front() else {
break; break;
}; };
unord.push(fq); unord.push(fq.in_current_span());
} }
} }

View File

@ -81,6 +81,7 @@ stop-token = { version = "^0", default-features = false }
sysinfo = { version = "^0.30.13" } sysinfo = { version = "^0.30.13" }
wg = { version = "^0.9.1", features = ["future"] } wg = { version = "^0.9.1", features = ["future"] }
tracing-flame = "0.2.0" tracing-flame = "0.2.0"
tracing-perfetto = "0.1.1"
[target.'cfg(windows)'.dependencies] [target.'cfg(windows)'.dependencies]
windows-service = "^0" windows-service = "^0"

View File

@ -84,10 +84,14 @@ pub struct CmdlineArgs {
#[arg(long, value_name = "endpoint")] #[arg(long, value_name = "endpoint")]
otlp: Option<String>, otlp: Option<String>,
/// Turn on flamegraph tracing (experimental, isn't terribly useful) /// Turn on flamegraph tracing (experimental)
#[arg(long, hide = true, value_name = "PATH", num_args=0..=1, require_equals=true, default_missing_value = "")] #[arg(long, hide = true, value_name = "PATH", num_args=0..=1, require_equals=true, default_missing_value = "")]
flame: Option<OsString>, flame: Option<OsString>,
/// Turn on perfetto tracing (experimental)
#[arg(long, hide = true, value_name = "PATH", num_args=0..=1, require_equals=true, default_missing_value = "")]
perfetto: Option<OsString>,
/// Run as an extra daemon on the same machine for testing purposes, specify a number greater than zero to offset the listening ports /// Run as an extra daemon on the same machine for testing purposes, specify a number greater than zero to offset the listening ports
#[arg(long)] #[arg(long)]
subnode_index: Option<u16>, subnode_index: Option<u16>,
@ -223,6 +227,18 @@ fn main() -> EyreResult<()> {
settingsrw.logging.flame.enabled = true; settingsrw.logging.flame.enabled = true;
settingsrw.logging.flame.path = flame; settingsrw.logging.flame.path = flame;
} }
if let Some(perfetto) = args.perfetto {
let perfetto = if perfetto.is_empty() {
Settings::get_default_perfetto_path(settingsrw.testing.subnode_index)
.to_string_lossy()
.to_string()
} else {
perfetto.to_string_lossy().to_string()
};
println!("Enabling perfetto output to {}", perfetto);
settingsrw.logging.perfetto.enabled = true;
settingsrw.logging.perfetto.path = perfetto;
}
if args.no_attach { if args.no_attach {
settingsrw.auto_attach = false; settingsrw.auto_attach = false;

View File

@ -69,6 +69,9 @@ logging:
flame: flame:
enabled: false enabled: false
path: '' path: ''
perfetto:
enabled: false
path: ''
console: console:
enabled: false enabled: false
testing: testing:
@ -451,6 +454,12 @@ pub struct Flame {
pub path: String, pub path: String,
} }
#[derive(Debug, Deserialize, Serialize)]
pub struct Perfetto {
pub enabled: bool,
pub path: String,
}
#[derive(Debug, Deserialize, Serialize)] #[derive(Debug, Deserialize, Serialize)]
pub struct Console { pub struct Console {
pub enabled: bool, pub enabled: bool,
@ -503,6 +512,7 @@ pub struct Logging {
pub api: Api, pub api: Api,
pub otlp: Otlp, pub otlp: Otlp,
pub flame: Flame, pub flame: Flame,
pub perfetto: Perfetto,
pub console: Console, pub console: Console,
} }
@ -873,6 +883,15 @@ impl Settings {
}) })
} }
/// Determine default perfetto output path
pub fn get_default_perfetto_path(subnode_index: u16) -> PathBuf {
std::env::temp_dir().join(if subnode_index == 0 {
"veilid-server.pftrace".to_owned()
} else {
format!("veilid-server-{}.pftrace", subnode_index)
})
}
#[allow(dead_code)] #[allow(dead_code)]
fn get_or_create_private_directory<P: AsRef<Path>>(path: P, group_read: bool) -> bool { fn get_or_create_private_directory<P: AsRef<Path>>(path: P, group_read: bool) -> bool {
let path = path.as_ref(); let path = path.as_ref();
@ -996,6 +1015,8 @@ impl Settings {
set_config_value!(inner.logging.otlp.ignore_log_targets, value); set_config_value!(inner.logging.otlp.ignore_log_targets, value);
set_config_value!(inner.logging.flame.enabled, value); set_config_value!(inner.logging.flame.enabled, value);
set_config_value!(inner.logging.flame.path, value); set_config_value!(inner.logging.flame.path, value);
set_config_value!(inner.logging.perfetto.enabled, value);
set_config_value!(inner.logging.perfetto.path, value);
set_config_value!(inner.logging.console.enabled, value); set_config_value!(inner.logging.console.enabled, value);
set_config_value!(inner.testing.subnode_index, value); set_config_value!(inner.testing.subnode_index, value);
set_config_value!(inner.core.capabilities.disable, value); set_config_value!(inner.core.capabilities.disable, value);
@ -1565,6 +1586,8 @@ mod tests {
); );
assert!(!s.logging.flame.enabled); assert!(!s.logging.flame.enabled);
assert_eq!(s.logging.flame.path, ""); assert_eq!(s.logging.flame.path, "");
assert!(!s.logging.perfetto.enabled);
assert_eq!(s.logging.perfetto.path, "");
assert!(!s.logging.console.enabled); assert!(!s.logging.console.enabled);
assert_eq!(s.testing.subnode_index, 0); assert_eq!(s.testing.subnode_index, 0);

View File

@ -18,6 +18,7 @@ use std::path::*;
use std::sync::Arc; use std::sync::Arc;
use tracing_appender::*; use tracing_appender::*;
use tracing_flame::FlameLayer; use tracing_flame::FlameLayer;
use tracing_perfetto::PerfettoLayer;
use tracing_subscriber::prelude::*; use tracing_subscriber::prelude::*;
use tracing_subscriber::*; use tracing_subscriber::*;
@ -48,14 +49,15 @@ impl VeilidLogs {
#[cfg(feature = "rt-tokio")] #[cfg(feature = "rt-tokio")]
if settingsr.logging.console.enabled { if settingsr.logging.console.enabled {
let filter = veilid_core::VeilidLayerFilter::new_no_default(
veilid_core::VeilidConfigLogLevel::Trace,
&[],
);
let layer = ConsoleLayer::builder() let layer = ConsoleLayer::builder()
.with_default_env() .with_default_env()
.spawn() .spawn()
.with_filter( .with_filter(filter);
filter::Targets::new()
.with_target("tokio", Level::TRACE)
.with_target("runtime", Level::TRACE),
);
layers.push(layer.boxed()); layers.push(layer.boxed());
} }
@ -93,6 +95,26 @@ impl VeilidLogs {
); );
} }
// Perfetto logger
if settingsr.logging.perfetto.enabled {
let filter = veilid_core::VeilidLayerFilter::new_no_default(
veilid_core::VeilidConfigLogLevel::Trace,
&veilid_core::FLAME_LOG_FACILITIES_IGNORE_LIST.map(|x| x.to_string()),
);
let perfetto_layer = PerfettoLayer::new(std::sync::Mutex::new(std::fs::File::create(
&settingsr.logging.perfetto.path,
)?));
// Do not include this in change_log_level changes, so we keep trace level
// filters.insert("flame", filter.clone());
layers.push(
perfetto_layer
.with_debug_annotations(true)
.with_filter(filter)
.boxed(),
);
}
// OpenTelemetry logger // OpenTelemetry logger
#[cfg(feature = "opentelemetry-otlp")] #[cfg(feature = "opentelemetry-otlp")]
if settingsr.logging.otlp.enabled { if settingsr.logging.otlp.enabled {

View File

@ -34,6 +34,7 @@ rt-wasm-bindgen = ["async_executors/bindgen", "async_executors/timer"]
veilid_tools_android_tests = ["dep:paranoid-android"] veilid_tools_android_tests = ["dep:paranoid-android"]
veilid_tools_ios_tests = ["dep:tracing", "dep:oslog", "dep:tracing-oslog"] veilid_tools_ios_tests = ["dep:tracing", "dep:oslog", "dep:tracing-oslog"]
tracing = ["dep:tracing", "dep:tracing-subscriber"] tracing = ["dep:tracing", "dep:tracing-subscriber"]
debug-locks = []
[dependencies] [dependencies]
tracing = { version = "0.1.40", features = [ tracing = { version = "0.1.40", features = [
@ -52,6 +53,7 @@ futures-util = { version = "0.3.30", default-features = false, features = [
"alloc", "alloc",
] } ] }
parking_lot = "0.12.3" parking_lot = "0.12.3"
async-lock = "3.4.0"
once_cell = "1.19.0" once_cell = "1.19.0"
stop-token = { version = "0.7.0", default-features = false } stop-token = { version = "0.7.0", default-features = false }
rand = "0.8.5" rand = "0.8.5"
@ -87,7 +89,6 @@ wasm-bindgen-futures = "0.4.42"
async_executors = { version = "0.7.0", default-features = false } async_executors = { version = "0.7.0", default-features = false }
getrandom = { version = "0.2", features = ["js"] } getrandom = { version = "0.2", features = ["js"] }
async-lock = "2.8.0"
send_wrapper = { version = "0.6.0", features = ["futures"] } send_wrapper = { version = "0.6.0", features = ["futures"] }
# Dependencies for Linux or Android # Dependencies for Linux or Android

View File

@ -107,7 +107,7 @@ pub use std::str::FromStr;
#[doc(no_inline)] #[doc(no_inline)]
pub use std::string::{String, ToString}; pub use std::string::{String, ToString};
#[doc(no_inline)] #[doc(no_inline)]
pub use std::sync::atomic::{AtomicBool, Ordering}; pub use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
#[doc(no_inline)] #[doc(no_inline)]
pub use std::sync::{Arc, Weak}; pub use std::sync::{Arc, Weak};
#[doc(no_inline)] #[doc(no_inline)]
@ -117,6 +117,13 @@ pub use std::time::Duration;
#[doc(no_inline)] #[doc(no_inline)]
pub use std::vec::Vec; pub use std::vec::Vec;
#[doc(no_inline)]
pub use async_lock::RwLock as AsyncRwLock;
#[doc(no_inline)]
pub use async_lock::RwLockReadGuard as AsyncRwLockReadGuard;
#[doc(no_inline)]
pub use async_lock::RwLockWriteGuard as AsyncRwLockWriteGuard;
cfg_if! { cfg_if! {
if #[cfg(target_arch = "wasm32")] { if #[cfg(target_arch = "wasm32")] {
#[doc(no_inline)] #[doc(no_inline)]
@ -126,13 +133,6 @@ cfg_if! {
#[doc(no_inline)] #[doc(no_inline)]
pub use async_lock::MutexGuardArc as AsyncMutexGuardArc; pub use async_lock::MutexGuardArc as AsyncMutexGuardArc;
#[doc(no_inline)]
pub use async_lock::RwLock as AsyncRwLock;
#[doc(no_inline)]
pub use async_lock::RwLockReadGuard as AsyncRwLockReadGuard;
#[doc(no_inline)]
pub use async_lock::RwLockWriteGuard as AsyncRwLockWriteGuard;
#[doc(no_inline)] #[doc(no_inline)]
pub use async_executors::JoinHandle as LowLevelJoinHandle; pub use async_executors::JoinHandle as LowLevelJoinHandle;
@ -146,12 +146,12 @@ cfg_if! {
#[doc(no_inline)] #[doc(no_inline)]
pub use async_std::sync::MutexGuardArc as AsyncMutexGuardArc; pub use async_std::sync::MutexGuardArc as AsyncMutexGuardArc;
#[doc(no_inline)] // #[doc(no_inline)]
pub use async_std::sync::RwLock as AsyncRwLock; // pub use async_std::sync::RwLock as AsyncRwLock;
#[doc(no_inline)] // #[doc(no_inline)]
pub use async_std::sync::RwLockReadGuard as AsyncRwLockReadGuard; // pub use async_std::sync::RwLockReadGuard as AsyncRwLockReadGuard;
#[doc(no_inline)] // #[doc(no_inline)]
pub use async_std::sync::RwLockWriteGuard as AsyncRwLockWriteGuard; // pub use async_std::sync::RwLockWriteGuard as AsyncRwLockWriteGuard;
#[doc(no_inline)] #[doc(no_inline)]
pub use async_std::task::JoinHandle as LowLevelJoinHandle; pub use async_std::task::JoinHandle as LowLevelJoinHandle;
@ -164,12 +164,12 @@ cfg_if! {
#[doc(no_inline)] #[doc(no_inline)]
pub use tokio::sync::OwnedMutexGuard as AsyncMutexGuardArc; pub use tokio::sync::OwnedMutexGuard as AsyncMutexGuardArc;
#[doc(no_inline)] // #[doc(no_inline)]
pub use tokio::sync::RwLock as AsyncRwLock; // pub use tokio::sync::RwLock as AsyncRwLock;
#[doc(no_inline)] // #[doc(no_inline)]
pub use tokio::sync::RwLockReadGuard as AsyncRwLockReadGuard; // pub use tokio::sync::RwLockReadGuard as AsyncRwLockReadGuard;
#[doc(no_inline)] // #[doc(no_inline)]
pub use tokio::sync::RwLockWriteGuard as AsyncRwLockWriteGuard; // pub use tokio::sync::RwLockWriteGuard as AsyncRwLockWriteGuard;
#[doc(no_inline)] #[doc(no_inline)]

View File

@ -34,8 +34,22 @@ impl<'a> StartupLockGuard<'a> {
#[derive(Debug)] #[derive(Debug)]
pub struct StartupLockEnterGuard<'a> { pub struct StartupLockEnterGuard<'a> {
_guard: AsyncRwLockReadGuard<'a, bool>, _guard: AsyncRwLockReadGuard<'a, bool>,
// #[cfg(feature = "debug-locks")]
id: usize,
// #[cfg(feature = "debug-locks")]
active_guards: Arc<Mutex<HashMap<usize, backtrace::Backtrace>>>,
} }
//#[cfg(feature = "debug-locks")]
impl<'a> Drop for StartupLockEnterGuard<'a> {
fn drop(&mut self) {
self.active_guards.lock().remove(&self.id);
}
}
//#[cfg(feature = "debug-locks")]
static GUARD_ID: AtomicUsize = AtomicUsize::new(0);
/// Synchronization mechanism that tracks the startup and shutdown of a region of code. /// Synchronization mechanism that tracks the startup and shutdown of a region of code.
/// Guarantees that some code can only be started up once and shut down only if it is /// Guarantees that some code can only be started up once and shut down only if it is
/// already started. /// already started.
@ -46,12 +60,16 @@ pub struct StartupLockEnterGuard<'a> {
#[derive(Debug)] #[derive(Debug)]
pub struct StartupLock { pub struct StartupLock {
rwlock: AsyncRwLock<bool>, rwlock: AsyncRwLock<bool>,
// #[cfg(feature = "debug-locks")]
active_guards: Arc<Mutex<HashMap<usize, backtrace::Backtrace>>>,
} }
impl StartupLock { impl StartupLock {
pub fn new() -> Self { pub fn new() -> Self {
Self { Self {
rwlock: AsyncRwLock::new(false), rwlock: AsyncRwLock::new(false),
// #[cfg(feature = "debug-locks")]
active_guards: Arc::new(Mutex::new(HashMap::new())),
} }
} }
@ -91,7 +109,20 @@ impl StartupLock {
/// One must call 'success()' on the returned startup lock guard if shutdown was successful /// One must call 'success()' on the returned startup lock guard if shutdown was successful
/// otherwise the startup lock will not shift to the 'stopped' state. /// otherwise the startup lock will not shift to the 'stopped' state.
pub async fn shutdown(&self) -> Result<StartupLockGuard, StartupLockAlreadyShutDownError> { pub async fn shutdown(&self) -> Result<StartupLockGuard, StartupLockAlreadyShutDownError> {
let guard = self.rwlock.write().await; cfg_if! {
if #[cfg(feature = "debug-locks")] {
//let guard = self.rwlock.write().await;
} else {
let guard = self.rwlock.write().await;
// let guard = match timeout(30000, self.rwlock.write()).await {
// Ok(v) => v,
// Err(_) => {
// eprintln!("active guards: {:#?}", self.active_guards.lock().values().collect::<Vec<_>>());
// panic!("shutdown deadlock");
// }
// };
}
}
if !*guard { if !*guard {
return Err(StartupLockAlreadyShutDownError); return Err(StartupLockAlreadyShutDownError);
} }
@ -109,7 +140,16 @@ impl StartupLock {
if !*guard { if !*guard {
return Err(StartupLockNotStartedError); return Err(StartupLockNotStartedError);
} }
Ok(StartupLockEnterGuard { _guard: guard }) let out = StartupLockEnterGuard {
_guard: guard,
//#[cfg(feature = "debug-locks")]
id: GUARD_ID.fetch_add(1, Ordering::AcqRel),
active_guards: self.active_guards.clone(),
};
self.active_guards
.lock()
.insert(out.id, backtrace::Backtrace::new());
Ok(out)
} }
} }

View File

@ -114,8 +114,66 @@ pub async fn test_bad_enter() {
lock.enter().expect_err("should not enter when shut down"); lock.enter().expect_err("should not enter when shut down");
} }
pub async fn test_multiple_enter() {
info!("test_multiple_enter");
let lock = Arc::new(StartupLock::new());
let s1 = lock.startup().expect("should startup");
s1.success();
{
let _e1 = lock.enter().expect("should enter 1");
{
let _e2 = lock.enter().expect("should enter 2");
{
let _e3 = lock.enter().expect("should enter 3");
}
}
}
let e4 = lock.enter().expect("should enter 4");
let e5 = lock.enter().expect("should enter 5");
let e6 = lock.enter().expect("should enter 6");
//eprintln!("1");
let lock2 = lock.clone();
let jh = spawn(async move {
//eprintln!("2");
let guard = lock2.shutdown().await.expect("should shutdown");
//eprintln!("7");
sleep(2000).await;
//eprintln!("8");
guard.success();
});
sleep(1000).await;
//eprintln!("3");
assert!(!lock.is_started());
assert!(!lock.is_shut_down());
// Now drop the enter created before shutdown
drop(e4);
//eprintln!("4");
drop(e5);
//eprintln!("5");
drop(e6);
//eprintln!("6");
// This should finally exit
jh.await;
//eprintln!("9");
assert!(!lock.is_started());
assert!(lock.is_shut_down());
lock.enter().expect_err("should not enter");
}
pub async fn test_all() { pub async fn test_all() {
test_startup_shutdown().await; test_startup_shutdown().await;
test_contention().await; test_contention().await;
test_bad_enter().await; test_bad_enter().await;
test_multiple_enter().await;
} }

View File

@ -55,19 +55,19 @@ cfg_if::cfg_if! {
}; };
} }
#[macro_export] // #[macro_export]
macro_rules! asyncrwlock_try_read { // macro_rules! asyncrwlock_try_read {
($x:expr) => { // ($x:expr) => {
$x.try_read().ok() // $x.try_read().ok()
}; // };
} // }
#[macro_export] // #[macro_export]
macro_rules! asyncrwlock_try_write { // macro_rules! asyncrwlock_try_write {
($x:expr) => { // ($x:expr) => {
$x.try_write().ok() // $x.try_write().ok()
}; // };
} // }
} else { } else {
#[macro_export] #[macro_export]
macro_rules! asyncmutex_try_lock { macro_rules! asyncmutex_try_lock {
@ -87,21 +87,23 @@ cfg_if::cfg_if! {
$x.try_lock_arc() $x.try_lock_arc()
}; };
} }
#[macro_export]
macro_rules! asyncrwlock_try_read {
($x:expr) => {
$x.try_read()
};
}
#[macro_export]
macro_rules! asyncrwlock_try_write {
($x:expr) => {
$x.try_write()
};
}
} }
} }
#[macro_export]
macro_rules! asyncrwlock_try_read {
($x:expr) => {
$x.try_read()
};
}
#[macro_export]
macro_rules! asyncrwlock_try_write {
($x:expr) => {
$x.try_write()
};
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////////////
pub fn system_boxed<'a, Out>( pub fn system_boxed<'a, Out>(
@ -500,3 +502,24 @@ pub fn type_name_of_val<T: ?Sized>(_val: &T) -> &'static str {
pub fn map_to_string<X: ToString>(arg: X) -> String { pub fn map_to_string<X: ToString>(arg: X) -> String {
arg.to_string() arg.to_string()
} }
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
pub struct DebugGuard {
counter: &'static AtomicUsize,
}
impl DebugGuard {
pub fn new(counter: &'static AtomicUsize) -> Self {
let c = counter.fetch_add(1, Ordering::SeqCst);
eprintln!("DebugGuard Entered: {}", c + 1);
Self { counter }
}
}
impl Drop for DebugGuard {
fn drop(&mut self) {
let c = self.counter.fetch_sub(1, Ordering::SeqCst);
eprintln!("DebugGuard Exited: {}", c - 1);
}
}