From 1b34239eb896032e1e22a89a3a9fd1b77c1138ca Mon Sep 17 00:00:00 2001 From: Christien Rioux Date: Fri, 19 Jul 2024 13:00:15 -0400 Subject: [PATCH 01/13] startup lock --- Cargo.lock | 1 - veilid-core/Cargo.toml | 1 - veilid-core/src/network_manager/mod.rs | 8 +- veilid-core/src/rpc_processor/mod.rs | 17 ++- veilid-core/src/rpc_processor/rpc_app_call.rs | 11 ++ .../src/rpc_processor/rpc_app_message.rs | 6 + .../src/rpc_processor/rpc_find_node.rs | 6 + .../src/rpc_processor/rpc_get_value.rs | 6 + .../src/rpc_processor/rpc_inspect_value.rs | 6 + .../src/rpc_processor/rpc_return_receipt.rs | 6 + .../src/rpc_processor/rpc_set_value.rs | 6 + veilid-core/src/rpc_processor/rpc_signal.rs | 6 + veilid-core/src/rpc_processor/rpc_status.rs | 6 + .../rpc_processor/rpc_validate_dial_info.rs | 6 + .../src/rpc_processor/rpc_value_changed.rs | 6 + .../src/rpc_processor/rpc_watch_value.rs | 6 + veilid-tools/src/lib.rs | 30 +++++ veilid-tools/src/startup_lock.rs | 120 +++++++++++++++++ veilid-tools/src/tests/common/mod.rs | 1 + .../src/tests/common/test_startup_lock.rs | 121 ++++++++++++++++++ veilid-tools/src/tests/native/mod.rs | 11 ++ veilid-tools/src/tools.rs | 26 ++++ veilid-tools/tests/web.rs | 7 + 23 files changed, 415 insertions(+), 5 deletions(-) create mode 100644 veilid-tools/src/startup_lock.rs create mode 100644 veilid-tools/src/tests/common/test_startup_lock.rs diff --git a/Cargo.lock b/Cargo.lock index 79dcd5f9..896ccc34 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6034,7 +6034,6 @@ version = "0.3.3" dependencies = [ "argon2", "async-io 1.13.0", - "async-lock 2.8.0", "async-std", "async-std-resolver", "async-tls", diff --git a/veilid-core/Cargo.toml b/veilid-core/Cargo.toml index 561fbe09..ebcc40d9 100644 --- a/veilid-core/Cargo.toml +++ b/veilid-core/Cargo.toml @@ -191,7 +191,6 @@ async_executors = { version = "0.7.0", default-features = false, features = [ "bindgen", "timer", ] } -async-lock = "2.8.0" wasm-bindgen = "0.2.92" js-sys = "0.3.69" wasm-bindgen-futures = "0.4.42" diff --git a/veilid-core/src/network_manager/mod.rs b/veilid-core/src/network_manager/mod.rs index 3e7bd2ae..e32dc018 100644 --- a/veilid-core/src/network_manager/mod.rs +++ b/veilid-core/src/network_manager/mod.rs @@ -1132,7 +1132,13 @@ impl NetworkManager { source_noderef.merge_filter(NodeRefFilter::new().with_routing_domain(routing_domain)); // Pass message to RPC system - rpc.enqueue_direct_message(envelope, source_noderef, flow, routing_domain, body)?; + if let Err(e) = + rpc.enqueue_direct_message(envelope, source_noderef, flow, routing_domain, body) + { + // Couldn't enqueue, but not the sender's fault + log_net!(debug "failed to enqueue direct message: {}", e); + return Ok(false); + } // Inform caller that we dealt with the envelope locally Ok(true) diff --git a/veilid-core/src/rpc_processor/mod.rs b/veilid-core/src/rpc_processor/mod.rs index 6c15b135..cca7766b 100644 --- a/veilid-core/src/rpc_processor/mod.rs +++ b/veilid-core/src/rpc_processor/mod.rs @@ -292,6 +292,7 @@ struct RPCProcessorUnlockedInner { update_callback: UpdateCallback, waiting_rpc_table: OperationWaiter>, waiting_app_call_table: OperationWaiter, ()>, + startup_lock: StartupLock, } #[derive(Clone)] @@ -345,6 +346,7 @@ impl RPCProcessor { update_callback, waiting_rpc_table: OperationWaiter::new(), waiting_app_call_table: OperationWaiter::new(), + startup_lock: StartupLock::new(), } } pub fn new(network_manager: NetworkManager, update_callback: UpdateCallback) -> Self { @@ -377,6 +379,7 @@ impl RPCProcessor { #[instrument(level = "debug", skip_all, err)] pub async fn startup(&self) -> EyreResult<()> { log_rpc!(debug "startup rpc processor"); + let guard = self.unlocked_inner.startup_lock.startup()?; { let mut inner = self.inner.lock(); @@ -405,13 +408,18 @@ impl RPCProcessor { self.storage_manager .set_rpc_processor(Some(self.clone())) .await; - + + guard.success(); Ok(()) } #[instrument(level = "debug", skip_all)] pub async fn shutdown(&self) { log_rpc!(debug "starting rpc processor shutdown"); + let Ok(guard) = self.unlocked_inner.startup_lock.shutdown().await else { + log_rpc!(debug "rpc processor already shut down"); + return; + }; // Stop storage manager from using us self.storage_manager.set_rpc_processor(None).await; @@ -437,6 +445,7 @@ impl RPCProcessor { // Release the rpc processor *self.inner.lock() = Self::new_inner(); + guard.success(); log_rpc!(debug "finished rpc processor shutdown"); } @@ -536,9 +545,11 @@ impl RPCProcessor { &self, node_id: TypedKey, safety_selection: SafetySelection, - ) -> SendPinBoxFuture, RPCError>> { + ) -> SendPinBoxFuture, RPCError>> { let this = self.clone(); Box::pin(async move { + let _guard = this.unlocked_inner.startup_lock.enter().map_err(RPCError::map_try_again("not started up"))?; + let routing_table = this.routing_table(); // First see if we have the node in our routing table already @@ -1699,6 +1710,8 @@ impl RPCProcessor { routing_domain: RoutingDomain, body: Vec, ) -> EyreResult<()> { + let _guard = self.unlocked_inner.startup_lock.enter().map_err(RPCError::map_try_again("not started up"))?; + let header = RPCMessageHeader { detail: RPCMessageHeaderDetail::Direct(RPCMessageHeaderDetailDirect { envelope, diff --git a/veilid-core/src/rpc_processor/rpc_app_call.rs b/veilid-core/src/rpc_processor/rpc_app_call.rs index 333b9b83..cde79fe4 100644 --- a/veilid-core/src/rpc_processor/rpc_app_call.rs +++ b/veilid-core/src/rpc_processor/rpc_app_call.rs @@ -9,6 +9,12 @@ impl RPCProcessor { dest: Destination, message: Vec, ) -> RPCNetworkResult>> { + let _guard = self + .unlocked_inner + .startup_lock + .enter() + .map_err(RPCError::map_try_again("not started up"))?; + let debug_string = format!("AppCall(message(len)={}) => {}", message.len(), dest); let app_call_q = RPCOperationAppCallQ::new(message)?; @@ -152,6 +158,11 @@ impl RPCProcessor { call_id: OperationId, message: Vec, ) -> Result<(), RPCError> { + let _guard = self + .unlocked_inner + .startup_lock + .enter() + .map_err(RPCError::map_try_again("not started up"))?; self.unlocked_inner .waiting_app_call_table .complete_op_waiter(call_id, message) diff --git a/veilid-core/src/rpc_processor/rpc_app_message.rs b/veilid-core/src/rpc_processor/rpc_app_message.rs index 07fa2d08..b265c9eb 100644 --- a/veilid-core/src/rpc_processor/rpc_app_message.rs +++ b/veilid-core/src/rpc_processor/rpc_app_message.rs @@ -9,6 +9,12 @@ impl RPCProcessor { dest: Destination, message: Vec, ) -> RPCNetworkResult<()> { + let _guard = self + .unlocked_inner + .startup_lock + .enter() + .map_err(RPCError::map_try_again("not started up"))?; + let app_message = RPCOperationAppMessage::new(message)?; let statement = RPCStatement::new(RPCStatementDetail::AppMessage(Box::new(app_message))); diff --git a/veilid-core/src/rpc_processor/rpc_find_node.rs b/veilid-core/src/rpc_processor/rpc_find_node.rs index ee53205a..e6f8c376 100644 --- a/veilid-core/src/rpc_processor/rpc_find_node.rs +++ b/veilid-core/src/rpc_processor/rpc_find_node.rs @@ -14,6 +14,12 @@ impl RPCProcessor { node_id: TypedKey, capabilities: Vec, ) -> RPCNetworkResult>> { + let _guard = self + .unlocked_inner + .startup_lock + .enter() + .map_err(RPCError::map_try_again("not started up"))?; + // Ensure destination never has a private route if matches!( dest, diff --git a/veilid-core/src/rpc_processor/rpc_get_value.rs b/veilid-core/src/rpc_processor/rpc_get_value.rs index 98586df1..36aad09c 100644 --- a/veilid-core/src/rpc_processor/rpc_get_value.rs +++ b/veilid-core/src/rpc_processor/rpc_get_value.rs @@ -30,6 +30,12 @@ impl RPCProcessor { subkey: ValueSubkey, last_descriptor: Option, ) ->RPCNetworkResult> { + let _guard = self + .unlocked_inner + .startup_lock + .enter() + .map_err(RPCError::map_try_again("not started up"))?; + // Ensure destination never has a private route // and get the target noderef so we can validate the response let Some(target) = dest.node() else { diff --git a/veilid-core/src/rpc_processor/rpc_inspect_value.rs b/veilid-core/src/rpc_processor/rpc_inspect_value.rs index 537d3386..bebb5a11 100644 --- a/veilid-core/src/rpc_processor/rpc_inspect_value.rs +++ b/veilid-core/src/rpc_processor/rpc_inspect_value.rs @@ -32,6 +32,12 @@ impl RPCProcessor { subkeys: ValueSubkeyRangeSet, last_descriptor: Option, ) -> RPCNetworkResult> { + let _guard = self + .unlocked_inner + .startup_lock + .enter() + .map_err(RPCError::map_try_again("not started up"))?; + // Ensure destination never has a private route // and get the target noderef so we can validate the response let Some(target) = dest.node() else { diff --git a/veilid-core/src/rpc_processor/rpc_return_receipt.rs b/veilid-core/src/rpc_processor/rpc_return_receipt.rs index 2e6c4478..6519d404 100644 --- a/veilid-core/src/rpc_processor/rpc_return_receipt.rs +++ b/veilid-core/src/rpc_processor/rpc_return_receipt.rs @@ -9,6 +9,12 @@ impl RPCProcessor { dest: Destination, receipt: D, ) -> RPCNetworkResult<()> { + let _guard = self + .unlocked_inner + .startup_lock + .enter() + .map_err(RPCError::map_try_again("not started up"))?; + let receipt = receipt.as_ref().to_vec(); let return_receipt = RPCOperationReturnReceipt::new(receipt)?; diff --git a/veilid-core/src/rpc_processor/rpc_set_value.rs b/veilid-core/src/rpc_processor/rpc_set_value.rs index 57b853ed..7ba3cb1f 100644 --- a/veilid-core/src/rpc_processor/rpc_set_value.rs +++ b/veilid-core/src/rpc_processor/rpc_set_value.rs @@ -34,6 +34,12 @@ impl RPCProcessor { descriptor: SignedValueDescriptor, send_descriptor: bool, ) ->RPCNetworkResult> { + let _guard = self + .unlocked_inner + .startup_lock + .enter() + .map_err(RPCError::map_try_again("not started up"))?; + // Ensure destination never has a private route // and get the target noderef so we can validate the response let Some(target) = dest.node() else { diff --git a/veilid-core/src/rpc_processor/rpc_signal.rs b/veilid-core/src/rpc_processor/rpc_signal.rs index e6a02a08..b5c95c35 100644 --- a/veilid-core/src/rpc_processor/rpc_signal.rs +++ b/veilid-core/src/rpc_processor/rpc_signal.rs @@ -9,6 +9,12 @@ impl RPCProcessor { dest: Destination, signal_info: SignalInfo, ) -> RPCNetworkResult<()> { + let _guard = self + .unlocked_inner + .startup_lock + .enter() + .map_err(RPCError::map_try_again("not started up"))?; + // Ensure destination never has a private route if matches!( dest, diff --git a/veilid-core/src/rpc_processor/rpc_status.rs b/veilid-core/src/rpc_processor/rpc_status.rs index 03cf9008..66125305 100644 --- a/veilid-core/src/rpc_processor/rpc_status.rs +++ b/veilid-core/src/rpc_processor/rpc_status.rs @@ -20,6 +20,12 @@ impl RPCProcessor { self, dest: Destination, ) -> RPCNetworkResult>> { + let _guard = self + .unlocked_inner + .startup_lock + .enter() + .map_err(RPCError::map_try_again("not started up"))?; + // Determine routing domain and node status to send let (opt_target_nr, routing_domain, node_status) = if let Some(UnsafeRoutingInfo { opt_node, diff --git a/veilid-core/src/rpc_processor/rpc_validate_dial_info.rs b/veilid-core/src/rpc_processor/rpc_validate_dial_info.rs index 12d5646a..02fbfaed 100644 --- a/veilid-core/src/rpc_processor/rpc_validate_dial_info.rs +++ b/veilid-core/src/rpc_processor/rpc_validate_dial_info.rs @@ -10,6 +10,12 @@ impl RPCProcessor { dial_info: DialInfo, redirect: bool, ) -> Result { + let _guard = self + .unlocked_inner + .startup_lock + .enter() + .map_err(RPCError::map_try_again("not started up"))?; + let network_manager = self.network_manager(); let receipt_time = ms_to_us(self.unlocked_inner.validate_dial_info_receipt_time_ms); diff --git a/veilid-core/src/rpc_processor/rpc_value_changed.rs b/veilid-core/src/rpc_processor/rpc_value_changed.rs index 2d5c3380..e89cfeb9 100644 --- a/veilid-core/src/rpc_processor/rpc_value_changed.rs +++ b/veilid-core/src/rpc_processor/rpc_value_changed.rs @@ -13,6 +13,12 @@ impl RPCProcessor { watch_id: u64, value: Option, ) -> RPCNetworkResult<()> { + let _guard = self + .unlocked_inner + .startup_lock + .enter() + .map_err(RPCError::map_try_again("not started up"))?; + // Ensure destination is never using a safety route if matches!(dest.get_safety_selection(), SafetySelection::Safe(_)) { return Err(RPCError::internal( diff --git a/veilid-core/src/rpc_processor/rpc_watch_value.rs b/veilid-core/src/rpc_processor/rpc_watch_value.rs index 04281d95..76fcc915 100644 --- a/veilid-core/src/rpc_processor/rpc_watch_value.rs +++ b/veilid-core/src/rpc_processor/rpc_watch_value.rs @@ -32,6 +32,12 @@ impl RPCProcessor { watcher: KeyPair, watch_id: Option, ) -> RPCNetworkResult> { + let _guard = self + .unlocked_inner + .startup_lock + .enter() + .map_err(RPCError::map_try_again("not started up"))?; + // Ensure destination never has a private route // and get the target noderef so we can validate the response let Some(target) = dest.node() else { diff --git a/veilid-tools/src/lib.rs b/veilid-tools/src/lib.rs index 27c3969b..05aaf566 100644 --- a/veilid-tools/src/lib.rs +++ b/veilid-tools/src/lib.rs @@ -49,6 +49,7 @@ pub mod single_shot_eventual; pub mod sleep; pub mod spawn; pub mod split_url; +pub mod startup_lock; pub mod tick_task; pub mod timeout; pub mod timeout_or; @@ -124,8 +125,17 @@ cfg_if! { pub use async_lock::MutexGuard as AsyncMutexGuard; #[doc(no_inline)] pub use async_lock::MutexGuardArc as AsyncMutexGuardArc; + + #[doc(no_inline)] + pub use async_lock::RwLock as AsyncRwLock; + #[doc(no_inline)] + pub use async_lock::RwLockReadGuard as AsyncRwLockReadGuard; + #[doc(no_inline)] + pub use async_lock::RwLockWriteGuard as AsyncRwLockWriteGuard; + #[doc(no_inline)] pub use async_executors::JoinHandle as LowLevelJoinHandle; + } else { cfg_if! { if #[cfg(feature="rt-async-std")] { @@ -135,8 +145,17 @@ cfg_if! { pub use async_std::sync::MutexGuard as AsyncMutexGuard; #[doc(no_inline)] pub use async_std::sync::MutexGuardArc as AsyncMutexGuardArc; + + #[doc(no_inline)] + pub use async_std::sync::RwLock as AsyncRwLock; + #[doc(no_inline)] + pub use async_std::sync::RwLockReadGuard as AsyncRwLockReadGuard; + #[doc(no_inline)] + pub use async_std::sync::RwLockWriteGuard as AsyncRwLockWriteGuard; + #[doc(no_inline)] pub use async_std::task::JoinHandle as LowLevelJoinHandle; + } else if #[cfg(feature="rt-tokio")] { #[doc(no_inline)] pub use tokio::sync::Mutex as AsyncMutex; @@ -144,6 +163,15 @@ cfg_if! { pub use tokio::sync::MutexGuard as AsyncMutexGuard; #[doc(no_inline)] pub use tokio::sync::OwnedMutexGuard as AsyncMutexGuardArc; + + #[doc(no_inline)] + pub use tokio::sync::RwLock as AsyncRwLock; + #[doc(no_inline)] + pub use tokio::sync::RwLockReadGuard as AsyncRwLockReadGuard; + #[doc(no_inline)] + pub use tokio::sync::RwLockWriteGuard as AsyncRwLockWriteGuard; + + #[doc(no_inline)] pub use tokio::task::JoinHandle as LowLevelJoinHandle; } else { @@ -202,6 +230,8 @@ pub use spawn::*; #[doc(inline)] pub use split_url::*; #[doc(inline)] +pub use startup_lock::*; +#[doc(inline)] pub use tick_task::*; #[doc(inline)] pub use timeout::*; diff --git a/veilid-tools/src/startup_lock.rs b/veilid-tools/src/startup_lock.rs new file mode 100644 index 00000000..26e58948 --- /dev/null +++ b/veilid-tools/src/startup_lock.rs @@ -0,0 +1,120 @@ +use super::*; + +#[derive(ThisError, Debug, Copy, Clone, PartialEq, Eq)] +#[error("Already started")] +pub struct StartupLockAlreadyStartedError; + +#[derive(ThisError, Debug, Copy, Clone, PartialEq, Eq)] +#[error("Already shut down")] +pub struct StartupLockAlreadyShutDownError; + +#[derive(ThisError, Debug, Copy, Clone, PartialEq, Eq)] +#[error("Not started")] +pub struct StartupLockNotStartedError; + +/// RAII-style lock for startup and shutdown operations +/// Must call 'success()' on this lock to report a successful startup or shutdown +/// Dropping this lock without calling 'success()' first indicates a failed +/// startup or shutdown operation +#[derive(Debug)] +pub struct StartupLockGuard<'a> { + guard: AsyncRwLockWriteGuard<'a, bool>, + success_value: bool, +} + +impl<'a> StartupLockGuard<'a> { + /// Call this function at the end of a successful startup or shutdown + /// operation to switch the state of the StartupLock. + pub fn success(mut self) { + *self.guard = self.success_value; + } +} + +/// RAII-style lock for entry operations on a started-up region of code. +#[derive(Debug)] +pub struct StartupLockEnterGuard<'a> { + _guard: AsyncRwLockReadGuard<'a, bool>, +} + +/// Synchronization mechanism that tracks the startup and shutdown of a region of code. +/// Guarantees that some code can only be started up once and shut down only if it is +/// already started. +/// Also tracks if the code is in-use and will wait for all 'entered' code to finish +/// before shutting down. Once a shutdown is requested, future calls to 'enter' will +/// fail, ensuring that nothing is 'entered' at the time of shutdown. This allows an +/// asynchronous shutdown to wait for operations to finish before proceeding. +#[derive(Debug)] +pub struct StartupLock { + rwlock: AsyncRwLock, +} + +impl StartupLock { + pub fn new() -> Self { + Self { + rwlock: AsyncRwLock::new(false), + } + } + + /// Start up if things are not already started up + /// One must call 'success()' on the returned startup lock guard if startup was successful + /// otherwise the startup lock will not shift to the 'started' state. + pub fn startup(&self) -> Result { + let guard = asyncrwlock_try_write!(self.rwlock).ok_or(StartupLockAlreadyStartedError)?; + if *guard { + return Err(StartupLockAlreadyStartedError); + } + Ok(StartupLockGuard { + guard, + success_value: true, + }) + } + + /// Check if this StartupLock is currently in a started state + /// Returns false is the state is in transition + pub fn is_started(&self) -> bool { + let Some(guard) = asyncrwlock_try_read!(self.rwlock) else { + return false; + }; + *guard + } + + /// Check if this StartupLock is currently in a shut down state + /// Returns false is the state is in transition + pub fn is_shut_down(&self) -> bool { + let Some(guard) = asyncrwlock_try_read!(self.rwlock) else { + return false; + }; + !*guard + } + + /// Wait for all 'entered' operations to finish before shutting down + /// One must call 'success()' on the returned startup lock guard if shutdown was successful + /// otherwise the startup lock will not shift to the 'stopped' state. + pub async fn shutdown(&self) -> Result { + let guard = self.rwlock.write().await; + if !*guard { + return Err(StartupLockAlreadyShutDownError); + } + Ok(StartupLockGuard { + guard, + success_value: false, + }) + } + + /// Enter an operation in a started-up module. + /// If this module has not yet started up or is in the process of startup or shutdown + /// this will fail. + pub fn enter(&self) -> Result { + let guard = asyncrwlock_try_read!(self.rwlock).ok_or(StartupLockNotStartedError)?; + if !*guard { + return Err(StartupLockNotStartedError); + } + Ok(StartupLockEnterGuard { _guard: guard }) + } +} + +impl Default for StartupLock { + fn default() -> Self { + Self::new() + } +} diff --git a/veilid-tools/src/tests/common/mod.rs b/veilid-tools/src/tests/common/mod.rs index 0b2d1bd2..bcfafea0 100644 --- a/veilid-tools/src/tests/common/mod.rs +++ b/veilid-tools/src/tests/common/mod.rs @@ -1,5 +1,6 @@ pub mod test_async_tag_lock; pub mod test_host_interface; +pub mod test_startup_lock; #[allow(dead_code)] pub static DEFAULT_LOG_IGNORE_LIST: [&str; 21] = [ diff --git a/veilid-tools/src/tests/common/test_startup_lock.rs b/veilid-tools/src/tests/common/test_startup_lock.rs new file mode 100644 index 00000000..656f3e30 --- /dev/null +++ b/veilid-tools/src/tests/common/test_startup_lock.rs @@ -0,0 +1,121 @@ +use crate::*; + +pub async fn test_startup_shutdown() { + info!("test_startup_shutdown"); + + let lock = StartupLock::new(); + + // Normal case + { + let guard = lock.startup().expect("should startup"); + guard.success(); + } + assert!(lock.is_started()); + assert!(!lock.is_shut_down()); + + { + let guard = lock.shutdown().await.expect("should shutdown"); + guard.success(); + } + assert!(!lock.is_started()); + assert!(lock.is_shut_down()); + + // Startup fail case + { + lock.startup().expect("should startup"); + // Don't call success() + } + assert!(!lock.is_started()); + { + lock.shutdown().await.expect_err("should not shutdown"); + } + assert!(!lock.is_started()); + + // Shutdown fail case + { + let guard = lock.startup().expect("should startup"); + guard.success(); + } + assert!(lock.is_started()); + { + lock.shutdown().await.expect("should shutdown"); + // Don't call success() + } + assert!(lock.is_started()); + { + let guard = lock.shutdown().await.expect("should shutdown"); + guard.success(); + } + assert!(!lock.is_started()); +} + +pub async fn test_contention() { + info!("test_contention"); + + let lock = Arc::new(StartupLock::new()); + let val = Arc::new(AtomicBool::new(false)); + + { + let guard = lock.startup().expect("should startup"); + guard.success(); + } + assert!(lock.is_started()); + let lock2 = lock.clone(); + let val2 = val.clone(); + let jh = spawn(async move { + let _guard = lock2.enter().expect("should enter"); + sleep(2000).await; + val2.store(true, Ordering::Release); + }); + sleep(1000).await; + { + let guard = lock.shutdown().await.expect("should shutdown"); + assert!( + val.load(Ordering::Acquire), + "should have waited for enter to exit" + ); + guard.success(); + } + assert!(!lock.is_started()); + jh.await; +} + +pub async fn test_bad_enter() { + info!("test_bad_enter"); + + let lock = Arc::new(StartupLock::new()); + + lock.enter() + .expect_err("should not enter when not started up"); + { + let guard = lock.startup().expect("should startup"); + guard.success(); + } + assert!(lock.is_started()); + assert!(!lock.is_shut_down()); + + let lock2 = lock.clone(); + let jh = spawn(async move { + let guard = lock2.shutdown().await.expect("should shutdown"); + sleep(2000).await; + guard.success(); + }); + sleep(1000).await; + assert!(!lock.is_started()); + assert!(!lock.is_shut_down()); + + lock.enter() + .expect_err("should not enter when shutting down"); + + jh.await; + + assert!(!lock.is_started()); + assert!(lock.is_shut_down()); + + lock.enter().expect_err("should not enter when shut down"); +} +pub async fn test_all() { + test_startup_shutdown().await; + test_contention().await; + test_bad_enter().await; +} diff --git a/veilid-tools/src/tests/native/mod.rs b/veilid-tools/src/tests/native/mod.rs index da61dfff..8e7d631d 100644 --- a/veilid-tools/src/tests/native/mod.rs +++ b/veilid-tools/src/tests/native/mod.rs @@ -14,6 +14,8 @@ use super::*; pub async fn run_all_tests() { info!("TEST: exec_test_host_interface"); test_host_interface::test_all().await; + info!("TEST: exec_test_startup_lock"); + test_startup_lock::test_all().await; info!("TEST: exec_test_network_interfaces"); test_network_interfaces::test_all().await; info!("TEST: exec_test_async_peek_stream"); @@ -87,6 +89,15 @@ cfg_if! { }); } + #[test] + #[serial] + fn run_test_startup_lock() { + setup(); + block_on(async { + test_startup_lock::test_all().await; + }); + } + #[test] #[serial] fn run_test_network_interfaces() { diff --git a/veilid-tools/src/tools.rs b/veilid-tools/src/tools.rs index fb20a5bc..4d378891 100644 --- a/veilid-tools/src/tools.rs +++ b/veilid-tools/src/tools.rs @@ -54,6 +54,20 @@ cfg_if::cfg_if! { $x.clone().try_lock_owned().ok() }; } + + #[macro_export] + macro_rules! asyncrwlock_try_read { + ($x:expr) => { + $x.try_read().ok() + }; + } + + #[macro_export] + macro_rules! asyncrwlock_try_write { + ($x:expr) => { + $x.try_write().ok() + }; + } } else { #[macro_export] macro_rules! asyncmutex_try_lock { @@ -73,6 +87,18 @@ cfg_if::cfg_if! { $x.try_lock_arc() }; } + #[macro_export] + macro_rules! asyncrwlock_try_read { + ($x:expr) => { + $x.try_read() + }; + } + #[macro_export] + macro_rules! asyncrwlock_try_write { + ($x:expr) => { + $x.try_write() + }; + } } } diff --git a/veilid-tools/tests/web.rs b/veilid-tools/tests/web.rs index fd0fe7cc..6c24d3c8 100644 --- a/veilid-tools/tests/web.rs +++ b/veilid-tools/tests/web.rs @@ -44,3 +44,10 @@ async fn run_test_async_tag_lock() { test_async_tag_lock::test_all().await; } + +#[wasm_bindgen_test] +async fn run_test_startup_lock() { + setup(); + + test_startup_lock::test_all().await; +} From c207e498b4ece9b763ffe4b875f6a981983cb055 Mon Sep 17 00:00:00 2001 From: Christien Rioux Date: Fri, 19 Jul 2024 14:17:45 -0400 Subject: [PATCH 02/13] more startup locks --- .../src/network_manager/connection_manager.rs | 29 ++++++++- veilid-core/src/network_manager/mod.rs | 6 +- veilid-core/src/network_manager/native/mod.rs | 62 ++++++++++++------- .../src/network_manager/receipt_manager.rs | 37 ++++++++++- veilid-core/src/veilid_api/debug.rs | 6 +- 5 files changed, 110 insertions(+), 30 deletions(-) diff --git a/veilid-core/src/network_manager/connection_manager.rs b/veilid-core/src/network_manager/connection_manager.rs index ba9e7a68..e87429ce 100644 --- a/veilid-core/src/network_manager/connection_manager.rs +++ b/veilid-core/src/network_manager/connection_manager.rs @@ -53,6 +53,7 @@ struct ConnectionManagerArc { connection_inactivity_timeout_ms: u32, connection_table: ConnectionTable, address_lock_table: AsyncTagLockTable, + startup_lock: StartupLock, inner: Mutex>, } impl core::fmt::Debug for ConnectionManagerArc { @@ -98,6 +99,7 @@ impl ConnectionManager { connection_inactivity_timeout_ms, connection_table: ConnectionTable::new(config, address_filter), address_lock_table: AsyncTagLockTable::new(), + startup_lock: StartupLock::new(), inner: Mutex::new(None), } } @@ -115,8 +117,11 @@ impl ConnectionManager { self.arc.connection_inactivity_timeout_ms } - pub async fn startup(&self) { + pub async fn startup(&self) -> EyreResult<()> { + let guard = self.arc.startup_lock.startup()?; + log_net!(debug "startup connection manager"); + let mut inner = self.arc.inner.lock(); if inner.is_some() { panic!("shouldn't start connection manager twice without shutting it down first"); @@ -133,10 +138,19 @@ impl ConnectionManager { // Store in the inner object *inner = Some(Self::new_inner(stop_source, sender, async_processor)); + + guard.success(); + + Ok(()) } pub async fn shutdown(&self) { log_net!(debug "starting connection manager shutdown"); + let Ok(guard) = self.arc.startup_lock.shutdown().await else { + log_net!(debug "connection manager is already shut down"); + return; + }; + // Remove the inner from the lock let mut inner = { let mut inner_lock = self.arc.inner.lock(); @@ -158,6 +172,8 @@ impl ConnectionManager { // Wait for the connections to complete log_net!(debug "waiting for connection handlers to complete"); self.arc.connection_table.join().await; + + guard.success(); log_net!(debug "finished connection manager shutdown"); } @@ -263,6 +279,9 @@ impl ConnectionManager { // Returns a network connection if one already is established pub fn get_connection(&self, flow: Flow) -> Option { + let Ok(_guard) = self.arc.startup_lock.enter() else { + return None; + }; self.arc.connection_table.peek_connection_by_flow(flow) } @@ -276,6 +295,9 @@ impl ConnectionManager { self.arc.connection_table.ref_connection_by_id(id, kind) } pub fn try_connection_ref_scope(&self, id: NetworkConnectionId) -> Option { + let Ok(_guard) = self.arc.startup_lock.enter() else { + return None; + }; ConnectionRefScope::try_new(self.clone(), id) } @@ -288,6 +310,11 @@ impl ConnectionManager { &self, dial_info: DialInfo, ) -> EyreResult> { + let Ok(_guard) = self.arc.startup_lock.enter() else { + return Ok(NetworkResult::service_unavailable( + "connection manager is not started", + )); + }; let peer_address = dial_info.peer_address(); let remote_addr = peer_address.socket_addr(); let mut preferred_local_address = self diff --git a/veilid-core/src/network_manager/mod.rs b/veilid-core/src/network_manager/mod.rs index e32dc018..b83b7c44 100644 --- a/veilid-core/src/network_manager/mod.rs +++ b/veilid-core/src/network_manager/mod.rs @@ -428,7 +428,7 @@ impl NetworkManager { }); // Start network components - connection_manager.startup().await; + connection_manager.startup().await?; match net.startup().await? { StartupDisposition::Success => {} StartupDisposition::BindRetry => { @@ -546,9 +546,7 @@ impl NetworkManager { } pub fn network_is_started(&self) -> bool { - self.opt_net() - .and_then(|net| net.is_started()) - .unwrap_or(false) + self.opt_net().map(|net| net.is_started()).unwrap_or(false) } pub fn generate_node_status(&self, _routing_domain: RoutingDomain) -> NodeStatus { diff --git a/veilid-core/src/network_manager/native/mod.rs b/veilid-core/src/network_manager/native/mod.rs index 815ed96f..a0ba7162 100644 --- a/veilid-core/src/network_manager/native/mod.rs +++ b/veilid-core/src/network_manager/native/mod.rs @@ -72,8 +72,6 @@ pub const MAX_CAPABILITIES: usize = 64; ///////////////////////////////////////////////////////////////// struct NetworkInner { - /// Some(true) if the low-level network is running, Some(false) if it is not, None if it is in transit - network_started: Option, /// set if the network needs to be restarted due to a low level configuration change /// such as dhcp release or change of address or interfaces being added or removed network_needs_restart: bool, @@ -114,6 +112,9 @@ struct NetworkInner { } struct NetworkUnlockedInner { + // Startup lock + startup_lock: StartupLock, + // Accessors routing_table: RoutingTable, network_manager: NetworkManager, @@ -139,7 +140,6 @@ pub(in crate::network_manager) struct Network { impl Network { fn new_inner() -> NetworkInner { NetworkInner { - network_started: Some(false), network_needs_restart: false, needs_public_dial_info_check: false, network_already_cleared: false, @@ -168,6 +168,7 @@ impl Network { ) -> NetworkUnlockedInner { let config = network_manager.config(); NetworkUnlockedInner { + startup_lock: StartupLock::new(), network_manager, routing_table, connection_manager, @@ -335,12 +336,12 @@ impl Network { inner.preferred_local_addresses.get(&key).copied() } - pub fn is_stable_interface_address(&self, addr: IpAddr) -> bool { + pub(crate) fn is_stable_interface_address(&self, addr: IpAddr) -> bool { let stable_addrs = self.get_stable_interface_addresses(); stable_addrs.contains(&addr) } - pub fn get_stable_interface_addresses(&self) -> Vec { + pub(crate) fn get_stable_interface_addresses(&self) -> Vec { let addrs = self.unlocked_inner.interfaces.stable_addresses(); let mut addrs: Vec = addrs .into_iter() @@ -377,7 +378,7 @@ impl Network { //////////////////////////////////////////////////////////// // Record DialInfo failures - pub async fn record_dial_info_failure>>>( + async fn record_dial_info_failure>>>( &self, dial_info: DialInfo, fut: F, @@ -401,6 +402,8 @@ impl Network { dial_info: DialInfo, data: Vec, ) -> EyreResult> { + let _guard = self.unlocked_inner.startup_lock.enter()?; + self.record_dial_info_failure( dial_info.clone(), async move { @@ -476,6 +479,8 @@ impl Network { data: Vec, timeout_ms: u32, ) -> EyreResult>> { + let _guard = self.unlocked_inner.startup_lock.enter()?; + self.record_dial_info_failure( dial_info.clone(), async move { @@ -590,6 +595,8 @@ impl Network { flow: Flow, data: Vec, ) -> EyreResult { + let _guard = self.unlocked_inner.startup_lock.enter()?; + let data_len = data.len(); // Handle connectionless protocol @@ -655,6 +662,8 @@ impl Network { dial_info: DialInfo, data: Vec, ) -> EyreResult> { + let _guard = self.unlocked_inner.startup_lock.enter()?; + self.record_dial_info_failure( dial_info.clone(), async move { @@ -922,22 +931,20 @@ impl Network { #[instrument(level = "debug", err, skip_all)] pub async fn startup(&self) -> EyreResult { - self.inner.lock().network_started = None; + let guard = self.unlocked_inner.startup_lock.startup()?; match self.startup_internal().await { Ok(StartupDisposition::Success) => { info!("network started"); - self.inner.lock().network_started = Some(true); + guard.success(); Ok(StartupDisposition::Success) } Ok(StartupDisposition::BindRetry) => { debug!("network bind retry"); - self.inner.lock().network_started = Some(false); Ok(StartupDisposition::BindRetry) } Err(e) => { debug!("network failed to start"); - self.inner.lock().network_started = Some(false); Err(e) } } @@ -947,8 +954,8 @@ impl Network { self.inner.lock().network_needs_restart } - pub fn is_started(&self) -> Option { - self.inner.lock().network_started + pub fn is_started(&self) -> bool { + self.unlocked_inner.startup_lock.is_started() } #[instrument(level = "debug", skip_all)] @@ -959,8 +966,10 @@ impl Network { #[instrument(level = "debug", skip_all)] pub async fn shutdown(&self) { log_net!(debug "starting low level network shutdown"); - - self.inner.lock().network_started = None; + let Ok(guard) = self.unlocked_inner.startup_lock.shutdown().await else { + log_net!(debug "low level network is already shut down"); + return; + }; let routing_table = self.routing_table(); @@ -1006,6 +1015,7 @@ impl Network { // Reset state including network class *self.inner.lock() = Self::new_inner(); + guard.success(); log_net!(debug "finished low level network shutdown"); } @@ -1014,12 +1024,20 @@ impl Network { &self, punishment: Option>, ) { + let Ok(_guard) = self.unlocked_inner.startup_lock.enter() else { + log_net!(debug "ignoring due to not started up"); + return; + }; let mut inner = self.inner.lock(); inner.needs_public_dial_info_check = true; inner.public_dial_info_check_punishment = punishment; } pub fn needs_public_dial_info_check(&self) -> bool { + let Ok(_guard) = self.unlocked_inner.startup_lock.enter() else { + log_net!(debug "ignoring due to not started up"); + return false; + }; let inner = self.inner.lock(); inner.needs_public_dial_info_check } @@ -1027,7 +1045,7 @@ impl Network { ////////////////////////////////////////// #[instrument(level = "trace", target = "net", skip_all, err)] - pub async fn network_interfaces_task_routine( + async fn network_interfaces_task_routine( self, _stop_token: StopToken, _l: u64, @@ -1039,12 +1057,7 @@ impl Network { } #[instrument(level = "trace", target = "net", skip_all, err)] - pub async fn upnp_task_routine( - self, - _stop_token: StopToken, - _l: u64, - _t: u64, - ) -> EyreResult<()> { + async fn upnp_task_routine(self, _stop_token: StopToken, _l: u64, _t: u64) -> EyreResult<()> { if !self.unlocked_inner.igd_manager.tick().await? { info!("upnp failed, restarting local network"); let mut inner = self.inner.lock(); @@ -1055,7 +1068,12 @@ impl Network { } #[instrument(level = "trace", target = "net", skip_all, err)] - pub async fn tick(&self) -> EyreResult<()> { + pub(crate) async fn tick(&self) -> EyreResult<()> { + let Ok(_guard) = self.unlocked_inner.startup_lock.enter() else { + log_net!(debug "ignoring due to not started up"); + return Ok(()); + }; + let (detect_address_changes, upnp) = { let config = self.network_manager().config(); let c = config.get(); diff --git a/veilid-core/src/network_manager/receipt_manager.rs b/veilid-core/src/network_manager/receipt_manager.rs index 50109a6a..ae8bf083 100644 --- a/veilid-core/src/network_manager/receipt_manager.rs +++ b/veilid-core/src/network_manager/receipt_manager.rs @@ -158,9 +158,14 @@ struct ReceiptManagerInner { timeout_task: MustJoinSingleFuture<()>, } +struct ReceiptManagerUnlockedInner { + startup_lock: StartupLock, +} + #[derive(Clone)] pub(super) struct ReceiptManager { inner: Arc>, + unlocked_inner: Arc, } impl ReceiptManager { @@ -177,6 +182,9 @@ impl ReceiptManager { pub fn new(network_manager: NetworkManager) -> Self { Self { inner: Arc::new(Mutex::new(Self::new_inner(network_manager))), + unlocked_inner: Arc::new(ReceiptManagerUnlockedInner { + startup_lock: StartupLock::new(), + }), } } @@ -185,6 +193,7 @@ impl ReceiptManager { } pub async fn startup(&self) -> EyreResult<()> { + let guard = self.unlocked_inner.startup_lock.startup()?; log_net!(debug "startup receipt manager"); // Retrieve config @@ -195,6 +204,7 @@ impl ReceiptManager { inner.stop_source = Some(StopSource::new()); } + guard.success(); Ok(()) } @@ -223,7 +233,7 @@ impl ReceiptManager { } #[instrument(level = "trace", target = "receipt", skip_all)] - pub async fn timeout_task_routine(self, now: Timestamp, stop_token: StopToken) { + async fn timeout_task_routine(self, now: Timestamp, stop_token: StopToken) { // Go through all receipts and build a list of expired nonces let mut new_next_oldest_ts: Option = None; let mut expired_records = Vec::new(); @@ -273,6 +283,10 @@ impl ReceiptManager { #[instrument(level = "trace", target = "receipt", skip_all, err)] pub async fn tick(&self) -> EyreResult<()> { + let Ok(_guard) = self.unlocked_inner.startup_lock.enter() else { + return Ok(()); + }; + let (next_oldest_ts, timeout_task, stop_token) = { let inner = self.inner.lock(); let stop_token = match inner.stop_source.as_ref() { @@ -303,6 +317,11 @@ impl ReceiptManager { pub async fn shutdown(&self) { log_net!(debug "starting receipt manager shutdown"); + let Ok(guard) = self.unlocked_inner.startup_lock.shutdown().await else { + log_net!(debug "receipt manager is already shut down"); + return; + }; + let network_manager = self.network_manager(); // Stop all tasks @@ -320,6 +339,8 @@ impl ReceiptManager { } *self.inner.lock() = Self::new_inner(network_manager); + + guard.success(); log_net!(debug "finished receipt manager shutdown"); } @@ -332,6 +353,10 @@ impl ReceiptManager { expected_returns: u32, callback: impl ReceiptCallback, ) { + let Ok(_guard) = self.unlocked_inner.startup_lock.enter() else { + log_net!(debug "ignoring due to not started up"); + return; + }; let receipt_nonce = receipt.get_nonce(); event!(target: "receipt", Level::DEBUG, "== New Multiple Receipt ({}) {} ", expected_returns, receipt_nonce.encode()); let record = Arc::new(Mutex::new(ReceiptRecord::new( @@ -353,6 +378,10 @@ impl ReceiptManager { expiration: Timestamp, eventual: ReceiptSingleShotType, ) { + let Ok(_guard) = self.unlocked_inner.startup_lock.enter() else { + log_net!(debug "ignoring due to not started up"); + return; + }; let receipt_nonce = receipt.get_nonce(); event!(target: "receipt", Level::DEBUG, "== New SingleShot Receipt {}", receipt_nonce.encode()); @@ -385,6 +414,8 @@ impl ReceiptManager { pub async fn cancel_receipt(&self, nonce: &Nonce) -> EyreResult<()> { event!(target: "receipt", Level::DEBUG, "== Cancel Receipt {}", nonce.encode()); + let _guard = self.unlocked_inner.startup_lock.enter()?; + // Remove the record let record = { let mut inner = self.inner.lock(); @@ -417,6 +448,10 @@ impl ReceiptManager { receipt: Receipt, receipt_returned: ReceiptReturned, ) -> NetworkResult<()> { + let Ok(_guard) = self.unlocked_inner.startup_lock.enter() else { + return NetworkResult::service_unavailable("receipt manager not started"); + }; + let receipt_nonce = receipt.get_nonce(); let extra_data = receipt.get_extra_data(); diff --git a/veilid-core/src/veilid_api/debug.rs b/veilid-core/src/veilid_api/debug.rs index 05720f2b..d862a594 100644 --- a/veilid-core/src/veilid_api/debug.rs +++ b/veilid-core/src/veilid_api/debug.rs @@ -867,9 +867,11 @@ impl VeilidAPI { .purge_last_connections(); if let Some(connection_manager) = &opt_connection_manager { - connection_manager.startup().await; + connection_manager + .startup() + .await + .map_err(VeilidAPIError::internal)?; } - Ok("Connections purged".to_owned()) } else if args[0] == "routes" { // Purge route spec store From fdead16fc8f4c8aac8a7f91c45d5385ef7a20964 Mon Sep 17 00:00:00 2001 From: Christien Rioux Date: Fri, 19 Jul 2024 14:33:31 -0400 Subject: [PATCH 03/13] wasm startup lock --- veilid-core/src/network_manager/wasm/mod.rs | 67 ++++++++++++++++----- 1 file changed, 52 insertions(+), 15 deletions(-) diff --git a/veilid-core/src/network_manager/wasm/mod.rs b/veilid-core/src/network_manager/wasm/mod.rs index 0340c9f7..25890b00 100644 --- a/veilid-core/src/network_manager/wasm/mod.rs +++ b/veilid-core/src/network_manager/wasm/mod.rs @@ -58,6 +58,9 @@ struct NetworkInner { } struct NetworkUnlockedInner { + // Startup lock + startup_lock: StartupLock, + // Accessors routing_table: RoutingTable, network_manager: NetworkManager, @@ -86,6 +89,7 @@ impl Network { connection_manager: ConnectionManager, ) -> NetworkUnlockedInner { NetworkUnlockedInner { + startup_lock: StartupLock::new(), network_manager, routing_table, connection_manager, @@ -121,7 +125,7 @@ impl Network { ///////////////////////////////////////////////////////////////// // Record DialInfo failures - pub async fn record_dial_info_failure>>>( + async fn record_dial_info_failure>>>( &self, dial_info: DialInfo, fut: F, @@ -135,12 +139,18 @@ impl Network { Ok(network_result) } - #[cfg_attr(feature="verbose-tracing", instrument(level="trace", err, skip(self, data), fields(data.len = data.len())))] + // Send data to a dial info, unbound, using a new connection from a random port + // This creates a short-lived connection in the case of connection-oriented protocols + // for the purpose of sending this one message. + // This bypasses the connection table as it is not a 'node to node' connection. + #[instrument(level="trace", target="net", err, skip(self, data), fields(data.len = data.len()))] pub async fn send_data_unbound_to_dial_info( &self, dial_info: DialInfo, data: Vec, ) -> EyreResult> { + let _guard = self.unlocked_inner.startup_lock.enter()?; + self.record_dial_info_failure(dial_info.clone(), async move { let data_len = data.len(); let timeout_ms = { @@ -187,13 +197,15 @@ impl Network { // This creates a short-lived connection in the case of connection-oriented protocols // for the purpose of sending this one message. // This bypasses the connection table as it is not a 'node to node' connection. - #[cfg_attr(feature="verbose-tracing", instrument(level="trace", err, skip(self, data), fields(data.len = data.len())))] + #[instrument(level="trace", target="net", err, skip(self, data), fields(data.len = data.len()))] pub async fn send_recv_data_unbound_to_dial_info( &self, dial_info: DialInfo, data: Vec, timeout_ms: u32, ) -> EyreResult>> { + let _guard = self.unlocked_inner.startup_lock.enter()?; + self.record_dial_info_failure(dial_info.clone(), async move { let data_len = data.len(); let connect_timeout_ms = { @@ -247,12 +259,14 @@ impl Network { .await } - #[cfg_attr(feature="verbose-tracing", instrument(level="trace", err, skip(self, data), fields(data.len = data.len())))] + #[instrument(level="trace", target="net", err, skip(self, data), fields(data.len = data.len()))] pub async fn send_data_to_existing_flow( &self, flow: Flow, data: Vec, ) -> EyreResult { + let _guard = self.unlocked_inner.startup_lock.enter()?; + let data_len = data.len(); match flow.protocol_type() { ProtocolType::UDP => { @@ -292,12 +306,16 @@ impl Network { Ok(SendDataToExistingFlowResult::NotSent(data)) } - #[cfg_attr(feature="verbose-tracing", instrument(level="trace", err, skip(self, data), fields(data.len = data.len())))] + // Send data directly to a dial info, possibly without knowing which node it is going to + // Returns a flow for the connection used to send the data + #[instrument(level="trace", target="net", err, skip(self, data), fields(data.len = data.len()))] pub async fn send_data_to_dial_info( &self, dial_info: DialInfo, data: Vec, ) -> EyreResult> { + let _guard = self.unlocked_inner.startup_lock.enter()?; + self.record_dial_info_failure(dial_info.clone(), async move { let data_len = data.len(); if dial_info.protocol_type() == ProtocolType::UDP { @@ -399,23 +417,22 @@ impl Network { Ok(StartupDisposition::Success) } + #[instrument(level = "debug", err, skip_all)] pub async fn startup(&self) -> EyreResult { - self.inner.lock().network_started = None; + let guard = self.unlocked_inner.startup_lock.startup()?; match self.startup_internal().await { Ok(StartupDisposition::Success) => { info!("network started"); - self.inner.lock().network_started = Some(true); + guard.success(); Ok(StartupDisposition::Success) } Ok(StartupDisposition::BindRetry) => { debug!("network bind retry"); - self.inner.lock().network_started = Some(false); Ok(StartupDisposition::BindRetry) } Err(e) => { debug!("network failed to start"); - self.inner.lock().network_started = Some(false); Err(e) } } @@ -425,16 +442,22 @@ impl Network { self.inner.lock().network_needs_restart } - pub fn is_started(&self) -> Option { - self.inner.lock().network_started + pub fn is_started(&self) -> bool { + self.unlocked_inner.startup_lock.is_started() } + #[instrument(level = "debug", skip_all)] pub fn restart_network(&self) { self.inner.lock().network_needs_restart = true; } + #[instrument(level = "debug", skip_all)] pub async fn shutdown(&self) { - log_net!(debug "stopping network"); + log_net!(debug "starting low level network shutdown"); + let Ok(guard) = self.unlocked_inner.startup_lock.shutdown().await else { + log_net!(debug "low level network is already shut down"); + return; + }; // Reset state let routing_table = self.routing_table(); @@ -451,7 +474,8 @@ impl Network { // Cancels all async background tasks by dropping join handles *self.inner.lock() = Self::new_inner(); - log_net!(debug "network stopped"); + guard.success(); + log_net!(debug "finished low level network shutdown"); } pub fn get_preferred_local_address(&self, _dial_info: &DialInfo) -> Option { @@ -472,15 +496,28 @@ impl Network { &self, _punishment: Option>, ) { - // + let Ok(_guard) = self.unlocked_inner.startup_lock.enter() else { + log_net!(debug "ignoring due to not started up"); + return; + }; } pub fn needs_public_dial_info_check(&self) -> bool { + let Ok(_guard) = self.unlocked_inner.startup_lock.enter() else { + log_net!(debug "ignoring due to not started up"); + return false; + }; + false } ////////////////////////////////////////// - pub async fn tick(&self) -> EyreResult<()> { + pub(crate) async fn tick(&self) -> EyreResult<()> { + let Ok(_guard) = self.unlocked_inner.startup_lock.enter() else { + log_net!(debug "ignoring due to not started up"); + return Ok(()); + }; + Ok(()) } } From e568c39efb566379671c416ecff1fbf35f7f027a Mon Sep 17 00:00:00 2001 From: Christien Rioux Date: Sat, 20 Jul 2024 18:05:24 -0400 Subject: [PATCH 04/13] shutdown work and perfetto logger --- Cargo.lock | 105 +++++++++++++++++- .../src/network_manager/connection_manager.rs | 4 + veilid-core/src/network_manager/mod.rs | 81 ++++++++++++-- veilid-core/src/network_manager/native/mod.rs | 21 ++-- .../src/network_manager/network_connection.rs | 4 - veilid-core/src/network_manager/stats.rs | 4 +- .../src/routing_table/tasks/ping_validator.rs | 18 +-- veilid-server/Cargo.toml | 1 + veilid-server/src/main.rs | 18 ++- veilid-server/src/settings.rs | 23 ++++ veilid-server/src/veilid_logs.rs | 32 +++++- veilid-tools/Cargo.toml | 3 +- veilid-tools/src/lib.rs | 40 +++---- veilid-tools/src/startup_lock.rs | 44 +++++++- .../src/tests/common/test_startup_lock.rs | 58 ++++++++++ veilid-tools/src/tools.rs | 71 ++++++++---- 16 files changed, 439 insertions(+), 88 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 896ccc34..21c7f3ec 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1958,6 +1958,12 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "fixedbitset" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" + [[package]] name = "flate2" version = "1.0.30" @@ -3147,6 +3153,12 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "multimap" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "defc4c55412d89136f966bbb339008b474350e5e6e78d2714439c386b3137a03" + [[package]] name = "nanorand" version = "0.7.0" @@ -4019,6 +4031,16 @@ dependencies = [ "sha2 0.10.8", ] +[[package]] +name = "petgraph" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" +dependencies = [ + "fixedbitset", + "indexmap 2.2.6", +] + [[package]] name = "pharos" version = "0.5.3" @@ -4170,6 +4192,16 @@ version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" +[[package]] +name = "prettyplease" +version = "0.2.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f12335488a2f3b0a83b14edad48dca9879ce89b2edd10e80237e4e852dd645e" +dependencies = [ + "proc-macro2", + "syn 2.0.71", +] + [[package]] name = "proc-macro-crate" version = "0.1.5" @@ -4218,6 +4250,27 @@ dependencies = [ "prost-derive 0.12.6", ] +[[package]] +name = "prost-build" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22505a5c94da8e3b7c2996394d1c933236c4d743e81a410bcca4e6989fc066a4" +dependencies = [ + "bytes", + "heck 0.5.0", + "itertools 0.12.1", + "log", + "multimap", + "once_cell", + "petgraph", + "prettyplease", + "prost 0.12.6", + "prost-types", + "regex", + "syn 2.0.71", + "tempfile", +] + [[package]] name = "prost-derive" version = "0.11.9" @@ -4259,6 +4312,15 @@ version = "2.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94" +[[package]] +name = "protobuf-src" +version = "2.0.1+26.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ba1cfa4b9dc098926b8cce388bf434b93516db3ecf6e8b1a37eb643d733ee7" +dependencies = [ + "cmake", +] + [[package]] name = "quick-error" version = "1.2.3" @@ -5252,6 +5314,18 @@ dependencies = [ "windows 0.52.0", ] +[[package]] +name = "tempfile" +version = "3.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" +dependencies = [ + "cfg-if 1.0.0", + "fastrand 2.1.0", + "rustix 0.38.34", + "windows-sys 0.52.0", +] + [[package]] name = "termcolor" version = "1.4.1" @@ -5310,6 +5384,16 @@ dependencies = [ "syn 2.0.71", ] +[[package]] +name = "thread-id" +version = "4.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfe8f25bbdd100db7e1d34acf7fd2dc59c4bf8f7483f505eaa7d4f12f76cc0ea" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "thread_local" version = "1.1.8" @@ -5744,6 +5828,24 @@ dependencies = [ "tracing-subscriber", ] +[[package]] +name = "tracing-perfetto" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd21777b526dfcb57f11f65aa8a2024d83e1db52841993229b6e282e511978b7" +dependencies = [ + "anyhow", + "bytes", + "chrono", + "prost 0.12.6", + "prost-build", + "protobuf-src", + "rand", + "thread-id", + "tracing", + "tracing-subscriber", +] + [[package]] name = "tracing-subscriber" version = "0.3.18" @@ -6233,6 +6335,7 @@ dependencies = [ "tracing-flame", "tracing-journald", "tracing-opentelemetry 0.24.0", + "tracing-perfetto", "tracing-subscriber", "url", "veilid-bugsalot", @@ -6246,7 +6349,7 @@ name = "veilid-tools" version = "0.3.3" dependencies = [ "android_logger 0.13.3", - "async-lock 2.8.0", + "async-lock 3.4.0", "async-std", "async_executors", "backtrace", diff --git a/veilid-core/src/network_manager/connection_manager.rs b/veilid-core/src/network_manager/connection_manager.rs index e87429ce..2539a7a8 100644 --- a/veilid-core/src/network_manager/connection_manager.rs +++ b/veilid-core/src/network_manager/connection_manager.rs @@ -414,6 +414,10 @@ impl ConnectionManager { if !allow_accept { return; } + let Ok(_guard) = self.arc.startup_lock.enter() else { + return; + }; + // Async lock on the remote address for atomicity per remote let _lock_guard = self .arc diff --git a/veilid-core/src/network_manager/mod.rs b/veilid-core/src/network_manager/mod.rs index b83b7c44..524ebccd 100644 --- a/veilid-core/src/network_manager/mod.rs +++ b/veilid-core/src/network_manager/mod.rs @@ -64,6 +64,8 @@ pub const PUBLIC_ADDRESS_INCONSISTENCY_PUNISHMENT_TIMEOUT_US: TimestampDuration pub const ADDRESS_FILTER_TASK_INTERVAL_SECS: u32 = 60; pub const BOOT_MAGIC: &[u8; 4] = b"BOOT"; +static FUCK: AtomicUsize = AtomicUsize::new(0); + #[derive(Clone, Debug, Default)] pub struct ProtocolConfig { pub outbound: ProtocolTypeSet, @@ -172,6 +174,8 @@ struct NetworkManagerUnlockedInner { address_filter_task: TickTask, // Network Key network_key: Option, + // Startup Lock + startup_lock: StartupLock, } #[derive(Clone)] @@ -213,6 +217,7 @@ impl NetworkManager { public_address_check_task: TickTask::new(PUBLIC_ADDRESS_CHECK_TASK_INTERVAL_SECS), address_filter_task: TickTask::new(ADDRESS_FILTER_TASK_INTERVAL_SECS), network_key, + startup_lock: StartupLock::new(), } } @@ -445,27 +450,30 @@ impl NetworkManager { #[instrument(level = "debug", skip_all, err)] pub async fn startup(&self) -> EyreResult { + let guard = self.unlocked_inner.startup_lock.startup()?; + match self.internal_startup().await { Ok(StartupDisposition::Success) => { + guard.success(); + // Inform api clients that things have changed self.send_network_update(); + Ok(StartupDisposition::Success) } Ok(StartupDisposition::BindRetry) => { - self.shutdown().await; + self.shutdown_internal().await; Ok(StartupDisposition::BindRetry) } Err(e) => { - self.shutdown().await; + self.shutdown_internal().await; Err(e) } } } #[instrument(level = "debug", skip_all)] - pub async fn shutdown(&self) { - log_net!(debug "starting network manager shutdown"); - + async fn shutdown_internal(&self) { // Cancel all tasks self.cancel_tasks().await; @@ -487,6 +495,20 @@ impl NetworkManager { { *self.inner.lock() = NetworkManager::new_inner(); } + } + + #[instrument(level = "debug", skip_all)] + pub async fn shutdown(&self) { + log_net!(debug "starting network manager shutdown"); + + let Ok(guard) = self.unlocked_inner.startup_lock.shutdown().await else { + log_net!(debug "network manager is already shut down"); + return; + }; + + self.shutdown_internal().await; + + guard.success(); // send update log_net!(debug "sending network state update to api clients"); @@ -562,6 +584,9 @@ impl NetworkManager { extra_data: D, callback: impl ReceiptCallback, ) -> EyreResult> { + let Ok(_guard) = self.unlocked_inner.startup_lock.enter() else { + bail!("network is not started"); + }; let receipt_manager = self.receipt_manager(); let routing_table = self.routing_table(); @@ -597,6 +622,10 @@ impl NetworkManager { expiration_us: u64, extra_data: D, ) -> EyreResult<(Vec, EventualValueFuture)> { + let Ok(_guard) = self.unlocked_inner.startup_lock.enter() else { + bail!("network is not started"); + }; + let receipt_manager = self.receipt_manager(); let routing_table = self.routing_table(); @@ -633,6 +662,10 @@ impl NetworkManager { &self, receipt_data: R, ) -> NetworkResult<()> { + let Ok(_guard) = self.unlocked_inner.startup_lock.enter() else { + return NetworkResult::service_unavailable("network is not started"); + }; + let receipt_manager = self.receipt_manager(); let receipt = match Receipt::from_signed_data(self.crypto(), receipt_data.as_ref()) { @@ -654,6 +687,10 @@ impl NetworkManager { receipt_data: R, inbound_noderef: NodeRef, ) -> NetworkResult<()> { + let Ok(_guard) = self.unlocked_inner.startup_lock.enter() else { + return NetworkResult::service_unavailable("network is not started"); + }; + let receipt_manager = self.receipt_manager(); let receipt = match Receipt::from_signed_data(self.crypto(), receipt_data.as_ref()) { @@ -674,6 +711,10 @@ impl NetworkManager { &self, receipt_data: R, ) -> NetworkResult<()> { + let Ok(_guard) = self.unlocked_inner.startup_lock.enter() else { + return NetworkResult::service_unavailable("network is not started"); + }; + let receipt_manager = self.receipt_manager(); let receipt = match Receipt::from_signed_data(self.crypto(), receipt_data.as_ref()) { @@ -695,6 +736,10 @@ impl NetworkManager { receipt_data: R, private_route: PublicKey, ) -> NetworkResult<()> { + let Ok(_guard) = self.unlocked_inner.startup_lock.enter() else { + return NetworkResult::service_unavailable("network is not started"); + }; + let receipt_manager = self.receipt_manager(); let receipt = match Receipt::from_signed_data(self.crypto(), receipt_data.as_ref()) { @@ -710,12 +755,16 @@ impl NetworkManager { } // Process a received signal - #[instrument(level = "trace", target = "receipt", skip_all)] + #[instrument(level = "trace", target = "net", skip_all)] pub async fn handle_signal( &self, signal_flow: Flow, signal_info: SignalInfo, ) -> EyreResult> { + let Ok(_guard) = self.unlocked_inner.startup_lock.enter() else { + return Ok(NetworkResult::service_unavailable("network is not started")); + }; + match signal_info { SignalInfo::ReverseConnect { receipt, peer_info } => { let routing_table = self.routing_table(); @@ -809,7 +858,7 @@ impl NetworkManager { } /// Builds an envelope for sending over the network - #[instrument(level = "trace", target = "receipt", skip_all)] + #[instrument(level = "trace", target = "net", skip_all)] fn build_envelope>( &self, dest_node_id: TypedKey, @@ -852,13 +901,19 @@ impl NetworkManager { /// node_ref is the direct destination to which the envelope will be sent /// If 'destination_node_ref' is specified, it can be different than the node_ref being sent to /// which will cause the envelope to be relayed - #[instrument(level = "trace", target = "receipt", skip_all)] + #[instrument(level = "trace", target = "net", skip_all)] pub async fn send_envelope>( &self, node_ref: NodeRef, destination_node_ref: Option, body: B, ) -> EyreResult> { + let _dg = DebugGuard::new(&FUCK); + + let Ok(_guard) = self.unlocked_inner.startup_lock.enter() else { + bail!("network is not started"); + }; + let destination_node_ref = destination_node_ref.as_ref().unwrap_or(&node_ref).clone(); let best_node_id = destination_node_ref.best_node_id(); @@ -896,6 +951,10 @@ impl NetworkManager { dial_info: DialInfo, rcpt_data: Vec, ) -> EyreResult<()> { + let Ok(_guard) = self.unlocked_inner.startup_lock.enter() else { + bail!("network is not started"); + }; + // Do we need to validate the outgoing receipt? Probably not // because it is supposed to be opaque and the // recipient/originator does the validation @@ -916,8 +975,12 @@ impl NetworkManager { // Called when a packet potentially containing an RPC envelope is received by a low-level // network protocol handler. Processes the envelope, authenticates and decrypts the RPC message // and passes it to the RPC handler - #[instrument(level = "trace", target = "receipt", skip_all)] + #[instrument(level = "trace", target = "net", skip_all)] async fn on_recv_envelope(&self, data: &mut [u8], flow: Flow) -> EyreResult { + let Ok(_guard) = self.unlocked_inner.startup_lock.enter() else { + return Ok(false); + }; + log_net!("envelope of {} bytes received from {:?}", data.len(), flow); let remote_addr = flow.remote_address().ip_addr(); diff --git a/veilid-core/src/network_manager/native/mod.rs b/veilid-core/src/network_manager/native/mod.rs index a0ba7162..909809d0 100644 --- a/veilid-core/src/network_manager/native/mod.rs +++ b/veilid-core/src/network_manager/native/mod.rs @@ -941,10 +941,12 @@ impl Network { } Ok(StartupDisposition::BindRetry) => { debug!("network bind retry"); + self.shutdown_internal().await; Ok(StartupDisposition::BindRetry) } Err(e) => { debug!("network failed to start"); + self.shutdown_internal().await; Err(e) } } @@ -964,13 +966,7 @@ impl Network { } #[instrument(level = "debug", skip_all)] - pub async fn shutdown(&self) { - log_net!(debug "starting low level network shutdown"); - let Ok(guard) = self.unlocked_inner.startup_lock.shutdown().await else { - log_net!(debug "low level network is already shut down"); - return; - }; - + async fn shutdown_internal(&self) { let routing_table = self.routing_table(); // Stop all tasks @@ -1014,6 +1010,17 @@ impl Network { // Reset state including network class *self.inner.lock() = Self::new_inner(); + } + + #[instrument(level = "debug", skip_all)] + pub async fn shutdown(&self) { + log_net!(debug "starting low level network shutdown"); + let Ok(guard) = self.unlocked_inner.startup_lock.shutdown().await else { + log_net!(debug "low level network is already shut down"); + return; + }; + + self.shutdown_internal().await; guard.success(); log_net!(debug "finished low level network shutdown"); diff --git a/veilid-core/src/network_manager/network_connection.rs b/veilid-core/src/network_manager/network_connection.rs index 262a5a3f..4f45a8a9 100644 --- a/veilid-core/src/network_manager/network_connection.rs +++ b/veilid-core/src/network_manager/network_connection.rs @@ -309,10 +309,6 @@ impl NetworkConnection { match res { Ok((_span_id, message)) => { - // let span = span!(Level::TRACE, "process_connection send"); - // span.follows_from(span_id); - // let _enter = span.enter(); - // Touch the LRU for this connection connection_manager.touch_connection_by_id(connection_id); diff --git a/veilid-core/src/network_manager/stats.rs b/veilid-core/src/network_manager/stats.rs index 2c5f0b97..589add0f 100644 --- a/veilid-core/src/network_manager/stats.rs +++ b/veilid-core/src/network_manager/stats.rs @@ -35,7 +35,7 @@ impl Default for NetworkManagerStats { impl NetworkManager { // Callbacks from low level network for statistics gathering - pub fn stats_packet_sent(&self, addr: IpAddr, bytes: ByteCount) { + pub(crate) fn stats_packet_sent(&self, addr: IpAddr, bytes: ByteCount) { let inner = &mut *self.inner.lock(); inner .stats @@ -52,7 +52,7 @@ impl NetworkManager { .add_up(bytes); } - pub fn stats_packet_rcvd(&self, addr: IpAddr, bytes: ByteCount) { + pub(crate) fn stats_packet_rcvd(&self, addr: IpAddr, bytes: ByteCount) { let inner = &mut *self.inner.lock(); inner .stats diff --git a/veilid-core/src/routing_table/tasks/ping_validator.rs b/veilid-core/src/routing_table/tasks/ping_validator.rs index 3cd3d269..c95bd888 100644 --- a/veilid-core/src/routing_table/tasks/ping_validator.rs +++ b/veilid-core/src/routing_table/tasks/ping_validator.rs @@ -114,7 +114,6 @@ impl RoutingTable { rpc.rpc_call_status(Destination::direct(relay_nr_filtered)) .await } - .instrument(Span::current()) .boxed(), ); } @@ -159,9 +158,7 @@ impl RoutingTable { log_rtab!("--> Watch ping to {:?}", watch_nr); futurequeue.push_back( - async move { rpc.rpc_call_status(Destination::direct(watch_nr)).await } - .instrument(Span::current()) - .boxed(), + async move { rpc.rpc_call_status(Destination::direct(watch_nr)).await }.boxed(), ); } Ok(()) @@ -198,9 +195,7 @@ impl RoutingTable { let rpc = rpc.clone(); log_rtab!("--> Validator ping to {:?}", nr); futurequeue.push_back( - async move { rpc.rpc_call_status(Destination::direct(nr)).await } - .instrument(Span::current()) - .boxed(), + async move { rpc.rpc_call_status(Destination::direct(nr)).await }.boxed(), ); } @@ -226,9 +221,7 @@ impl RoutingTable { // Just do a single ping with the best protocol for all the nodes futurequeue.push_back( - async move { rpc.rpc_call_status(Destination::direct(nr)).await } - .instrument(Span::current()) - .boxed(), + async move { rpc.rpc_call_status(Destination::direct(nr)).await }.boxed(), ); } @@ -244,6 +237,8 @@ impl RoutingTable { _last_ts: Timestamp, cur_ts: Timestamp, ) -> EyreResult<()> { + eprintln!("pv tick"); + let mut futurequeue: VecDeque = VecDeque::new(); // PublicInternet @@ -258,7 +253,6 @@ impl RoutingTable { let mut unord = FuturesUnordered::new(); while !unord.is_empty() || !futurequeue.is_empty() { - #[cfg(feature = "verbose-tracing")] log_rtab!(debug "Ping validation queue: {} remaining, {} in progress", futurequeue.len(), unord.len()); // Process one unordered futures if we have some match unord.next().timeout_at(stop_token.clone()).await { @@ -279,7 +273,7 @@ impl RoutingTable { let Some(fq) = futurequeue.pop_front() else { break; }; - unord.push(fq); + unord.push(fq.in_current_span()); } } diff --git a/veilid-server/Cargo.toml b/veilid-server/Cargo.toml index 28e98494..9feee64f 100644 --- a/veilid-server/Cargo.toml +++ b/veilid-server/Cargo.toml @@ -81,6 +81,7 @@ stop-token = { version = "^0", default-features = false } sysinfo = { version = "^0.30.13" } wg = { version = "^0.9.1", features = ["future"] } tracing-flame = "0.2.0" +tracing-perfetto = "0.1.1" [target.'cfg(windows)'.dependencies] windows-service = "^0" diff --git a/veilid-server/src/main.rs b/veilid-server/src/main.rs index 11e7f460..67ae422f 100644 --- a/veilid-server/src/main.rs +++ b/veilid-server/src/main.rs @@ -84,10 +84,14 @@ pub struct CmdlineArgs { #[arg(long, value_name = "endpoint")] otlp: Option, - /// Turn on flamegraph tracing (experimental, isn't terribly useful) + /// Turn on flamegraph tracing (experimental) #[arg(long, hide = true, value_name = "PATH", num_args=0..=1, require_equals=true, default_missing_value = "")] flame: Option, + /// Turn on perfetto tracing (experimental) + #[arg(long, hide = true, value_name = "PATH", num_args=0..=1, require_equals=true, default_missing_value = "")] + perfetto: Option, + /// Run as an extra daemon on the same machine for testing purposes, specify a number greater than zero to offset the listening ports #[arg(long)] subnode_index: Option, @@ -223,6 +227,18 @@ fn main() -> EyreResult<()> { settingsrw.logging.flame.enabled = true; settingsrw.logging.flame.path = flame; } + if let Some(perfetto) = args.perfetto { + let perfetto = if perfetto.is_empty() { + Settings::get_default_perfetto_path(settingsrw.testing.subnode_index) + .to_string_lossy() + .to_string() + } else { + perfetto.to_string_lossy().to_string() + }; + println!("Enabling perfetto output to {}", perfetto); + settingsrw.logging.perfetto.enabled = true; + settingsrw.logging.perfetto.path = perfetto; + } if args.no_attach { settingsrw.auto_attach = false; diff --git a/veilid-server/src/settings.rs b/veilid-server/src/settings.rs index e09f1305..d65f271a 100644 --- a/veilid-server/src/settings.rs +++ b/veilid-server/src/settings.rs @@ -69,6 +69,9 @@ logging: flame: enabled: false path: '' + perfetto: + enabled: false + path: '' console: enabled: false testing: @@ -451,6 +454,12 @@ pub struct Flame { pub path: String, } +#[derive(Debug, Deserialize, Serialize)] +pub struct Perfetto { + pub enabled: bool, + pub path: String, +} + #[derive(Debug, Deserialize, Serialize)] pub struct Console { pub enabled: bool, @@ -503,6 +512,7 @@ pub struct Logging { pub api: Api, pub otlp: Otlp, pub flame: Flame, + pub perfetto: Perfetto, pub console: Console, } @@ -873,6 +883,15 @@ impl Settings { }) } + /// Determine default perfetto output path + pub fn get_default_perfetto_path(subnode_index: u16) -> PathBuf { + std::env::temp_dir().join(if subnode_index == 0 { + "veilid-server.pftrace".to_owned() + } else { + format!("veilid-server-{}.pftrace", subnode_index) + }) + } + #[allow(dead_code)] fn get_or_create_private_directory>(path: P, group_read: bool) -> bool { let path = path.as_ref(); @@ -996,6 +1015,8 @@ impl Settings { set_config_value!(inner.logging.otlp.ignore_log_targets, value); set_config_value!(inner.logging.flame.enabled, value); set_config_value!(inner.logging.flame.path, value); + set_config_value!(inner.logging.perfetto.enabled, value); + set_config_value!(inner.logging.perfetto.path, value); set_config_value!(inner.logging.console.enabled, value); set_config_value!(inner.testing.subnode_index, value); set_config_value!(inner.core.capabilities.disable, value); @@ -1565,6 +1586,8 @@ mod tests { ); assert!(!s.logging.flame.enabled); assert_eq!(s.logging.flame.path, ""); + assert!(!s.logging.perfetto.enabled); + assert_eq!(s.logging.perfetto.path, ""); assert!(!s.logging.console.enabled); assert_eq!(s.testing.subnode_index, 0); diff --git a/veilid-server/src/veilid_logs.rs b/veilid-server/src/veilid_logs.rs index 62674f12..1e0ac8b9 100644 --- a/veilid-server/src/veilid_logs.rs +++ b/veilid-server/src/veilid_logs.rs @@ -18,6 +18,7 @@ use std::path::*; use std::sync::Arc; use tracing_appender::*; use tracing_flame::FlameLayer; +use tracing_perfetto::PerfettoLayer; use tracing_subscriber::prelude::*; use tracing_subscriber::*; @@ -48,14 +49,15 @@ impl VeilidLogs { #[cfg(feature = "rt-tokio")] if settingsr.logging.console.enabled { + let filter = veilid_core::VeilidLayerFilter::new_no_default( + veilid_core::VeilidConfigLogLevel::Trace, + &[], + ); + let layer = ConsoleLayer::builder() .with_default_env() .spawn() - .with_filter( - filter::Targets::new() - .with_target("tokio", Level::TRACE) - .with_target("runtime", Level::TRACE), - ); + .with_filter(filter); layers.push(layer.boxed()); } @@ -93,6 +95,26 @@ impl VeilidLogs { ); } + // Perfetto logger + if settingsr.logging.perfetto.enabled { + let filter = veilid_core::VeilidLayerFilter::new_no_default( + veilid_core::VeilidConfigLogLevel::Trace, + &veilid_core::FLAME_LOG_FACILITIES_IGNORE_LIST.map(|x| x.to_string()), + ); + let perfetto_layer = PerfettoLayer::new(std::sync::Mutex::new(std::fs::File::create( + &settingsr.logging.perfetto.path, + )?)); + + // Do not include this in change_log_level changes, so we keep trace level + // filters.insert("flame", filter.clone()); + layers.push( + perfetto_layer + .with_debug_annotations(true) + .with_filter(filter) + .boxed(), + ); + } + // OpenTelemetry logger #[cfg(feature = "opentelemetry-otlp")] if settingsr.logging.otlp.enabled { diff --git a/veilid-tools/Cargo.toml b/veilid-tools/Cargo.toml index 3cce5fa6..f6c2d046 100644 --- a/veilid-tools/Cargo.toml +++ b/veilid-tools/Cargo.toml @@ -34,6 +34,7 @@ rt-wasm-bindgen = ["async_executors/bindgen", "async_executors/timer"] veilid_tools_android_tests = ["dep:paranoid-android"] veilid_tools_ios_tests = ["dep:tracing", "dep:oslog", "dep:tracing-oslog"] tracing = ["dep:tracing", "dep:tracing-subscriber"] +debug-locks = [] [dependencies] tracing = { version = "0.1.40", features = [ @@ -52,6 +53,7 @@ futures-util = { version = "0.3.30", default-features = false, features = [ "alloc", ] } parking_lot = "0.12.3" +async-lock = "3.4.0" once_cell = "1.19.0" stop-token = { version = "0.7.0", default-features = false } rand = "0.8.5" @@ -87,7 +89,6 @@ wasm-bindgen-futures = "0.4.42" async_executors = { version = "0.7.0", default-features = false } getrandom = { version = "0.2", features = ["js"] } -async-lock = "2.8.0" send_wrapper = { version = "0.6.0", features = ["futures"] } # Dependencies for Linux or Android diff --git a/veilid-tools/src/lib.rs b/veilid-tools/src/lib.rs index 05aaf566..78fd2c4c 100644 --- a/veilid-tools/src/lib.rs +++ b/veilid-tools/src/lib.rs @@ -107,7 +107,7 @@ pub use std::str::FromStr; #[doc(no_inline)] pub use std::string::{String, ToString}; #[doc(no_inline)] -pub use std::sync::atomic::{AtomicBool, Ordering}; +pub use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; #[doc(no_inline)] pub use std::sync::{Arc, Weak}; #[doc(no_inline)] @@ -117,6 +117,13 @@ pub use std::time::Duration; #[doc(no_inline)] pub use std::vec::Vec; +#[doc(no_inline)] +pub use async_lock::RwLock as AsyncRwLock; +#[doc(no_inline)] +pub use async_lock::RwLockReadGuard as AsyncRwLockReadGuard; +#[doc(no_inline)] +pub use async_lock::RwLockWriteGuard as AsyncRwLockWriteGuard; + cfg_if! { if #[cfg(target_arch = "wasm32")] { #[doc(no_inline)] @@ -126,13 +133,6 @@ cfg_if! { #[doc(no_inline)] pub use async_lock::MutexGuardArc as AsyncMutexGuardArc; - #[doc(no_inline)] - pub use async_lock::RwLock as AsyncRwLock; - #[doc(no_inline)] - pub use async_lock::RwLockReadGuard as AsyncRwLockReadGuard; - #[doc(no_inline)] - pub use async_lock::RwLockWriteGuard as AsyncRwLockWriteGuard; - #[doc(no_inline)] pub use async_executors::JoinHandle as LowLevelJoinHandle; @@ -146,12 +146,12 @@ cfg_if! { #[doc(no_inline)] pub use async_std::sync::MutexGuardArc as AsyncMutexGuardArc; - #[doc(no_inline)] - pub use async_std::sync::RwLock as AsyncRwLock; - #[doc(no_inline)] - pub use async_std::sync::RwLockReadGuard as AsyncRwLockReadGuard; - #[doc(no_inline)] - pub use async_std::sync::RwLockWriteGuard as AsyncRwLockWriteGuard; + // #[doc(no_inline)] + // pub use async_std::sync::RwLock as AsyncRwLock; + // #[doc(no_inline)] + // pub use async_std::sync::RwLockReadGuard as AsyncRwLockReadGuard; + // #[doc(no_inline)] + // pub use async_std::sync::RwLockWriteGuard as AsyncRwLockWriteGuard; #[doc(no_inline)] pub use async_std::task::JoinHandle as LowLevelJoinHandle; @@ -164,12 +164,12 @@ cfg_if! { #[doc(no_inline)] pub use tokio::sync::OwnedMutexGuard as AsyncMutexGuardArc; - #[doc(no_inline)] - pub use tokio::sync::RwLock as AsyncRwLock; - #[doc(no_inline)] - pub use tokio::sync::RwLockReadGuard as AsyncRwLockReadGuard; - #[doc(no_inline)] - pub use tokio::sync::RwLockWriteGuard as AsyncRwLockWriteGuard; + // #[doc(no_inline)] + // pub use tokio::sync::RwLock as AsyncRwLock; + // #[doc(no_inline)] + // pub use tokio::sync::RwLockReadGuard as AsyncRwLockReadGuard; + // #[doc(no_inline)] + // pub use tokio::sync::RwLockWriteGuard as AsyncRwLockWriteGuard; #[doc(no_inline)] diff --git a/veilid-tools/src/startup_lock.rs b/veilid-tools/src/startup_lock.rs index 26e58948..04095375 100644 --- a/veilid-tools/src/startup_lock.rs +++ b/veilid-tools/src/startup_lock.rs @@ -34,8 +34,22 @@ impl<'a> StartupLockGuard<'a> { #[derive(Debug)] pub struct StartupLockEnterGuard<'a> { _guard: AsyncRwLockReadGuard<'a, bool>, + // #[cfg(feature = "debug-locks")] + id: usize, + // #[cfg(feature = "debug-locks")] + active_guards: Arc>>, } +//#[cfg(feature = "debug-locks")] +impl<'a> Drop for StartupLockEnterGuard<'a> { + fn drop(&mut self) { + self.active_guards.lock().remove(&self.id); + } +} + +//#[cfg(feature = "debug-locks")] +static GUARD_ID: AtomicUsize = AtomicUsize::new(0); + /// Synchronization mechanism that tracks the startup and shutdown of a region of code. /// Guarantees that some code can only be started up once and shut down only if it is /// already started. @@ -46,12 +60,16 @@ pub struct StartupLockEnterGuard<'a> { #[derive(Debug)] pub struct StartupLock { rwlock: AsyncRwLock, + // #[cfg(feature = "debug-locks")] + active_guards: Arc>>, } impl StartupLock { pub fn new() -> Self { Self { rwlock: AsyncRwLock::new(false), + // #[cfg(feature = "debug-locks")] + active_guards: Arc::new(Mutex::new(HashMap::new())), } } @@ -91,7 +109,20 @@ impl StartupLock { /// One must call 'success()' on the returned startup lock guard if shutdown was successful /// otherwise the startup lock will not shift to the 'stopped' state. pub async fn shutdown(&self) -> Result { - let guard = self.rwlock.write().await; + cfg_if! { + if #[cfg(feature = "debug-locks")] { + //let guard = self.rwlock.write().await; + } else { + let guard = self.rwlock.write().await; + // let guard = match timeout(30000, self.rwlock.write()).await { + // Ok(v) => v, + // Err(_) => { + // eprintln!("active guards: {:#?}", self.active_guards.lock().values().collect::>()); + // panic!("shutdown deadlock"); + // } + // }; + } + } if !*guard { return Err(StartupLockAlreadyShutDownError); } @@ -109,7 +140,16 @@ impl StartupLock { if !*guard { return Err(StartupLockNotStartedError); } - Ok(StartupLockEnterGuard { _guard: guard }) + let out = StartupLockEnterGuard { + _guard: guard, + //#[cfg(feature = "debug-locks")] + id: GUARD_ID.fetch_add(1, Ordering::AcqRel), + active_guards: self.active_guards.clone(), + }; + self.active_guards + .lock() + .insert(out.id, backtrace::Backtrace::new()); + Ok(out) } } diff --git a/veilid-tools/src/tests/common/test_startup_lock.rs b/veilid-tools/src/tests/common/test_startup_lock.rs index 656f3e30..d8823839 100644 --- a/veilid-tools/src/tests/common/test_startup_lock.rs +++ b/veilid-tools/src/tests/common/test_startup_lock.rs @@ -114,8 +114,66 @@ pub async fn test_bad_enter() { lock.enter().expect_err("should not enter when shut down"); } + +pub async fn test_multiple_enter() { + info!("test_multiple_enter"); + + let lock = Arc::new(StartupLock::new()); + + let s1 = lock.startup().expect("should startup"); + s1.success(); + + { + let _e1 = lock.enter().expect("should enter 1"); + { + let _e2 = lock.enter().expect("should enter 2"); + { + let _e3 = lock.enter().expect("should enter 3"); + } + } + } + + let e4 = lock.enter().expect("should enter 4"); + let e5 = lock.enter().expect("should enter 5"); + let e6 = lock.enter().expect("should enter 6"); + + //eprintln!("1"); + let lock2 = lock.clone(); + let jh = spawn(async move { + //eprintln!("2"); + let guard = lock2.shutdown().await.expect("should shutdown"); + //eprintln!("7"); + sleep(2000).await; + //eprintln!("8"); + guard.success(); + }); + sleep(1000).await; + //eprintln!("3"); + assert!(!lock.is_started()); + assert!(!lock.is_shut_down()); + + // Now drop the enter created before shutdown + drop(e4); + //eprintln!("4"); + drop(e5); + //eprintln!("5"); + drop(e6); + //eprintln!("6"); + + // This should finally exit + jh.await; + + //eprintln!("9"); + + assert!(!lock.is_started()); + assert!(lock.is_shut_down()); + + lock.enter().expect_err("should not enter"); +} + pub async fn test_all() { test_startup_shutdown().await; test_contention().await; test_bad_enter().await; + test_multiple_enter().await; } diff --git a/veilid-tools/src/tools.rs b/veilid-tools/src/tools.rs index 4d378891..4536b5dd 100644 --- a/veilid-tools/src/tools.rs +++ b/veilid-tools/src/tools.rs @@ -55,19 +55,19 @@ cfg_if::cfg_if! { }; } - #[macro_export] - macro_rules! asyncrwlock_try_read { - ($x:expr) => { - $x.try_read().ok() - }; - } + // #[macro_export] + // macro_rules! asyncrwlock_try_read { + // ($x:expr) => { + // $x.try_read().ok() + // }; + // } - #[macro_export] - macro_rules! asyncrwlock_try_write { - ($x:expr) => { - $x.try_write().ok() - }; - } + // #[macro_export] + // macro_rules! asyncrwlock_try_write { + // ($x:expr) => { + // $x.try_write().ok() + // }; + // } } else { #[macro_export] macro_rules! asyncmutex_try_lock { @@ -87,21 +87,23 @@ cfg_if::cfg_if! { $x.try_lock_arc() }; } - #[macro_export] - macro_rules! asyncrwlock_try_read { - ($x:expr) => { - $x.try_read() - }; - } - #[macro_export] - macro_rules! asyncrwlock_try_write { - ($x:expr) => { - $x.try_write() - }; - } + } } +#[macro_export] +macro_rules! asyncrwlock_try_read { + ($x:expr) => { + $x.try_read() + }; +} +#[macro_export] +macro_rules! asyncrwlock_try_write { + ($x:expr) => { + $x.try_write() + }; +} + ////////////////////////////////////////////////////////////////////////////////////////////////////////////// pub fn system_boxed<'a, Out>( @@ -500,3 +502,24 @@ pub fn type_name_of_val(_val: &T) -> &'static str { pub fn map_to_string(arg: X) -> String { arg.to_string() } + +////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub struct DebugGuard { + counter: &'static AtomicUsize, +} + +impl DebugGuard { + pub fn new(counter: &'static AtomicUsize) -> Self { + let c = counter.fetch_add(1, Ordering::SeqCst); + eprintln!("DebugGuard Entered: {}", c + 1); + Self { counter } + } +} + +impl Drop for DebugGuard { + fn drop(&mut self) { + let c = self.counter.fetch_sub(1, Ordering::SeqCst); + eprintln!("DebugGuard Exited: {}", c - 1); + } +} From 39d8b397fae843f9c4c058ca0276ae3bac736c34 Mon Sep 17 00:00:00 2001 From: Christien Rioux Date: Sat, 20 Jul 2024 18:39:24 -0400 Subject: [PATCH 05/13] re-enable SO_LINGER(0) --- .../src/network_manager/native/network_tcp.rs | 54 +++++++++---------- .../native/protocol/sockets.rs | 6 +-- .../src/network_manager/network_connection.rs | 4 +- 3 files changed, 32 insertions(+), 32 deletions(-) diff --git a/veilid-core/src/network_manager/native/network_tcp.rs b/veilid-core/src/network_manager/native/network_tcp.rs index aeff8359..d47a4da3 100644 --- a/veilid-core/src/network_manager/native/network_tcp.rs +++ b/veilid-core/src/network_manager/native/network_tcp.rs @@ -132,33 +132,33 @@ impl Network { } }; - // #[cfg(all(feature = "rt-async-std", unix))] - // { - // // async-std does not directly support linger on TcpStream yet - // use std::os::fd::{AsRawFd, FromRawFd}; - // if let Err(e) = unsafe { socket2::Socket::from_raw_fd(tcp_stream.as_raw_fd()) } - // .set_linger(Some(core::time::Duration::from_secs(0))) - // { - // log_net!(debug "Couldn't set TCP linger: {}", e); - // return; - // } - // } - // #[cfg(all(feature = "rt-async-std", windows))] - // { - // // async-std does not directly support linger on TcpStream yet - // use std::os::windows::io::{AsRawSocket, FromRawSocket}; - // if let Err(e) = unsafe { socket2::Socket::from_raw_socket(tcp_stream.as_raw_socket()) } - // .set_linger(Some(core::time::Duration::from_secs(0))) - // { - // log_net!(debug "Couldn't set TCP linger: {}", e); - // return; - // } - // } - // #[cfg(not(feature = "rt-async-std"))] - // if let Err(e) = tcp_stream.set_linger(Some(core::time::Duration::from_secs(0))) { - // log_net!(debug "Couldn't set TCP linger: {}", e); - // return; - // } + #[cfg(all(feature = "rt-async-std", unix))] + { + // async-std does not directly support linger on TcpStream yet + use std::os::fd::{AsRawFd, FromRawFd}; + if let Err(e) = unsafe { socket2::Socket::from_raw_fd(tcp_stream.as_raw_fd()) } + .set_linger(Some(core::time::Duration::from_secs(0))) + { + log_net!(debug "Couldn't set TCP linger: {}", e); + return; + } + } + #[cfg(all(feature = "rt-async-std", windows))] + { + // async-std does not directly support linger on TcpStream yet + use std::os::windows::io::{AsRawSocket, FromRawSocket}; + if let Err(e) = unsafe { socket2::Socket::from_raw_socket(tcp_stream.as_raw_socket()) } + .set_linger(Some(core::time::Duration::from_secs(0))) + { + log_net!(debug "Couldn't set TCP linger: {}", e); + return; + } + } + #[cfg(not(feature = "rt-async-std"))] + if let Err(e) = tcp_stream.set_linger(Some(core::time::Duration::from_secs(0))) { + log_net!(debug "Couldn't set TCP linger: {}", e); + return; + } if let Err(e) = tcp_stream.set_nodelay(true) { log_net!(debug "Couldn't set TCP nodelay: {}", e); return; diff --git a/veilid-core/src/network_manager/native/protocol/sockets.rs b/veilid-core/src/network_manager/native/protocol/sockets.rs index c8a18f9e..da100d6f 100644 --- a/veilid-core/src/network_manager/native/protocol/sockets.rs +++ b/veilid-core/src/network_manager/native/protocol/sockets.rs @@ -91,9 +91,9 @@ pub fn new_default_tcp_socket(domain: Domain) -> io::Result { #[instrument(level = "trace", ret)] pub fn new_shared_tcp_socket(domain: Domain) -> io::Result { let socket = Socket::new(domain, Type::STREAM, Some(Protocol::TCP))?; - // if let Err(e) = socket.set_linger(Some(core::time::Duration::from_secs(0))) { - // log_net!(error "Couldn't set TCP linger: {}", e); - // } + if let Err(e) = socket.set_linger(Some(core::time::Duration::from_secs(0))) { + log_net!(error "Couldn't set TCP linger: {}", e); + } if let Err(e) = socket.set_nodelay(true) { log_net!(error "Couldn't set TCP nodelay: {}", e); } diff --git a/veilid-core/src/network_manager/network_connection.rs b/veilid-core/src/network_manager/network_connection.rs index 4f45a8a9..6acc0117 100644 --- a/veilid-core/src/network_manager/network_connection.rs +++ b/veilid-core/src/network_manager/network_connection.rs @@ -265,7 +265,7 @@ impl NetworkConnection { // Connection receiver loop #[allow(clippy::too_many_arguments)] - //#[instrument(level="trace", target="net", skip_all)] + #[instrument(level="trace", target="net", skip_all)] fn process_connection( connection_manager: ConnectionManager, local_stop_token: StopToken, @@ -299,7 +299,7 @@ impl NetworkConnection { }; let timer = MutableFuture::new(new_timer()); - unord.push(system_boxed(timer.clone().in_current_span())); + unord.push(system_boxed(timer.clone())); loop { // Add another message sender future if necessary From 35dc7bdfd6a69ecec2c32b114d06bb04ea4f83f4 Mon Sep 17 00:00:00 2001 From: Christien Rioux Date: Sat, 20 Jul 2024 19:42:25 -0400 Subject: [PATCH 06/13] spawn names --- veilid-cli/src/client_api_connection.rs | 2 +- veilid-cli/src/command_processor.rs | 20 +++++----- veilid-cli/src/log_viewer_ui.rs | 1 + veilid-core/src/attachment_manager.rs | 5 ++- veilid-core/src/crypto/mod.rs | 2 +- .../src/network_manager/connection_manager.rs | 5 ++- veilid-core/src/network_manager/mod.rs | 16 ++++++-- .../src/network_manager/native/igd_manager.rs | 6 +-- veilid-core/src/network_manager/native/mod.rs | 6 +-- .../src/network_manager/native/network_tcp.rs | 2 +- .../src/network_manager/native/network_udp.rs | 4 +- .../src/network_manager/network_connection.rs | 2 +- .../src/network_manager/receipt_manager.rs | 1 + veilid-core/src/network_manager/wasm/mod.rs | 2 - veilid-core/src/routing_table/mod.rs | 28 ++++++++++---- veilid-core/src/rpc_processor/mod.rs | 11 ++---- veilid-core/src/storage_manager/get_value.rs | 2 +- veilid-core/src/storage_manager/mod.rs | 25 +++++++++--- veilid-core/src/storage_manager/set_value.rs | 2 +- .../storage_manager/storage_manager_inner.rs | 2 +- veilid-core/src/veilid_api/api.rs | 2 +- veilid-server/src/client_api.rs | 14 +++++-- veilid-server/src/server.rs | 2 +- veilid-server/src/unix.rs | 2 +- veilid-tools/src/deferred_stream_processor.rs | 5 ++- veilid-tools/src/interval.rs | 8 ++-- veilid-tools/src/must_join_single_future.rs | 6 ++- veilid-tools/src/spawn.rs | 38 +++++++++---------- .../src/tests/common/test_async_tag_lock.rs | 12 +++--- .../src/tests/common/test_host_interface.rs | 37 +++++++++--------- .../src/tests/common/test_startup_lock.rs | 6 +-- veilid-tools/src/tick_task.rs | 16 ++++++-- 32 files changed, 177 insertions(+), 115 deletions(-) diff --git a/veilid-cli/src/client_api_connection.rs b/veilid-cli/src/client_api_connection.rs index ef185a2a..18a3f277 100644 --- a/veilid-cli/src/client_api_connection.rs +++ b/veilid-cli/src/client_api_connection.rs @@ -187,7 +187,7 @@ impl ClientApiConnection { // Request initial server state let capi = self.clone(); - spawn_detached_local(async move { + spawn_detached_local("get initial server state", async move { let mut req = json::JsonValue::new_object(); req["op"] = "GetState".into(); let Some(resp) = capi.perform_request(req).await else { diff --git a/veilid-cli/src/command_processor.rs b/veilid-cli/src/command_processor.rs index 7eb7c847..f648eff4 100644 --- a/veilid-cli/src/command_processor.rs +++ b/veilid-cli/src/command_processor.rs @@ -114,7 +114,7 @@ impl CommandProcessor { trace!("CommandProcessor::cmd_help"); let capi = self.capi(); let ui = self.ui_sender(); - spawn_detached_local(async move { + spawn_detached_local("cmd help", async move { let out = match capi.server_debug("help".to_owned()).await { Err(e) => { error!("Server command 'debug help' failed: {}", e); @@ -166,7 +166,7 @@ Server Debug Commands: trace!("CommandProcessor::cmd_shutdown"); let capi = self.capi(); let ui = self.ui_sender(); - spawn_detached_local(async move { + spawn_detached_local("cmd shutdown", async move { if let Err(e) = capi.server_shutdown().await { error!("Server command 'shutdown' failed to execute: {}", e); } @@ -179,7 +179,7 @@ Server Debug Commands: trace!("CommandProcessor::cmd_disconnect"); let capi = self.capi(); let ui = self.ui_sender(); - spawn_detached_local(async move { + spawn_detached_local("cmd disconnect", async move { capi.disconnect().await; ui.send_callback(callback); }); @@ -190,7 +190,7 @@ Server Debug Commands: trace!("CommandProcessor::cmd_debug"); let capi = self.capi(); let ui = self.ui_sender(); - spawn_detached_local(async move { + spawn_detached_local("cmd debug", async move { match capi.server_debug(command_line).await { Ok(output) => { ui.add_node_event(Level::Info, &output); @@ -213,7 +213,7 @@ Server Debug Commands: trace!("CommandProcessor::cmd_change_log_level"); let capi = self.capi(); let ui = self.ui_sender(); - spawn_detached_local(async move { + spawn_detached_local("cmd change_log_level", async move { let (layer, rest) = Self::word_split(&rest.unwrap_or_default()); let log_level = match convert_loglevel(&rest.unwrap_or_default()) { Ok(v) => v, @@ -252,7 +252,7 @@ Server Debug Commands: trace!("CommandProcessor::cmd_change_log_ignore"); let capi = self.capi(); let ui = self.ui_sender(); - spawn_detached_local(async move { + spawn_detached_local("cmd change_log_ignoe", async move { let (layer, rest) = Self::word_split(&rest.unwrap_or_default()); let log_ignore = rest.unwrap_or_default(); @@ -284,7 +284,7 @@ Server Debug Commands: let ui = self.ui_sender(); let this = self.clone(); - spawn_detached_local(async move { + spawn_detached_local("cmd enable", async move { let flag = rest.clone().unwrap_or_default(); match flag.as_str() { "app_messages" => { @@ -306,7 +306,7 @@ Server Debug Commands: let ui = self.ui_sender(); let this = self.clone(); - spawn_detached_local(async move { + spawn_detached_local("cmd disable", async move { let flag = rest.clone().unwrap_or_default(); match flag.as_str() { "app_messages" => { @@ -664,7 +664,7 @@ Server Debug Commands: pub fn attach(&self) { let capi = self.capi(); - spawn_detached_local(async move { + spawn_detached_local("attach", async move { if let Err(e) = capi.server_attach().await { error!("Server command 'attach' failed to execute: {}", e); } @@ -674,7 +674,7 @@ Server Debug Commands: pub fn detach(&self) { let capi = self.capi(); - spawn_detached_local(async move { + spawn_detached_local("detach", async move { if let Err(e) = capi.server_detach().await { error!("Server command 'detach' failed to execute: {}", e); } diff --git a/veilid-cli/src/log_viewer_ui.rs b/veilid-cli/src/log_viewer_ui.rs index 696cefe4..1e740ad0 100644 --- a/veilid-cli/src/log_viewer_ui.rs +++ b/veilid-cli/src/log_viewer_ui.rs @@ -86,6 +86,7 @@ impl LogViewerUI { done.await; } else { while let Ok(Ok(c)) = blocking_wrapper( + "LogViewerUI read", { let term = term.clone(); move || term.read_char() diff --git a/veilid-core/src/attachment_manager.rs b/veilid-core/src/attachment_manager.rs index 6a1a3a55..eaea459a 100644 --- a/veilid-core/src/attachment_manager.rs +++ b/veilid-core/src/attachment_manager.rs @@ -323,7 +323,10 @@ impl AttachmentManager { return false; } inner.maintain_peers = true; - inner.attachment_maintainer_jh = Some(spawn(self.clone().attachment_maintainer())); + inner.attachment_maintainer_jh = Some(spawn( + "attachment maintainer", + self.clone().attachment_maintainer(), + )); true } diff --git a/veilid-core/src/crypto/mod.rs b/veilid-core/src/crypto/mod.rs index 9d88a3ca..81218942 100644 --- a/veilid-core/src/crypto/mod.rs +++ b/veilid-core/src/crypto/mod.rs @@ -176,7 +176,7 @@ impl Crypto { // Schedule flushing let this = self.clone(); - let flush_future = interval(60000, move || { + let flush_future = interval("crypto flush", 60000, move || { let this = this.clone(); async move { if let Err(e) = this.flush().await { diff --git a/veilid-core/src/network_manager/connection_manager.rs b/veilid-core/src/network_manager/connection_manager.rs index 2539a7a8..6d789f10 100644 --- a/veilid-core/src/network_manager/connection_manager.rs +++ b/veilid-core/src/network_manager/connection_manager.rs @@ -134,7 +134,10 @@ impl ConnectionManager { let stop_source = StopSource::new(); // Spawn the async processor - let async_processor = spawn(self.clone().async_processor(stop_source.token(), receiver)); + let async_processor = spawn( + "connection manager async processor", + self.clone().async_processor(stop_source.token(), receiver), + ); // Store in the inner object *inner = Some(Self::new_inner(stop_source, sender, async_processor)); diff --git a/veilid-core/src/network_manager/mod.rs b/veilid-core/src/network_manager/mod.rs index 524ebccd..ef69b170 100644 --- a/veilid-core/src/network_manager/mod.rs +++ b/veilid-core/src/network_manager/mod.rs @@ -141,6 +141,7 @@ enum SendDataToExistingFlowResult { #[derive(Copy, Clone, Debug, Eq, PartialEq)] pub enum StartupDisposition { Success, + #[cfg_attr(target_arch = "wasm32", allow(dead_code))] BindRetry, } @@ -213,9 +214,18 @@ impl NetworkManager { routing_table: RwLock::new(None), components: RwLock::new(None), update_callback: RwLock::new(None), - rolling_transfers_task: TickTask::new(ROLLING_TRANSFERS_INTERVAL_SECS), - public_address_check_task: TickTask::new(PUBLIC_ADDRESS_CHECK_TASK_INTERVAL_SECS), - address_filter_task: TickTask::new(ADDRESS_FILTER_TASK_INTERVAL_SECS), + rolling_transfers_task: TickTask::new( + "rolling_transfers_task", + ROLLING_TRANSFERS_INTERVAL_SECS, + ), + public_address_check_task: TickTask::new( + "public_address_check_task", + PUBLIC_ADDRESS_CHECK_TASK_INTERVAL_SECS, + ), + address_filter_task: TickTask::new( + "address_filter_task", + ADDRESS_FILTER_TASK_INTERVAL_SECS, + ), network_key, startup_lock: StartupLock::new(), } diff --git a/veilid-core/src/network_manager/native/igd_manager.rs b/veilid-core/src/network_manager/native/igd_manager.rs index bdd0391a..c867323a 100644 --- a/veilid-core/src/network_manager/native/igd_manager.rs +++ b/veilid-core/src/network_manager/native/igd_manager.rs @@ -191,7 +191,7 @@ impl IGDManager { mapped_port: u16, ) -> Option<()> { let this = self.clone(); - blocking_wrapper(move || { + blocking_wrapper("igd unmap_port", move || { let mut inner = this.inner.lock(); // If we already have this port mapped, just return the existing portmap @@ -235,7 +235,7 @@ impl IGDManager { expected_external_address: Option, ) -> Option { let this = self.clone(); - blocking_wrapper(move || { + blocking_wrapper("igd map_any_port", move || { let mut inner = this.inner.lock(); // If we already have this port mapped, just return the existing portmap @@ -340,7 +340,7 @@ impl IGDManager { } let this = self.clone(); - blocking_wrapper(move || { + blocking_wrapper("igd tick", move || { let mut inner = this.inner.lock(); // Process full renewals diff --git a/veilid-core/src/network_manager/native/mod.rs b/veilid-core/src/network_manager/native/mod.rs index 909809d0..1e9a0400 100644 --- a/veilid-core/src/network_manager/native/mod.rs +++ b/veilid-core/src/network_manager/native/mod.rs @@ -173,9 +173,9 @@ impl Network { routing_table, connection_manager, interfaces: NetworkInterfaces::new(), - update_network_class_task: TickTask::new(1), - network_interfaces_task: TickTask::new(1), - upnp_task: TickTask::new(1), + update_network_class_task: TickTask::new("update_network_class_task", 1), + network_interfaces_task: TickTask::new("network_interfaces_task", 1), + upnp_task: TickTask::new("upnp_task", 1), igd_manager: igd_manager::IGDManager::new(config.clone()), } } diff --git a/veilid-core/src/network_manager/native/network_tcp.rs b/veilid-core/src/network_manager/native/network_tcp.rs index d47a4da3..60d26a35 100644 --- a/veilid-core/src/network_manager/native/network_tcp.rs +++ b/veilid-core/src/network_manager/native/network_tcp.rs @@ -297,7 +297,7 @@ impl Network { let connection_manager = self.connection_manager(); //////////////////////////////////////////////////////////// - let jh = spawn(async move { + let jh = spawn(&format!("TCP listener {}", addr), async move { // moves listener object in and get incoming iterator // when this task exists, the listener will close the socket diff --git a/veilid-core/src/network_manager/native/network_udp.rs b/veilid-core/src/network_manager/native/network_udp.rs index 6e082992..d4435245 100644 --- a/veilid-core/src/network_manager/native/network_udp.rs +++ b/veilid-core/src/network_manager/native/network_udp.rs @@ -16,14 +16,14 @@ impl Network { } } log_net!("task_count: {}", task_count); - for _ in 0..task_count { + for task_n in 0..task_count { log_net!("Spawning UDP listener task"); //////////////////////////////////////////////////////////// // Run thread task to process stream of messages let this = self.clone(); - let jh = spawn(async move { + let jh = spawn(&format!("UDP listener {}", task_n), async move { log_net!("UDP listener task spawned"); // Collect all our protocol handlers into a vector diff --git a/veilid-core/src/network_manager/network_connection.rs b/veilid-core/src/network_manager/network_connection.rs index 6acc0117..18a73dc9 100644 --- a/veilid-core/src/network_manager/network_connection.rs +++ b/veilid-core/src/network_manager/network_connection.rs @@ -148,7 +148,7 @@ impl NetworkConnection { let local_stop_token = stop_source.token(); // Spawn connection processor and pass in protocol connection - let processor = spawn(Self::process_connection( + let processor = spawn("connection processor", Self::process_connection( connection_manager, local_stop_token, manager_stop_token, diff --git a/veilid-core/src/network_manager/receipt_manager.rs b/veilid-core/src/network_manager/receipt_manager.rs index ae8bf083..45c9c38b 100644 --- a/veilid-core/src/network_manager/receipt_manager.rs +++ b/veilid-core/src/network_manager/receipt_manager.rs @@ -305,6 +305,7 @@ impl ReceiptManager { // Single-spawn the timeout task routine let _ = timeout_task .single_spawn( + "receipt timeout", self.clone() .timeout_task_routine(now, stop_token) .in_current_span(), diff --git a/veilid-core/src/network_manager/wasm/mod.rs b/veilid-core/src/network_manager/wasm/mod.rs index 25890b00..401af750 100644 --- a/veilid-core/src/network_manager/wasm/mod.rs +++ b/veilid-core/src/network_manager/wasm/mod.rs @@ -52,7 +52,6 @@ pub const MAX_CAPABILITIES: usize = 64; ///////////////////////////////////////////////////////////////// struct NetworkInner { - network_started: Option, network_needs_restart: bool, protocol_config: ProtocolConfig, } @@ -77,7 +76,6 @@ pub(in crate::network_manager) struct Network { impl Network { fn new_inner() -> NetworkInner { NetworkInner { - network_started: Some(false), network_needs_restart: false, protocol_config: Default::default(), } diff --git a/veilid-core/src/routing_table/mod.rs b/veilid-core/src/routing_table/mod.rs index 19c0d5e0..fc8acc10 100644 --- a/veilid-core/src/routing_table/mod.rs +++ b/veilid-core/src/routing_table/mod.rs @@ -221,14 +221,26 @@ impl RoutingTable { node_id: c.network.routing_table.node_id.clone(), node_id_secret: c.network.routing_table.node_id_secret.clone(), kick_queue: Mutex::new(BTreeSet::default()), - rolling_transfers_task: TickTask::new(ROLLING_TRANSFERS_INTERVAL_SECS), - kick_buckets_task: TickTask::new(1), - bootstrap_task: TickTask::new(1), - peer_minimum_refresh_task: TickTask::new(1), - closest_peers_refresh_task: TickTask::new_ms(c.network.dht.min_peer_refresh_time_ms), - ping_validator_task: TickTask::new(1), - relay_management_task: TickTask::new(RELAY_MANAGEMENT_INTERVAL_SECS), - private_route_management_task: TickTask::new(PRIVATE_ROUTE_MANAGEMENT_INTERVAL_SECS), + rolling_transfers_task: TickTask::new( + "rolling_transfers_task", + ROLLING_TRANSFERS_INTERVAL_SECS, + ), + kick_buckets_task: TickTask::new("kick_buckets_task", 1), + bootstrap_task: TickTask::new("bootstrap_task", 1), + peer_minimum_refresh_task: TickTask::new("peer_minimum_refresh_task", 1), + closest_peers_refresh_task: TickTask::new_ms( + "closest_peers_refresh_task", + c.network.dht.min_peer_refresh_time_ms, + ), + ping_validator_task: TickTask::new("ping_validator_task", 1), + relay_management_task: TickTask::new( + "relay_management_task", + RELAY_MANAGEMENT_INTERVAL_SECS, + ), + private_route_management_task: TickTask::new( + "private_route_management_task", + PRIVATE_ROUTE_MANAGEMENT_INTERVAL_SECS, + ), } } pub fn new(network_manager: NetworkManager) -> Self { diff --git a/veilid-core/src/rpc_processor/mod.rs b/veilid-core/src/rpc_processor/mod.rs index cca7766b..f1b4b4ed 100644 --- a/veilid-core/src/rpc_processor/mod.rs +++ b/veilid-core/src/rpc_processor/mod.rs @@ -392,10 +392,10 @@ impl RPCProcessor { "Spinning up {} RPC workers", self.unlocked_inner.concurrency ); - for _ in 0..self.unlocked_inner.concurrency { + for task_n in 0..self.unlocked_inner.concurrency { let this = self.clone(); let receiver = channel.1.clone(); - let jh = spawn(Self::rpc_worker( + let jh = spawn(&format!("rpc worker {}",task_n), Self::rpc_worker( this, inner.stop_source.as_ref().unwrap().token(), receiver, @@ -1679,14 +1679,9 @@ impl RPCProcessor { ) { while let Ok(Ok((_span_id, msg))) = receiver.recv_async().timeout_at(stop_token.clone()).await - { - //let rpc_worker_span = span!(parent: None, Level::TRACE, "rpc_worker recv"); - // xxx: causes crash (Missing otel data span extensions) - // rpc_worker_span.follows_from(span_id); - + { network_result_value_or_log!(match self .process_rpc_message(msg).in_current_span() - //.instrument(rpc_worker_span) .await { Err(e) => { diff --git a/veilid-core/src/storage_manager/get_value.rs b/veilid-core/src/storage_manager/get_value.rs index 1f2a785a..ef2eb2e6 100644 --- a/veilid-core/src/storage_manager/get_value.rs +++ b/veilid-core/src/storage_manager/get_value.rs @@ -225,7 +225,7 @@ impl StorageManager { }; // Call the fanout in a spawned task - spawn(Box::pin(async move { + spawn("outbound_get_value fanout", Box::pin(async move { let fanout_call = FanoutCall::new( routing_table.clone(), key, diff --git a/veilid-core/src/storage_manager/mod.rs b/veilid-core/src/storage_manager/mod.rs index c841e616..b34f494c 100644 --- a/veilid-core/src/storage_manager/mod.rs +++ b/veilid-core/src/storage_manager/mod.rs @@ -89,11 +89,26 @@ impl StorageManager { table_store, #[cfg(feature = "unstable-blockstore")] block_store, - flush_record_stores_task: TickTask::new(FLUSH_RECORD_STORES_INTERVAL_SECS), - offline_subkey_writes_task: TickTask::new(OFFLINE_SUBKEY_WRITES_INTERVAL_SECS), - send_value_changes_task: TickTask::new(SEND_VALUE_CHANGES_INTERVAL_SECS), - check_active_watches_task: TickTask::new(CHECK_ACTIVE_WATCHES_INTERVAL_SECS), - check_watched_records_task: TickTask::new(CHECK_WATCHED_RECORDS_INTERVAL_SECS), + flush_record_stores_task: TickTask::new( + "flush_record_stores_task", + FLUSH_RECORD_STORES_INTERVAL_SECS, + ), + offline_subkey_writes_task: TickTask::new( + "offline_subkey_writes_task", + OFFLINE_SUBKEY_WRITES_INTERVAL_SECS, + ), + send_value_changes_task: TickTask::new( + "send_value_changes_task", + SEND_VALUE_CHANGES_INTERVAL_SECS, + ), + check_active_watches_task: TickTask::new( + "check_active_watches_task", + CHECK_ACTIVE_WATCHES_INTERVAL_SECS, + ), + check_watched_records_task: TickTask::new( + "check_watched_records_task", + CHECK_WATCHED_RECORDS_INTERVAL_SECS, + ), anonymous_watch_keys, } diff --git a/veilid-core/src/storage_manager/set_value.rs b/veilid-core/src/storage_manager/set_value.rs index 7e343b80..dd77a1c5 100644 --- a/veilid-core/src/storage_manager/set_value.rs +++ b/veilid-core/src/storage_manager/set_value.rs @@ -224,7 +224,7 @@ impl StorageManager { }; // Call the fanout in a spawned task - spawn(Box::pin(async move { + spawn("outbound_set_value fanout", Box::pin(async move { let fanout_call = FanoutCall::new( routing_table.clone(), key, diff --git a/veilid-core/src/storage_manager/storage_manager_inner.rs b/veilid-core/src/storage_manager/storage_manager_inner.rs index 70f59092..f7a1a18e 100644 --- a/veilid-core/src/storage_manager/storage_manager_inner.rs +++ b/veilid-core/src/storage_manager/storage_manager_inner.rs @@ -133,7 +133,7 @@ impl StorageManagerInner { self.deferred_result_processor.init().await; // Schedule tick - let tick_future = interval(1000, move || { + let tick_future = interval("storage manager tick", 1000, move || { let this = outer_self.clone(); async move { if let Err(e) = this.tick().await { diff --git a/veilid-core/src/veilid_api/api.rs b/veilid-core/src/veilid_api/api.rs index 5c5f8123..a4179422 100644 --- a/veilid-core/src/veilid_api/api.rs +++ b/veilid-core/src/veilid_api/api.rs @@ -15,7 +15,7 @@ impl fmt::Debug for VeilidAPIInner { impl Drop for VeilidAPIInner { fn drop(&mut self) { if let Some(context) = self.context.take() { - spawn_detached(api_shutdown(context)); + spawn_detached("api shutdown", api_shutdown(context)); } } } diff --git a/veilid-server/src/client_api.rs b/veilid-server/src/client_api.rs index 22b47838..33cd160c 100644 --- a/veilid-server/src/client_api.rs +++ b/veilid-server/src/client_api.rs @@ -145,7 +145,11 @@ impl ClientApi { let t_awg = awg.clone(); // Process the connection - spawn(self.clone().handle_ipc_connection(stream, t_awg)).detach(); + spawn( + "client_api handle_ipc_connection", + self.clone().handle_ipc_connection(stream, t_awg), + ) + .detach(); } // Wait for all connections to terminate @@ -183,7 +187,11 @@ impl ClientApi { let t_awg = awg.clone(); // Process the connection - spawn(self.clone().handle_tcp_connection(stream, t_awg)).detach(); + spawn( + "client_api handle_tcp_connection", + self.clone().handle_tcp_connection(stream, t_awg), + ) + .detach(); } // Wait for all connections to terminate @@ -543,6 +551,6 @@ impl ClientApi { } let bind_futures_join = join_all(bind_futures); - self.inner.lock().join_handle = Some(spawn(bind_futures_join)); + self.inner.lock().join_handle = Some(spawn("client_api bind_futures", bind_futures_join)); } } diff --git a/veilid-server/src/server.rs b/veilid-server/src/server.rs index 33b519fc..0a21dbc0 100644 --- a/veilid-server/src/server.rs +++ b/veilid-server/src/server.rs @@ -108,7 +108,7 @@ pub async fn run_veilid_server( let capi2 = capi.clone(); let update_receiver_shutdown = SingleShotEventual::new(Some(())); let mut update_receiver_shutdown_instance = update_receiver_shutdown.instance().fuse(); - let update_receiver_jh = spawn_local(async move { + let update_receiver_jh = spawn_local("update_receiver", async move { loop { select! { res = receiver.recv_async() => { diff --git a/veilid-server/src/unix.rs b/veilid-server/src/unix.rs index 2d3d7fd4..af0d65f8 100644 --- a/veilid-server/src/unix.rs +++ b/veilid-server/src/unix.rs @@ -34,7 +34,7 @@ pub async fn run_veilid_server_with_signals( Signals::new([SIGHUP, SIGTERM, SIGINT, SIGQUIT]).wrap_err("failed to init signals")?; let handle = signals.handle(); - let signals_task = spawn(handle_signals(signals)); + let signals_task = spawn("signals", handle_signals(signals)); // Run veilid server let res = run_veilid_server(settings, server_mode, veilid_logs).await; diff --git a/veilid-tools/src/deferred_stream_processor.rs b/veilid-tools/src/deferred_stream_processor.rs index de9577f7..13f036fe 100644 --- a/veilid-tools/src/deferred_stream_processor.rs +++ b/veilid-tools/src/deferred_stream_processor.rs @@ -32,7 +32,10 @@ impl DeferredStreamProcessor { self.opt_stopper = Some(stopper); let (dsc_tx, dsc_rx) = flume::unbounded::>(); self.opt_deferred_stream_channel = Some(dsc_tx); - self.opt_join_handle = Some(spawn(Self::processor(stop_token, dsc_rx))); + self.opt_join_handle = Some(spawn( + "deferred stream processor", + Self::processor(stop_token, dsc_rx), + )); } /// Terminate the processor and ensure all streams are closed diff --git a/veilid-tools/src/interval.rs b/veilid-tools/src/interval.rs index 1d9a0bee..2ea5974f 100644 --- a/veilid-tools/src/interval.rs +++ b/veilid-tools/src/interval.rs @@ -3,7 +3,7 @@ use super::*; cfg_if! { if #[cfg(target_arch = "wasm32")] { - pub fn interval(freq_ms: u32, callback: F) -> SendPinBoxFuture<()> + pub fn interval(name: &str, freq_ms: u32, callback: F) -> SendPinBoxFuture<()> where F: Fn() -> FUT + Send + Sync + 'static, FUT: Future + Send, @@ -11,7 +11,7 @@ cfg_if! { let e = Eventual::new(); let ie = e.clone(); - let jh = spawn(Box::pin(async move { + let jh = spawn(name, Box::pin(async move { while timeout(freq_ms, ie.instance_clone(())).await.is_err() { callback().await; } @@ -25,7 +25,7 @@ cfg_if! { } else { - pub fn interval(freq_ms: u32, callback: F) -> SendPinBoxFuture<()> + pub fn interval(name: &str, freq_ms: u32, callback: F) -> SendPinBoxFuture<()> where F: Fn() -> FUT + Send + Sync + 'static, FUT: Future + Send, @@ -33,7 +33,7 @@ cfg_if! { let e = Eventual::new(); let ie = e.clone(); - let jh = spawn(async move { + let jh = spawn(name, async move { while timeout(freq_ms, ie.instance_clone(())).await.is_err() { callback().await; } diff --git a/veilid-tools/src/must_join_single_future.rs b/veilid-tools/src/must_join_single_future.rs index d9857ad6..e89f000d 100644 --- a/veilid-tools/src/must_join_single_future.rs +++ b/veilid-tools/src/must_join_single_future.rs @@ -124,6 +124,7 @@ where // Possibly spawn the future possibly returning the value of the last execution pub async fn single_spawn_local( &self, + name: &str, future: impl Future + 'static, ) -> Result<(Option, bool), ()> { let mut out: Option = None; @@ -152,7 +153,7 @@ where // Run if we should do that if run { - self.unlock(Some(spawn_local(future))); + self.unlock(Some(spawn_local(name, future))); } // Return the prior result if we have one @@ -166,6 +167,7 @@ where { pub async fn single_spawn( &self, + name: &str, future: impl Future + Send + 'static, ) -> Result<(Option, bool), ()> { let mut out: Option = None; @@ -191,7 +193,7 @@ where } // Run if we should do that if run { - self.unlock(Some(spawn(future))); + self.unlock(Some(spawn(name, future))); } // Return the prior result if we have one Ok((out, run)) diff --git a/veilid-tools/src/spawn.rs b/veilid-tools/src/spawn.rs index 8d85b01c..5b494c1c 100644 --- a/veilid-tools/src/spawn.rs +++ b/veilid-tools/src/spawn.rs @@ -4,7 +4,7 @@ cfg_if! { if #[cfg(target_arch = "wasm32")] { use async_executors::{Bindgen, LocalSpawnHandleExt, SpawnHandleExt}; - pub fn spawn(future: impl Future + Send + 'static) -> MustJoinHandle + pub fn spawn(_name: &str, future: impl Future + Send + 'static) -> MustJoinHandle where Out: Send + 'static, { @@ -15,7 +15,7 @@ cfg_if! { ) } - pub fn spawn_local(future: impl Future + 'static) -> MustJoinHandle + pub fn spawn_local(_name: &str, future: impl Future + 'static) -> MustJoinHandle where Out: 'static, { @@ -26,7 +26,7 @@ cfg_if! { ) } - pub fn spawn_detached(future: impl Future + Send + 'static) + pub fn spawn_detached(_name: &str, future: impl Future + Send + 'static) where Out: Send + 'static, { @@ -35,7 +35,7 @@ cfg_if! { .expect("wasm-bindgen-futures spawn_handle_local should never error out") .detach() } - pub fn spawn_detached_local(future: impl Future + 'static) + pub fn spawn_detached_local(_name: &str, future: impl Future + 'static) where Out: 'static, { @@ -47,60 +47,60 @@ cfg_if! { } else { - pub fn spawn(future: impl Future + Send + 'static) -> MustJoinHandle + pub fn spawn(name: &str, future: impl Future + Send + 'static) -> MustJoinHandle where Out: Send + 'static, { cfg_if! { if #[cfg(feature="rt-async-std")] { - MustJoinHandle::new(async_std::task::spawn(future)) + MustJoinHandle::new(async_std::task::Builder::new().name(name).spawn(future).unwrap()) } else if #[cfg(feature="rt-tokio")] { - MustJoinHandle::new(tokio::task::spawn(future)) + MustJoinHandle::new(tokio::task::Builder::new().name(name).spawn(future).unwrap()) } } } - pub fn spawn_local(future: impl Future + 'static) -> MustJoinHandle + pub fn spawn_local(name: &str, future: impl Future + 'static) -> MustJoinHandle where Out: 'static, { cfg_if! { if #[cfg(feature="rt-async-std")] { - MustJoinHandle::new(async_std::task::spawn_local(future)) + MustJoinHandle::new(async_std::task::Builder::new().name(name).local(future).unwrap()) } else if #[cfg(feature="rt-tokio")] { - MustJoinHandle::new(tokio::task::spawn_local(future)) + MustJoinHandle::new(tokio::task::Builder::new().name(name).spawn_local(future).unwrap()) } } } - pub fn spawn_detached(future: impl Future + Send + 'static) + pub fn spawn_detached(name: &str, future: impl Future + Send + 'static) where Out: Send + 'static, { cfg_if! { if #[cfg(feature="rt-async-std")] { - drop(async_std::task::spawn(future)); + drop(async_std::task::Builder::new().name(name).spawn(future).unwrap()); } else if #[cfg(feature="rt-tokio")] { - drop(tokio::task::spawn(future)); + drop(tokio::task::Builder::new().name(name).spawn(future).unwrap()); } } } - pub fn spawn_detached_local(future: impl Future + 'static) + pub fn spawn_detached_local(name: &str,future: impl Future + 'static) where Out: 'static, { cfg_if! { if #[cfg(feature="rt-async-std")] { - drop(async_std::task::spawn_local(future)); + drop(async_std::task::Builder::new().name(name).local(future).unwrap()); } else if #[cfg(feature="rt-tokio")] { - drop(tokio::task::spawn_local(future)); + drop(tokio::task::Builder::new().name(name).spawn_local(future).unwrap()); } } } #[allow(unused_variables)] - pub async fn blocking_wrapper(blocking_task: F, err_result: R) -> R + pub async fn blocking_wrapper(name: &str, blocking_task: F, err_result: R) -> R where F: FnOnce() -> R + Send + 'static, R: Send + 'static, @@ -108,9 +108,9 @@ cfg_if! { // run blocking stuff in blocking thread cfg_if! { if #[cfg(feature="rt-async-std")] { - async_std::task::spawn_blocking(blocking_task).await + async_std::task::Builder::new().name(name).blocking(blocking_task) } else if #[cfg(feature="rt-tokio")] { - tokio::task::spawn_blocking(blocking_task).await.unwrap_or(err_result) + tokio::task::Builder::new().name(name).spawn_blocking(blocking_task).unwrap().await.unwrap_or(err_result) } else { #[compile_error("must use an executor")] } diff --git a/veilid-tools/src/tests/common/test_async_tag_lock.rs b/veilid-tools/src/tests/common/test_async_tag_lock.rs index caef3001..1789cd16 100644 --- a/veilid-tools/src/tests/common/test_async_tag_lock.rs +++ b/veilid-tools/src/tests/common/test_async_tag_lock.rs @@ -35,7 +35,7 @@ pub async fn test_simple_single_contention() { let g1 = table.lock_tag(a1).await; info!("locked"); - let t1 = spawn(async move { + let t1 = spawn("t1", async move { // move the guard into the task let _g1_take = g1; // hold the guard for a bit @@ -90,7 +90,7 @@ pub async fn test_simple_double_contention() { let g2 = table.lock_tag(a2).await; info!("locked"); - let t1 = spawn(async move { + let t1 = spawn("t1", async move { // move the guard into the tas let _g1_take = g1; // hold the guard for a bit @@ -99,7 +99,7 @@ pub async fn test_simple_double_contention() { // release the guard info!("released"); }); - let t2 = spawn(async move { + let t2 = spawn("t2", async move { // move the guard into the task let _g2_take = g2; // hold the guard for a bit @@ -131,7 +131,7 @@ pub async fn test_parallel_single_contention() { let a1 = SocketAddr::new("1.2.3.4".parse().unwrap(), 1234); let table1 = table.clone(); - let t1 = spawn(async move { + let t1 = spawn("t1", async move { // lock the tag let _g = table1.lock_tag(a1).await; info!("locked t1"); @@ -143,7 +143,7 @@ pub async fn test_parallel_single_contention() { }); let table2 = table.clone(); - let t2 = spawn(async move { + let t2 = spawn("t2", async move { // lock the tag let _g = table2.lock_tag(a1).await; info!("locked t2"); @@ -155,7 +155,7 @@ pub async fn test_parallel_single_contention() { }); let table3 = table.clone(); - let t3 = spawn(async move { + let t3 = spawn("t3", async move { // lock the tag let _g = table3.lock_tag(a1).await; info!("locked t3"); diff --git a/veilid-tools/src/tests/common/test_host_interface.rs b/veilid-tools/src/tests/common/test_host_interface.rs index f694e866..014af920 100644 --- a/veilid-tools/src/tests/common/test_host_interface.rs +++ b/veilid-tools/src/tests/common/test_host_interface.rs @@ -30,7 +30,7 @@ pub async fn test_eventual() { let i4 = e1.instance_clone(4u32); drop(i2); - let jh = spawn(async move { + let jh = spawn("task", async move { sleep(1000).await; e1.resolve(); }); @@ -47,7 +47,7 @@ pub async fn test_eventual() { let i3 = e1.instance_clone(3u32); let i4 = e1.instance_clone(4u32); let e1_c1 = e1.clone(); - let jh = spawn(async move { + let jh = spawn("task", async move { let i5 = e1.instance_clone(5u32); let i6 = e1.instance_clone(6u32); assert_eq!(i1.await, 1u32); @@ -67,7 +67,7 @@ pub async fn test_eventual() { let i1 = e1.instance_clone(1u32); let i2 = e1.instance_clone(2u32); let e1_c1 = e1.clone(); - let jh = spawn(async move { + let jh = spawn("task", async move { assert_eq!(i1.await, 1u32); assert_eq!(i2.await, 2u32); }); @@ -80,7 +80,7 @@ pub async fn test_eventual() { // let j1 = e1.instance_clone(1u32); let j2 = e1.instance_clone(2u32); - let jh = spawn(async move { + let jh = spawn("task", async move { assert_eq!(j1.await, 1u32); assert_eq!(j2.await, 2u32); }); @@ -105,7 +105,7 @@ pub async fn test_eventual_value() { drop(i2); let e1_c1 = e1.clone(); - let jh = spawn(async move { + let jh = spawn("task", async move { sleep(1000).await; e1_c1.resolve(3u32); }); @@ -122,7 +122,7 @@ pub async fn test_eventual_value() { let i3 = e1.instance(); let i4 = e1.instance(); let e1_c1 = e1.clone(); - let jh = spawn(async move { + let jh = spawn("task", async move { let i5 = e1.instance(); let i6 = e1.instance(); i1.await; @@ -144,7 +144,7 @@ pub async fn test_eventual_value() { let i1 = e1.instance(); let i2 = e1.instance(); let e1_c1 = e1.clone(); - let jh = spawn(async move { + let jh = spawn("task", async move { i1.await; i2.await; }); @@ -157,7 +157,7 @@ pub async fn test_eventual_value() { // let j1 = e1.instance(); let j2 = e1.instance(); - let jh = spawn(async move { + let jh = spawn("task", async move { j1.await; j2.await; }); @@ -181,7 +181,7 @@ pub async fn test_eventual_value_clone() { let i4 = e1.instance(); drop(i2); - let jh = spawn(async move { + let jh = spawn("task", async move { sleep(1000).await; e1.resolve(3u32); }); @@ -199,7 +199,7 @@ pub async fn test_eventual_value_clone() { let i3 = e1.instance(); let i4 = e1.instance(); let e1_c1 = e1.clone(); - let jh = spawn(async move { + let jh = spawn("task", async move { let i5 = e1.instance(); let i6 = e1.instance(); assert_eq!(i1.await, 4); @@ -220,7 +220,7 @@ pub async fn test_eventual_value_clone() { let i1 = e1.instance(); let i2 = e1.instance(); let e1_c1 = e1.clone(); - let jh = spawn(async move { + let jh = spawn("task", async move { assert_eq!(i1.await, 5); assert_eq!(i2.await, 5); }); @@ -231,7 +231,7 @@ pub async fn test_eventual_value_clone() { // let j1 = e1.instance(); let j2 = e1.instance(); - let jh = spawn(async move { + let jh = spawn("task", async move { assert_eq!(j1.await, 6); assert_eq!(j2.await, 6); }); @@ -245,7 +245,7 @@ pub async fn test_interval() { info!("testing interval"); let tick: Arc> = Arc::new(Mutex::new(0u32)); - let stopper = interval(1000, move || { + let stopper = interval("interval", 1000, move || { let tick = tick.clone(); async move { let mut tick = tick.lock(); @@ -493,7 +493,7 @@ pub async fn test_must_join_single_future() { let sf = MustJoinSingleFuture::::new(); assert_eq!(sf.check().await, Ok(None)); assert_eq!( - sf.single_spawn(async { + sf.single_spawn("t1", async { sleep(2000).await; 69 }) @@ -501,10 +501,13 @@ pub async fn test_must_join_single_future() { Ok((None, true)) ); assert_eq!(sf.check().await, Ok(None)); - assert_eq!(sf.single_spawn(async { panic!() }).await, Ok((None, false))); + assert_eq!( + sf.single_spawn("t2", async { panic!() }).await, + Ok((None, false)) + ); assert_eq!(sf.join().await, Ok(Some(69))); assert_eq!( - sf.single_spawn(async { + sf.single_spawn("t3", async { sleep(1000).await; 37 }) @@ -513,7 +516,7 @@ pub async fn test_must_join_single_future() { ); sleep(2000).await; assert_eq!( - sf.single_spawn(async { + sf.single_spawn("t4", async { sleep(1000).await; 27 }) diff --git a/veilid-tools/src/tests/common/test_startup_lock.rs b/veilid-tools/src/tests/common/test_startup_lock.rs index d8823839..4efb1bf7 100644 --- a/veilid-tools/src/tests/common/test_startup_lock.rs +++ b/veilid-tools/src/tests/common/test_startup_lock.rs @@ -62,7 +62,7 @@ pub async fn test_contention() { assert!(lock.is_started()); let lock2 = lock.clone(); let val2 = val.clone(); - let jh = spawn(async move { + let jh = spawn("task", async move { let _guard = lock2.enter().expect("should enter"); sleep(2000).await; val2.store(true, Ordering::Release); @@ -95,7 +95,7 @@ pub async fn test_bad_enter() { assert!(!lock.is_shut_down()); let lock2 = lock.clone(); - let jh = spawn(async move { + let jh = spawn("task", async move { let guard = lock2.shutdown().await.expect("should shutdown"); sleep(2000).await; guard.success(); @@ -139,7 +139,7 @@ pub async fn test_multiple_enter() { //eprintln!("1"); let lock2 = lock.clone(); - let jh = spawn(async move { + let jh = spawn("task", async move { //eprintln!("2"); let guard = lock2.shutdown().await.expect("should shutdown"); //eprintln!("7"); diff --git a/veilid-tools/src/tick_task.rs b/veilid-tools/src/tick_task.rs index 339bd49d..d0f5d9fe 100644 --- a/veilid-tools/src/tick_task.rs +++ b/veilid-tools/src/tick_task.rs @@ -10,6 +10,7 @@ type TickTaskRoutine = /// If the prior tick is still running, it will allow it to finish, and do another tick when the timer comes around again. /// One should attempt to make tasks short-lived things that run in less than the tick period if you want things to happen with regular periodicity. pub struct TickTask { + name: String, last_timestamp_us: AtomicU64, tick_period_us: u64, routine: OnceCell>>, @@ -19,8 +20,9 @@ pub struct TickTask { } impl TickTask { - pub fn new_us(tick_period_us: u64) -> Self { + pub fn new_us(name: &str, tick_period_us: u64) -> Self { Self { + name: name.to_string(), last_timestamp_us: AtomicU64::new(0), tick_period_us, routine: OnceCell::new(), @@ -29,8 +31,9 @@ impl TickTask { running: Arc::new(AtomicBool::new(false)), } } - pub fn new_ms(tick_period_ms: u32) -> Self { + pub fn new_ms(name: &str, tick_period_ms: u32) -> Self { Self { + name: name.to_string(), last_timestamp_us: AtomicU64::new(0), tick_period_us: (tick_period_ms as u64) * 1000u64, routine: OnceCell::new(), @@ -39,8 +42,9 @@ impl TickTask { running: Arc::new(AtomicBool::new(false)), } } - pub fn new(tick_period_sec: u32) -> Self { + pub fn new(name: &str, tick_period_sec: u32) -> Self { Self { + name: name.to_string(), last_timestamp_us: AtomicU64::new(0), tick_period_us: (tick_period_sec as u64) * 1000000u64, routine: OnceCell::new(), @@ -147,7 +151,11 @@ impl TickTask { running.store(false, core::sync::atomic::Ordering::Release); out }); - match self.single_future.single_spawn(wrapped_routine).await { + match self + .single_future + .single_spawn(&self.name, wrapped_routine) + .await + { // We should have already consumed the result of the last run, or there was none // and we should definitely have run, because the prior 'check()' operation // should have ensured the singlefuture was ready to run From 2ec843cd17f12f1ff1be0cfca8a7af9b863ea8aa Mon Sep 17 00:00:00 2001 From: Christien Rioux Date: Sun, 21 Jul 2024 21:15:54 -0400 Subject: [PATCH 07/13] clean up spans and correct shutdown behavior. clean up reverse connection and hole punch errors --- veilid-cli/Cargo.toml | 6 +- veilid-core/Cargo.toml | 1 + veilid-core/src/attachment_manager.rs | 2 +- .../src/network_manager/address_filter.rs | 1 + veilid-core/src/network_manager/mod.rs | 13 +-- .../native/discovery_context.rs | 7 +- .../src/network_manager/native/igd_manager.rs | 4 +- veilid-core/src/network_manager/native/mod.rs | 8 +- .../native/network_class_discovery.rs | 9 +- .../src/network_manager/native/network_tcp.rs | 9 +- .../src/network_manager/native/network_udp.rs | 3 + .../native/protocol/sockets.rs | 10 +- .../network_manager/native/protocol/tcp.rs | 2 +- .../src/network_manager/native/protocol/ws.rs | 2 +- .../network_manager/native/start_protocols.rs | 5 + .../src/network_manager/network_connection.rs | 9 +- .../src/network_manager/receipt_manager.rs | 11 +- veilid-core/src/network_manager/send_data.rs | 104 ++++++++++++------ veilid-core/src/network_manager/tasks/mod.rs | 1 + .../tasks/public_address_check.rs | 4 +- veilid-core/src/network_manager/wasm/mod.rs | 1 + veilid-core/src/routing_table/tasks/mod.rs | 1 + .../src/routing_table/tasks/ping_validator.rs | 18 ++- veilid-core/src/rpc_processor/fanout_call.rs | 14 ++- veilid-core/src/rpc_processor/mod.rs | 31 +++--- .../src/rpc_processor/operation_waiter.rs | 53 ++++----- veilid-core/src/rpc_processor/rpc_app_call.rs | 8 +- .../rpc_processor/rpc_validate_dial_info.rs | 49 ++++++--- veilid-core/src/storage_manager/get_value.rs | 6 +- .../src/storage_manager/inspect_value.rs | 2 +- veilid-core/src/storage_manager/set_value.rs | 6 +- veilid-core/src/storage_manager/tasks/mod.rs | 2 + .../tasks/send_value_changes.rs | 19 +++- .../src/storage_manager/watch_value.rs | 2 +- veilid-core/src/veilid_api/api.rs | 1 - veilid-server/Cargo.toml | 1 + veilid-server/src/server.rs | 34 +++--- veilid-tools/src/eventual.rs | 2 +- veilid-tools/src/eventual_value.rs | 2 +- veilid-tools/src/eventual_value_clone.rs | 2 +- veilid-tools/src/lib.rs | 1 + veilid-tools/src/must_join_single_future.rs | 2 + veilid-tools/src/startup_lock.rs | 61 ++++++---- veilid-tools/src/tick_task.rs | 18 ++- veilid-tools/src/timeout.rs | 10 +- veilid-tools/src/tools.rs | 9 +- 46 files changed, 354 insertions(+), 212 deletions(-) diff --git a/veilid-cli/Cargo.toml b/veilid-cli/Cargo.toml index 1a48797f..4f6c4124 100644 --- a/veilid-cli/Cargo.toml +++ b/veilid-cli/Cargo.toml @@ -22,12 +22,12 @@ rt-async-std = [ rt-tokio = ["tokio", "tokio-util", "veilid-tools/rt-tokio", "cursive/rt-tokio"] [dependencies] -async-std = { version = "^1.12", features = [ +async-std = { version = "1.12.0", features = [ "unstable", "attributes", ], optional = true } -tokio = { version = "^1", features = ["full"], optional = true } -tokio-util = { version = "^0", features = ["compat"], optional = true } +tokio = { version = "1.38.1", features = ["full", "tracing"], optional = true } +tokio-util = { version = "0.7.11", features = ["compat"], optional = true } async-tungstenite = { version = "^0.23" } cursive = { git = "https://gitlab.com/veilid/cursive.git", default-features = false, features = [ "crossterm", diff --git a/veilid-core/Cargo.toml b/veilid-core/Cargo.toml index ebcc40d9..160cd873 100644 --- a/veilid-core/Cargo.toml +++ b/veilid-core/Cargo.toml @@ -51,6 +51,7 @@ crypto-test = ["enable-crypto-vld0", "enable-crypto-none"] crypto-test-none = ["enable-crypto-none"] veilid_core_android_tests = ["dep:paranoid-android"] veilid_core_ios_tests = ["dep:tracing-oslog"] +debug-locks = ["veilid-tools/debug-locks"] ### DEPENDENCIES diff --git a/veilid-core/src/attachment_manager.rs b/veilid-core/src/attachment_manager.rs index eaea459a..2a501d15 100644 --- a/veilid-core/src/attachment_manager.rs +++ b/veilid-core/src/attachment_manager.rs @@ -211,7 +211,7 @@ impl AttachmentManager { } } - #[instrument(level = "debug", skip_all)] + #[instrument(parent = None, level = "debug", skip_all)] async fn attachment_maintainer(self) { log_net!(debug "attachment starting"); self.update_attaching_detaching_state(AttachmentState::Attaching); diff --git a/veilid-core/src/network_manager/address_filter.rs b/veilid-core/src/network_manager/address_filter.rs index 1e35f44b..5247026e 100644 --- a/veilid-core/src/network_manager/address_filter.rs +++ b/veilid-core/src/network_manager/address_filter.rs @@ -337,6 +337,7 @@ impl AddressFilter { .or_insert(punishment); } + #[instrument(parent = None, level = "trace", skip_all, err)] pub async fn address_filter_task_routine( self, _stop_token: StopToken, diff --git a/veilid-core/src/network_manager/mod.rs b/veilid-core/src/network_manager/mod.rs index ef69b170..ba956405 100644 --- a/veilid-core/src/network_manager/mod.rs +++ b/veilid-core/src/network_manager/mod.rs @@ -64,8 +64,6 @@ pub const PUBLIC_ADDRESS_INCONSISTENCY_PUNISHMENT_TIMEOUT_US: TimestampDuration pub const ADDRESS_FILTER_TASK_INTERVAL_SECS: u32 = 60; pub const BOOT_MAGIC: &[u8; 4] = b"BOOT"; -static FUCK: AtomicUsize = AtomicUsize::new(0); - #[derive(Clone, Debug, Default)] pub struct ProtocolConfig { pub outbound: ProtocolTypeSet, @@ -586,7 +584,7 @@ impl NetworkManager { } /// Generates a multi-shot/normal receipt - #[instrument(level = "trace", skip(self, extra_data, callback), err)] + #[instrument(level = "trace", skip(self, extra_data, callback))] pub fn generate_receipt>( &self, expiration_us: u64, @@ -626,7 +624,7 @@ impl NetworkManager { } /// Generates a single-shot/normal receipt - #[instrument(level = "trace", skip(self, extra_data), err)] + #[instrument(level = "trace", skip(self, extra_data))] pub fn generate_single_shot_receipt>( &self, expiration_us: u64, @@ -918,10 +916,8 @@ impl NetworkManager { destination_node_ref: Option, body: B, ) -> EyreResult> { - let _dg = DebugGuard::new(&FUCK); - let Ok(_guard) = self.unlocked_inner.startup_lock.enter() else { - bail!("network is not started"); + return Ok(NetworkResult::no_connection_other("network is not started")); }; let destination_node_ref = destination_node_ref.as_ref().unwrap_or(&node_ref).clone(); @@ -962,7 +958,8 @@ impl NetworkManager { rcpt_data: Vec, ) -> EyreResult<()> { let Ok(_guard) = self.unlocked_inner.startup_lock.enter() else { - bail!("network is not started"); + log_net!(debug "not sending out-of-band receipt to {} because network is stopped", dial_info); + return Ok(()); }; // Do we need to validate the outgoing receipt? Probably not diff --git a/veilid-core/src/network_manager/native/discovery_context.rs b/veilid-core/src/network_manager/native/discovery_context.rs index 9a5be337..69a3129e 100644 --- a/veilid-core/src/network_manager/native/discovery_context.rs +++ b/veilid-core/src/network_manager/native/discovery_context.rs @@ -262,7 +262,7 @@ impl DiscoveryContext { // Always process two at a time so we get both addresses in parallel if possible if unord.len() == 2 { // Process one - if let Some(Some(ei)) = unord.next().await { + if let Some(Some(ei)) = unord.next().in_current_span().await { external_address_infos.push(ei); if external_address_infos.len() == 2 { break; @@ -272,7 +272,7 @@ impl DiscoveryContext { } // Finish whatever is left if we need to if external_address_infos.len() < 2 { - while let Some(res) = unord.next().await { + while let Some(res) = unord.next().in_current_span().await { if let Some(ei) = res { external_address_infos.push(ei); if external_address_infos.len() == 2 { @@ -644,6 +644,7 @@ impl DiscoveryContext { } /// Add discovery futures to an unordered set that may detect dialinfo when they complete + #[instrument(level = "trace", skip(self))] pub async fn discover( &self, unord: &mut FuturesUnordered>>, @@ -681,7 +682,7 @@ impl DiscoveryContext { } }; if let Some(clear_network_callback) = some_clear_network_callback { - clear_network_callback().await; + clear_network_callback().in_current_span().await; } // UPNP Automatic Mapping diff --git a/veilid-core/src/network_manager/native/igd_manager.rs b/veilid-core/src/network_manager/native/igd_manager.rs index c867323a..bdda356c 100644 --- a/veilid-core/src/network_manager/native/igd_manager.rs +++ b/veilid-core/src/network_manager/native/igd_manager.rs @@ -310,7 +310,7 @@ impl IGDManager { .await } - #[instrument(level = "trace", target = "net", skip_all, err)] + #[instrument(level = "trace", target = "net", name = "IGDManager::tick", skip_all, err)] pub async fn tick(&self) -> EyreResult { // Refresh mappings if we have them // If an error is received, then return false to restart the local network @@ -434,6 +434,6 @@ impl IGDManager { // Normal exit, no restart Ok(true) - }, Err(eyre!("failed to process blocking task"))).in_current_span().await + }, Err(eyre!("failed to process blocking task"))).instrument(tracing::trace_span!("igd tick fut")).await } } diff --git a/veilid-core/src/network_manager/native/mod.rs b/veilid-core/src/network_manager/native/mod.rs index 1e9a0400..44d6dcf4 100644 --- a/veilid-core/src/network_manager/native/mod.rs +++ b/veilid-core/src/network_manager/native/mod.rs @@ -518,7 +518,7 @@ impl Network { let mut out = vec![0u8; MAX_MESSAGE_SIZE]; let (recv_len, recv_addr) = network_result_try!(timeout( timeout_ms, - h.recv_message(&mut out).instrument(Span::current()) + h.recv_message(&mut out).in_current_span() ) .await .into_network_result()) @@ -569,7 +569,7 @@ impl Network { let out = network_result_try!(network_result_try!(timeout( timeout_ms, - pnc.recv() + pnc.recv().in_current_span() ) .await .into_network_result()) @@ -1063,7 +1063,7 @@ impl Network { Ok(()) } - #[instrument(level = "trace", target = "net", skip_all, err)] + #[instrument(parent = None, level = "trace", target = "net", skip_all, err)] async fn upnp_task_routine(self, _stop_token: StopToken, _l: u64, _t: u64) -> EyreResult<()> { if !self.unlocked_inner.igd_manager.tick().await? { info!("upnp failed, restarting local network"); @@ -1074,7 +1074,7 @@ impl Network { Ok(()) } - #[instrument(level = "trace", target = "net", skip_all, err)] + #[instrument(level = "trace", target = "net", name = "Network::tick", skip_all, err)] pub(crate) async fn tick(&self) -> EyreResult<()> { let Ok(_guard) = self.unlocked_inner.startup_lock.enter() else { log_net!(debug "ignoring due to not started up"); diff --git a/veilid-core/src/network_manager/native/network_class_discovery.rs b/veilid-core/src/network_manager/native/network_class_discovery.rs index 2c33f205..bf6a9c16 100644 --- a/veilid-core/src/network_manager/native/network_class_discovery.rs +++ b/veilid-core/src/network_manager/native/network_class_discovery.rs @@ -200,7 +200,12 @@ impl Network { // Wait for all discovery futures to complete and apply discoverycontexts let mut all_address_types = AddressTypeSet::new(); loop { - match unord.next().timeout_at(stop_token.clone()).await { + match unord + .next() + .timeout_at(stop_token.clone()) + .in_current_span() + .await + { Ok(Some(Some(dr))) => { // Found some new dial info for this protocol/address combination self.update_with_detected_dial_info(dr.ddi.clone()).await?; @@ -277,7 +282,7 @@ impl Network { Ok(()) } - #[instrument(level = "trace", skip(self), err)] + #[instrument(parent = None, level = "trace", skip(self), err)] pub async fn update_network_class_task_routine( self, stop_token: StopToken, diff --git a/veilid-core/src/network_manager/native/network_tcp.rs b/veilid-core/src/network_manager/native/network_tcp.rs index 60d26a35..ca9c862e 100644 --- a/veilid-core/src/network_manager/native/network_tcp.rs +++ b/veilid-core/src/network_manager/native/network_tcp.rs @@ -38,6 +38,7 @@ impl Network { Ok(acceptor) } + #[instrument(level = "trace", skip_all)] async fn try_tls_handlers( &self, tls_acceptor: &TlsAcceptor, @@ -60,7 +61,7 @@ impl Network { // read a chunk of the stream timeout( tls_connection_initial_timeout_ms, - ps.peek_exact(&mut first_packet), + ps.peek_exact(&mut first_packet).in_current_span(), ) .await .wrap_err("tls initial timeout")? @@ -70,6 +71,7 @@ impl Network { .await } + #[instrument(level = "trace", skip_all)] async fn try_handlers( &self, stream: AsyncPeekStream, @@ -90,6 +92,7 @@ impl Network { Ok(None) } + #[instrument(level = "trace", skip_all)] async fn tcp_acceptor( self, tcp_stream: io::Result, @@ -180,7 +183,7 @@ impl Network { // read a chunk of the stream if timeout( connection_initial_timeout_ms, - ps.peek_exact(&mut first_packet), + ps.peek_exact(&mut first_packet).in_current_span(), ) .await .is_err() @@ -237,6 +240,7 @@ impl Network { } } + #[instrument(level = "trace", skip_all)] async fn spawn_socket_listener(&self, addr: SocketAddr) -> EyreResult { // Get config let (connection_initial_timeout_ms, tls_connection_initial_timeout_ms) = { @@ -344,6 +348,7 @@ impl Network { ///////////////////////////////////////////////////////////////// // TCP listener that multiplexes ports so multiple protocols can exist on a single port + #[instrument(level = "trace", skip_all)] pub(super) async fn start_tcp_listener( &self, bind_set: NetworkBindSet, diff --git a/veilid-core/src/network_manager/native/network_udp.rs b/veilid-core/src/network_manager/native/network_udp.rs index d4435245..b7caa29f 100644 --- a/veilid-core/src/network_manager/native/network_udp.rs +++ b/veilid-core/src/network_manager/native/network_udp.rs @@ -3,6 +3,7 @@ use sockets::*; use stop_token::future::FutureExt; impl Network { + #[instrument(level = "trace", skip_all)] pub(super) async fn create_udp_listener_tasks(&self) -> EyreResult<()> { // Spawn socket tasks let mut task_count = { @@ -108,6 +109,7 @@ impl Network { Ok(()) } + #[instrument(level = "trace", skip_all)] async fn create_udp_protocol_handler(&self, addr: SocketAddr) -> EyreResult { log_net!("create_udp_protocol_handler on {:?}", &addr); @@ -148,6 +150,7 @@ impl Network { Ok(true) } + #[instrument(level = "trace", skip_all)] pub(super) async fn create_udp_protocol_handlers( &self, bind_set: NetworkBindSet, diff --git a/veilid-core/src/network_manager/native/protocol/sockets.rs b/veilid-core/src/network_manager/native/protocol/sockets.rs index da100d6f..2ef43277 100644 --- a/veilid-core/src/network_manager/native/protocol/sockets.rs +++ b/veilid-core/src/network_manager/native/protocol/sockets.rs @@ -162,10 +162,12 @@ pub async fn nonblocking_connect( let async_stream = Async::new(std::net::TcpStream::from(socket))?; // The stream becomes writable when connected - timeout_or_try!(timeout(timeout_ms, async_stream.writable()) - .await - .into_timeout_or() - .into_result()?); + timeout_or_try!( + timeout(timeout_ms, async_stream.writable().in_current_span()) + .await + .into_timeout_or() + .into_result()? + ); // Check low level error let async_stream = match async_stream.get_ref().take_error()? { diff --git a/veilid-core/src/network_manager/native/protocol/tcp.rs b/veilid-core/src/network_manager/native/protocol/tcp.rs index 90ae9f95..0acbb97d 100644 --- a/veilid-core/src/network_manager/native/protocol/tcp.rs +++ b/veilid-core/src/network_manager/native/protocol/tcp.rs @@ -134,7 +134,7 @@ impl RawTcpProtocolHandler { let mut peekbuf: [u8; PEEK_DETECT_LEN] = [0u8; PEEK_DETECT_LEN]; if (timeout( self.connection_initial_timeout_ms, - ps.peek_exact(&mut peekbuf), + ps.peek_exact(&mut peekbuf).in_current_span(), ) .await) .is_err() diff --git a/veilid-core/src/network_manager/native/protocol/ws.rs b/veilid-core/src/network_manager/native/protocol/ws.rs index 36b17e8b..471fde5f 100644 --- a/veilid-core/src/network_manager/native/protocol/ws.rs +++ b/veilid-core/src/network_manager/native/protocol/ws.rs @@ -222,7 +222,7 @@ impl WebsocketProtocolHandler { let mut peek_buf = [0u8; MAX_WS_BEFORE_BODY]; let peek_len = match timeout( self.arc.connection_initial_timeout_ms, - ps.peek(&mut peek_buf), + ps.peek(&mut peek_buf).in_current_span(), ) .await { diff --git a/veilid-core/src/network_manager/native/start_protocols.rs b/veilid-core/src/network_manager/native/start_protocols.rs index fc037de5..082c7daf 100644 --- a/veilid-core/src/network_manager/native/start_protocols.rs +++ b/veilid-core/src/network_manager/native/start_protocols.rs @@ -84,6 +84,7 @@ impl Network { // Returns a port, a set of ip addresses to bind to, and a // bool specifying if multiple ports should be tried + #[instrument(level = "trace", skip_all)] async fn convert_listen_address_to_bind_set( &self, listen_address: String, @@ -136,6 +137,7 @@ impl Network { ///////////////////////////////////////////////////// + #[instrument(level = "trace", skip_all)] pub(super) async fn bind_udp_protocol_handlers( &self, editor_public_internet: &mut RoutingDomainEditor, @@ -249,6 +251,7 @@ impl Network { Ok(StartupDisposition::Success) } + #[instrument(level = "trace", skip_all)] pub(super) async fn start_ws_listeners( &self, editor_public_internet: &mut RoutingDomainEditor, @@ -364,6 +367,7 @@ impl Network { Ok(StartupDisposition::Success) } + #[instrument(level = "trace", skip_all)] pub(super) async fn start_wss_listeners( &self, editor_public_internet: &mut RoutingDomainEditor, @@ -463,6 +467,7 @@ impl Network { Ok(StartupDisposition::Success) } + #[instrument(level = "trace", skip_all)] pub(super) async fn start_tcp_listeners( &self, editor_public_internet: &mut RoutingDomainEditor, diff --git a/veilid-core/src/network_manager/network_connection.rs b/veilid-core/src/network_manager/network_connection.rs index 18a73dc9..ac3f6873 100644 --- a/veilid-core/src/network_manager/network_connection.rs +++ b/veilid-core/src/network_manager/network_connection.rs @@ -136,7 +136,8 @@ impl NetworkConnection { let flow = protocol_connection.flow(); // Create handle for sending - let (sender, receiver) = flume::bounded(get_concurrency() as usize); + //let (sender, receiver) = flume::bounded(get_concurrency() as usize); + let (sender, receiver) = flume::unbounded(); // Create stats let stats = Arc::new(Mutex::new(NetworkConnectionStats { @@ -265,7 +266,7 @@ impl NetworkConnection { // Connection receiver loop #[allow(clippy::too_many_arguments)] - #[instrument(level="trace", target="net", skip_all)] + #[instrument(parent = None, level="trace", target="net", skip_all)] fn process_connection( connection_manager: ConnectionManager, local_stop_token: StopToken, @@ -299,7 +300,7 @@ impl NetworkConnection { }; let timer = MutableFuture::new(new_timer()); - unord.push(system_boxed(timer.clone())); + unord.push(system_boxed(timer.clone().in_current_span())); loop { // Add another message sender future if necessary @@ -333,7 +334,7 @@ impl NetworkConnection { RecvLoopAction::Finish } } - }); + }.in_current_span()); unord.push(system_boxed(sender_fut.in_current_span())); } diff --git a/veilid-core/src/network_manager/receipt_manager.rs b/veilid-core/src/network_manager/receipt_manager.rs index 45c9c38b..c0e53a9d 100644 --- a/veilid-core/src/network_manager/receipt_manager.rs +++ b/veilid-core/src/network_manager/receipt_manager.rs @@ -281,7 +281,13 @@ impl ReceiptManager { } } - #[instrument(level = "trace", target = "receipt", skip_all, err)] + #[instrument( + level = "trace", + target = "receipt", + name = "ReceiptManager::tick", + skip_all, + err + )] pub async fn tick(&self) -> EyreResult<()> { let Ok(_guard) = self.unlocked_inner.startup_lock.enter() else { return Ok(()); @@ -308,8 +314,9 @@ impl ReceiptManager { "receipt timeout", self.clone() .timeout_task_routine(now, stop_token) - .in_current_span(), + .instrument(trace_span!(parent: None, "receipt timeout task")), ) + .in_current_span() .await; } } diff --git a/veilid-core/src/network_manager/send_data.rs b/veilid-core/src/network_manager/send_data.rs index 503be4ab..722610a4 100644 --- a/veilid-core/src/network_manager/send_data.rs +++ b/veilid-core/src/network_manager/send_data.rs @@ -1,4 +1,5 @@ use super::*; +use stop_token::future::FutureExt as _; impl NetworkManager { /// Send raw data to a node @@ -146,7 +147,7 @@ impl NetworkManager { Ok(NetworkResult::value(send_data_method)) } - .instrument(trace_span!("send_data")), + .in_current_span() ) } @@ -559,6 +560,12 @@ impl NetworkManager { target_nr: NodeRef, data: Vec, ) -> EyreResult> { + + // Detect if network is stopping so we can break out of this + let Some(stop_token) = self.unlocked_inner.startup_lock.stop_token() else { + return Ok(NetworkResult::service_unavailable("network is stopping")); + }; + // Build a return receipt for the signal let receipt_timeout = ms_to_us( self.unlocked_inner @@ -588,30 +595,38 @@ impl NetworkManager { let rpc = self.rpc_processor(); network_result_try!(rpc .rpc_call_signal( - Destination::relay(relay_nr, target_nr.clone()), + Destination::relay(relay_nr.clone(), target_nr.clone()), SignalInfo::ReverseConnect { receipt, peer_info }, ) .await .wrap_err("failed to send signal")?); // Wait for the return receipt - let inbound_nr = match eventual_value.await.take_value().unwrap() { - ReceiptEvent::ReturnedPrivate { private_route: _ } - | ReceiptEvent::ReturnedOutOfBand - | ReceiptEvent::ReturnedSafety => { - return Ok(NetworkResult::invalid_message( - "reverse connect receipt should be returned in-band", - )); + let inbound_nr = match eventual_value.timeout_at(stop_token).in_current_span().await { + Err(_) => { + return Ok(NetworkResult::service_unavailable("network is stopping")); } - ReceiptEvent::ReturnedInBand { inbound_noderef } => inbound_noderef, - ReceiptEvent::Expired => { - return Ok(NetworkResult::timeout()); - } - ReceiptEvent::Cancelled => { - return Ok(NetworkResult::no_connection_other(format!( - "reverse connect receipt cancelled from {}", - target_nr - ))) + Ok(v) => { + let receipt_event = v.take_value().unwrap(); + match receipt_event { + ReceiptEvent::ReturnedPrivate { private_route: _ } + | ReceiptEvent::ReturnedOutOfBand + | ReceiptEvent::ReturnedSafety => { + return Ok(NetworkResult::invalid_message( + "reverse connect receipt should be returned in-band", + )); + } + ReceiptEvent::ReturnedInBand { inbound_noderef } => inbound_noderef, + ReceiptEvent::Expired => { + return Ok(NetworkResult::timeout()); + } + ReceiptEvent::Cancelled => { + return Ok(NetworkResult::no_connection_other(format!( + "reverse connect receipt cancelled from {}", + target_nr + ))) + } + } } }; @@ -634,7 +649,9 @@ impl NetworkManager { )), } } else { - bail!("no reverse connection available") + return Ok(NetworkResult::no_connection_other(format!( + "reverse connection dropped from {}", target_nr) + )); } } @@ -648,6 +665,11 @@ impl NetworkManager { target_nr: NodeRef, data: Vec, ) -> EyreResult> { + // Detect if network is stopping so we can break out of this + let Some(stop_token) = self.unlocked_inner.startup_lock.stop_token() else { + return Ok(NetworkResult::service_unavailable("network is stopping")); + }; + // Ensure we are filtered down to UDP (the only hole punch protocol supported today) assert!(target_nr .filter_ref() @@ -706,23 +728,31 @@ impl NetworkManager { .wrap_err("failed to send signal")?); // Wait for the return receipt - let inbound_nr = match eventual_value.await.take_value().unwrap() { - ReceiptEvent::ReturnedPrivate { private_route: _ } - | ReceiptEvent::ReturnedOutOfBand - | ReceiptEvent::ReturnedSafety => { - return Ok(NetworkResult::invalid_message( - "hole punch receipt should be returned in-band", - )); + let inbound_nr = match eventual_value.timeout_at(stop_token).in_current_span().await { + Err(_) => { + return Ok(NetworkResult::service_unavailable("network is stopping")); } - ReceiptEvent::ReturnedInBand { inbound_noderef } => inbound_noderef, - ReceiptEvent::Expired => { - return Ok(NetworkResult::timeout()); - } - ReceiptEvent::Cancelled => { - return Ok(NetworkResult::no_connection_other(format!( - "hole punch receipt cancelled from {}", - target_nr - ))) + Ok(v) => { + let receipt_event = v.take_value().unwrap(); + match receipt_event { + ReceiptEvent::ReturnedPrivate { private_route: _ } + | ReceiptEvent::ReturnedOutOfBand + | ReceiptEvent::ReturnedSafety => { + return Ok(NetworkResult::invalid_message( + "hole punch receipt should be returned in-band", + )); + } + ReceiptEvent::ReturnedInBand { inbound_noderef } => inbound_noderef, + ReceiptEvent::Expired => { + return Ok(NetworkResult::timeout()); + } + ReceiptEvent::Cancelled => { + return Ok(NetworkResult::no_connection_other(format!( + "hole punch receipt cancelled from {}", + target_nr + ))) + } + } } }; @@ -749,7 +779,9 @@ impl NetworkManager { )), } } else { - bail!("no hole punch available") + return Ok(NetworkResult::no_connection_other(format!( + "hole punch dropped from {}", target_nr) + )); } } } diff --git a/veilid-core/src/network_manager/tasks/mod.rs b/veilid-core/src/network_manager/tasks/mod.rs index 0556bdb6..e663204d 100644 --- a/veilid-core/src/network_manager/tasks/mod.rs +++ b/veilid-core/src/network_manager/tasks/mod.rs @@ -48,6 +48,7 @@ impl NetworkManager { } } + #[instrument(level = "trace", name = "NetworkManager::tick", skip_all, err)] pub async fn tick(&self) -> EyreResult<()> { let routing_table = self.routing_table(); let net = self.net(); diff --git a/veilid-core/src/network_manager/tasks/public_address_check.rs b/veilid-core/src/network_manager/tasks/public_address_check.rs index e62567a6..fdb92262 100644 --- a/veilid-core/src/network_manager/tasks/public_address_check.rs +++ b/veilid-core/src/network_manager/tasks/public_address_check.rs @@ -2,10 +2,10 @@ use super::*; impl NetworkManager { // Clean up the public address check tables, removing entries that have timed out - #[instrument(level = "trace", skip(self), err)] + #[instrument(parent = None, level = "trace", skip_all, err)] pub(crate) async fn public_address_check_task_routine( self, - stop_token: StopToken, + _stop_token: StopToken, _last_ts: Timestamp, cur_ts: Timestamp, ) -> EyreResult<()> { diff --git a/veilid-core/src/network_manager/wasm/mod.rs b/veilid-core/src/network_manager/wasm/mod.rs index 401af750..f116baa5 100644 --- a/veilid-core/src/network_manager/wasm/mod.rs +++ b/veilid-core/src/network_manager/wasm/mod.rs @@ -510,6 +510,7 @@ impl Network { } ////////////////////////////////////////// + #[instrument(level = "trace", target = "net", name = "Network::tick", skip_all, err)] pub(crate) async fn tick(&self) -> EyreResult<()> { let Ok(_guard) = self.unlocked_inner.startup_lock.enter() else { log_net!(debug "ignoring due to not started up"); diff --git a/veilid-core/src/routing_table/tasks/mod.rs b/veilid-core/src/routing_table/tasks/mod.rs index f90fa8e4..31a838b7 100644 --- a/veilid-core/src/routing_table/tasks/mod.rs +++ b/veilid-core/src/routing_table/tasks/mod.rs @@ -112,6 +112,7 @@ impl RoutingTable { /// Ticks about once per second /// to run tick tasks which may run at slower tick rates as configured + #[instrument(level = "trace", name = "RoutingTable::tick", skip_all, err)] pub async fn tick(&self) -> EyreResult<()> { // Don't tick if paused let opt_tick_guard = { diff --git a/veilid-core/src/routing_table/tasks/ping_validator.rs b/veilid-core/src/routing_table/tasks/ping_validator.rs index c95bd888..077a07d0 100644 --- a/veilid-core/src/routing_table/tasks/ping_validator.rs +++ b/veilid-core/src/routing_table/tasks/ping_validator.rs @@ -237,8 +237,6 @@ impl RoutingTable { _last_ts: Timestamp, cur_ts: Timestamp, ) -> EyreResult<()> { - eprintln!("pv tick"); - let mut futurequeue: VecDeque = VecDeque::new(); // PublicInternet @@ -253,9 +251,19 @@ impl RoutingTable { let mut unord = FuturesUnordered::new(); while !unord.is_empty() || !futurequeue.is_empty() { - log_rtab!(debug "Ping validation queue: {} remaining, {} in progress", futurequeue.len(), unord.len()); + log_rtab!( + "Ping validation queue: {} remaining, {} in progress", + futurequeue.len(), + unord.len() + ); + // Process one unordered futures if we have some - match unord.next().timeout_at(stop_token.clone()).await { + match unord + .next() + .timeout_at(stop_token.clone()) + .in_current_span() + .await + { Ok(Some(_)) => { // Some ping completed } @@ -273,7 +281,7 @@ impl RoutingTable { let Some(fq) = futurequeue.pop_front() else { break; }; - unord.push(fq.in_current_span()); + unord.push(fq); } } diff --git a/veilid-core/src/rpc_processor/fanout_call.rs b/veilid-core/src/rpc_processor/fanout_call.rs index a8e7cff9..0b9bc509 100644 --- a/veilid-core/src/rpc_processor/fanout_call.rs +++ b/veilid-core/src/rpc_processor/fanout_call.rs @@ -321,13 +321,17 @@ where } } // Wait for them to complete - timeout(timeout_ms, async { - while let Some(is_done) = unord.next().await { - if is_done { - break; + timeout( + timeout_ms, + async { + while let Some(is_done) = unord.next().in_current_span().await { + if is_done { + break; + } } } - }) + .in_current_span(), + ) .await .into_timeout_or() .map(|_| { diff --git a/veilid-core/src/rpc_processor/mod.rs b/veilid-core/src/rpc_processor/mod.rs index f1b4b4ed..8b342f7c 100644 --- a/veilid-core/src/rpc_processor/mod.rs +++ b/veilid-core/src/rpc_processor/mod.rs @@ -277,7 +277,7 @@ enum RPCKind { ///////////////////////////////////////////////////////////////////// struct RPCProcessorInner { - send_channel: Option, RPCMessageEncoded)>>, + send_channel: Option>, stop_source: Option, worker_join_handles: Vec>, } @@ -590,7 +590,7 @@ impl RPCProcessor { }; Ok(nr) - }) + }.in_current_span()) } #[instrument(level="trace", target="rpc", skip_all)] @@ -1662,10 +1662,13 @@ impl RPCProcessor { RPCStatementDetail::AppMessage(_) => self.process_app_message(msg).await, }, RPCOperationKind::Answer(_) => { - self.unlocked_inner + let op_id = msg.operation.op_id(); + if let Err(e) = self.unlocked_inner .waiting_rpc_table - .complete_op_waiter(msg.operation.op_id(), msg) - .await?; + .complete_op_waiter(op_id, msg) { + log_rpc!(debug "Operation id {} did not complete: {}", op_id, e); + // Don't throw an error here because it's okay if the original operation timed out + } Ok(NetworkResult::value(())) } } @@ -1675,13 +1678,16 @@ impl RPCProcessor { async fn rpc_worker( self, stop_token: StopToken, - receiver: flume::Receiver<(Option, RPCMessageEncoded)>, + receiver: flume::Receiver<(Span, RPCMessageEncoded)>, ) { - while let Ok(Ok((_span_id, msg))) = + while let Ok(Ok((prev_span, msg))) = receiver.recv_async().timeout_at(stop_token.clone()).await { + let rpc_message_span = tracing::trace_span!("rpc message"); + rpc_message_span.follows_from(prev_span); + network_result_value_or_log!(match self - .process_rpc_message(msg).in_current_span() + .process_rpc_message(msg).instrument(rpc_message_span) .await { Err(e) => { @@ -1730,9 +1736,8 @@ impl RPCProcessor { }; send_channel }; - let span_id = Span::current().id(); send_channel - .try_send((span_id, msg)) + .try_send((Span::current(), msg)) .map_err(|e| eyre!("failed to enqueue direct RPC message: {}", e))?; Ok(()) } @@ -1766,9 +1771,8 @@ impl RPCProcessor { }; send_channel }; - let span_id = Span::current().id(); send_channel - .try_send((span_id, msg)) + .try_send((Span::current(), msg)) .map_err(|e| eyre!("failed to enqueue safety routed RPC message: {}", e))?; Ok(()) } @@ -1805,9 +1809,8 @@ impl RPCProcessor { }; send_channel }; - let span_id = Span::current().id(); send_channel - .try_send((span_id, msg)) + .try_send((Span::current(), msg)) .map_err(|e| eyre!("failed to enqueue private routed RPC message: {}", e))?; Ok(()) } diff --git a/veilid-core/src/rpc_processor/operation_waiter.rs b/veilid-core/src/rpc_processor/operation_waiter.rs index 0bf5ba01..9c24708b 100644 --- a/veilid-core/src/rpc_processor/operation_waiter.rs +++ b/veilid-core/src/rpc_processor/operation_waiter.rs @@ -8,7 +8,7 @@ where { waiter: OperationWaiter, op_id: OperationId, - eventual_instance: Option, T)>>, + result_receiver: Option>, } impl Drop for OperationWaitHandle @@ -17,7 +17,7 @@ where C: Unpin + Clone, { fn drop(&mut self) { - if self.eventual_instance.is_some() { + if self.result_receiver.is_some() { self.waiter.cancel_op_waiter(self.op_id); } } @@ -31,7 +31,7 @@ where { context: C, timestamp: Timestamp, - eventual: EventualValue<(Option, T)>, + result_sender: flume::Sender<(Span, T)>, } #[derive(Debug)] @@ -80,11 +80,11 @@ where /// Set up wait for operation to complete pub fn add_op_waiter(&self, op_id: OperationId, context: C) -> OperationWaitHandle { let mut inner = self.inner.lock(); - let e = EventualValue::new(); + let (result_sender, result_receiver) = flume::bounded(1); let waiting_op = OperationWaitingOp { context, timestamp: get_aligned_timestamp(), - eventual: e.clone(), + result_sender, }; if inner.waiting_op_table.insert(op_id, waiting_op).is_some() { error!( @@ -96,7 +96,7 @@ where OperationWaitHandle { waiter: self.clone(), op_id, - eventual_instance: Some(e.instance()), + result_receiver: Some(result_receiver), } } @@ -122,14 +122,15 @@ where } /// Remove wait for op + #[instrument(level = "trace", target = "rpc", skip_all)] fn cancel_op_waiter(&self, op_id: OperationId) { let mut inner = self.inner.lock(); inner.waiting_op_table.remove(&op_id); } - /// Complete the app call + /// Complete the waiting op #[instrument(level = "trace", target = "rpc", skip_all)] - pub async fn complete_op_waiter(&self, op_id: OperationId, message: T) -> Result<(), RPCError> { + pub fn complete_op_waiter(&self, op_id: OperationId, message: T) -> Result<(), RPCError> { let waiting_op = { let mut inner = self.inner.lock(); inner @@ -141,10 +142,9 @@ where )))? }; waiting_op - .eventual - .resolve((Span::current().id(), message)) - .await; - Ok(()) + .result_sender + .send((Span::current(), message)) + .map_err(RPCError::ignore) } /// Wait for operation to complete @@ -156,29 +156,30 @@ where ) -> Result, RPCError> { let timeout_ms = us_to_ms(timeout_us.as_u64()).map_err(RPCError::internal)?; - // Take the instance + // Take the receiver // After this, we must manually cancel since the cancel on handle drop is disabled - let eventual_instance = handle.eventual_instance.take().unwrap(); + let result_receiver = handle.result_receiver.take().unwrap(); + + let result_fut = result_receiver.recv_async().in_current_span(); // wait for eventualvalue let start_ts = get_aligned_timestamp(); - let res = timeout(timeout_ms, eventual_instance) - .await - .into_timeout_or(); - Ok(res - .on_timeout(|| { - // log_rpc!(debug "op wait timed out: {}", handle.op_id); - // debug_print_backtrace(); + let res = timeout(timeout_ms, result_fut).await.into_timeout_or(); + + match res { + TimeoutOr::Timeout => { self.cancel_op_waiter(handle.op_id); - }) - .map(|res| { - let (_span_id, ret) = res.take_value().unwrap(); + Ok(TimeoutOr::Timeout) + } + TimeoutOr::Value(Ok((_span_id, ret))) => { let end_ts = get_aligned_timestamp(); //xxx: causes crash (Missing otel data span extensions) // Span::current().follows_from(span_id); - (ret, end_ts.saturating_sub(start_ts)) - })) + Ok(TimeoutOr::Value((ret, end_ts.saturating_sub(start_ts)))) + } + TimeoutOr::Value(Err(e)) => Err(RPCError::ignore(e)), + } } } diff --git a/veilid-core/src/rpc_processor/rpc_app_call.rs b/veilid-core/src/rpc_processor/rpc_app_call.rs index cde79fe4..e5e4eaf6 100644 --- a/veilid-core/src/rpc_processor/rpc_app_call.rs +++ b/veilid-core/src/rpc_processor/rpc_app_call.rs @@ -153,11 +153,7 @@ impl RPCProcessor { /// Exposed to API for apps to return app call answers #[instrument(level = "trace", target = "rpc", skip_all)] - pub async fn app_call_reply( - &self, - call_id: OperationId, - message: Vec, - ) -> Result<(), RPCError> { + pub fn app_call_reply(&self, call_id: OperationId, message: Vec) -> Result<(), RPCError> { let _guard = self .unlocked_inner .startup_lock @@ -166,6 +162,6 @@ impl RPCProcessor { self.unlocked_inner .waiting_app_call_table .complete_op_waiter(call_id, message) - .await + .map_err(RPCError::ignore) } } diff --git a/veilid-core/src/rpc_processor/rpc_validate_dial_info.rs b/veilid-core/src/rpc_processor/rpc_validate_dial_info.rs index 02fbfaed..95f2e0f9 100644 --- a/veilid-core/src/rpc_processor/rpc_validate_dial_info.rs +++ b/veilid-core/src/rpc_processor/rpc_validate_dial_info.rs @@ -15,6 +15,11 @@ impl RPCProcessor { .startup_lock .enter() .map_err(RPCError::map_try_again("not started up"))?; + let stop_token = self + .unlocked_inner + .startup_lock + .stop_token() + .ok_or(RPCError::try_again("not started up"))?; let network_manager = self.network_manager(); let receipt_time = ms_to_us(self.unlocked_inner.validate_dial_info_receipt_time_ms); @@ -38,23 +43,35 @@ impl RPCProcessor { ); // Wait for receipt - match eventual_value.await.take_value().unwrap() { - ReceiptEvent::ReturnedPrivate { private_route: _ } - | ReceiptEvent::ReturnedInBand { inbound_noderef: _ } - | ReceiptEvent::ReturnedSafety => { - log_net!(debug "validate_dial_info receipt should be returned out-of-band"); - Ok(false) + match eventual_value + .timeout_at(stop_token) + .in_current_span() + .await + { + Err(_) => { + return Err(RPCError::try_again("not started up")); } - ReceiptEvent::ReturnedOutOfBand => { - log_net!(debug "validate_dial_info receipt returned"); - Ok(true) - } - ReceiptEvent::Expired => { - log_net!(debug "validate_dial_info receipt expired"); - Ok(false) - } - ReceiptEvent::Cancelled => { - Err(RPCError::internal("receipt was dropped before expiration")) + Ok(v) => { + let receipt_event = v.take_value().unwrap(); + match receipt_event { + ReceiptEvent::ReturnedPrivate { private_route: _ } + | ReceiptEvent::ReturnedInBand { inbound_noderef: _ } + | ReceiptEvent::ReturnedSafety => { + log_net!(debug "validate_dial_info receipt should be returned out-of-band"); + Ok(false) + } + ReceiptEvent::ReturnedOutOfBand => { + log_net!(debug "validate_dial_info receipt returned"); + Ok(true) + } + ReceiptEvent::Expired => { + log_net!(debug "validate_dial_info receipt expired"); + Ok(false) + } + ReceiptEvent::Cancelled => { + Err(RPCError::internal("receipt was dropped before expiration")) + } + } } } } diff --git a/veilid-core/src/storage_manager/get_value.rs b/veilid-core/src/storage_manager/get_value.rs index ef2eb2e6..675aef6f 100644 --- a/veilid-core/src/storage_manager/get_value.rs +++ b/veilid-core/src/storage_manager/get_value.rs @@ -182,7 +182,7 @@ impl StorageManager { log_network_result!(debug "GetValue fanout call returned peers {}", gva.answer.peers.len()); Ok(NetworkResult::value(gva.answer.peers)) - }.in_current_span() + }.instrument(tracing::trace_span!("outbound_get_value fanout routine")) } }; @@ -271,7 +271,7 @@ impl StorageManager { })) { log_dht!(debug "Sending GetValue result failed: {}", e); } - }.in_current_span())) + }.instrument(tracing::trace_span!("outbound_get_value result")))) .detach(); Ok(out_rx) @@ -319,7 +319,7 @@ impl StorageManager { // Return done false - }.in_current_span()) + }.instrument(tracing::trace_span!("outbound_get_value deferred results"))) }, ), ); diff --git a/veilid-core/src/storage_manager/inspect_value.rs b/veilid-core/src/storage_manager/inspect_value.rs index 1ec7761e..7483caaf 100644 --- a/veilid-core/src/storage_manager/inspect_value.rs +++ b/veilid-core/src/storage_manager/inspect_value.rs @@ -228,7 +228,7 @@ impl StorageManager { log_network_result!(debug "InspectValue fanout call returned peers {}", answer.peers.len()); Ok(NetworkResult::value(answer.peers)) - }.in_current_span() + }.instrument(tracing::trace_span!("outbound_inspect_value fanout call")) }; // Routine to call to check if we're done at each step diff --git a/veilid-core/src/storage_manager/set_value.rs b/veilid-core/src/storage_manager/set_value.rs index dd77a1c5..d673c3c5 100644 --- a/veilid-core/src/storage_manager/set_value.rs +++ b/veilid-core/src/storage_manager/set_value.rs @@ -177,7 +177,7 @@ impl StorageManager { ctx.send_partial_update = true; Ok(NetworkResult::value(sva.answer.peers)) - }.in_current_span() + }.instrument(tracing::trace_span!("fanout call_routine")) } }; @@ -267,7 +267,7 @@ impl StorageManager { })) { log_dht!(debug "Sending SetValue result failed: {}", e); } - }.in_current_span())) + }.instrument(tracing::trace_span!("outbound_set_value fanout routine")))) .detach(); Ok(out_rx) @@ -329,7 +329,7 @@ impl StorageManager { // Return done false - }.in_current_span()) + }.instrument(tracing::trace_span!("outbound_set_value deferred results"))) }, ), ); diff --git a/veilid-core/src/storage_manager/tasks/mod.rs b/veilid-core/src/storage_manager/tasks/mod.rs index dc580860..c128cfec 100644 --- a/veilid-core/src/storage_manager/tasks/mod.rs +++ b/veilid-core/src/storage_manager/tasks/mod.rs @@ -80,6 +80,7 @@ impl StorageManager { } } + #[instrument(parent = None, level = "trace", target = "stor", name = "StorageManager::tick", skip_all, err)] pub async fn tick(&self) -> EyreResult<()> { // Run the flush stores task self.unlocked_inner.flush_record_stores_task.tick().await?; @@ -109,6 +110,7 @@ impl StorageManager { Ok(()) } + #[instrument(level = "trace", target = "stor", skip_all)] pub(crate) async fn cancel_tasks(&self) { log_stor!(debug "stopping check watched records task"); if let Err(e) = self.unlocked_inner.check_watched_records_task.stop().await { diff --git a/veilid-core/src/storage_manager/tasks/send_value_changes.rs b/veilid-core/src/storage_manager/tasks/send_value_changes.rs index 457fc897..98a43c24 100644 --- a/veilid-core/src/storage_manager/tasks/send_value_changes.rs +++ b/veilid-core/src/storage_manager/tasks/send_value_changes.rs @@ -32,15 +32,24 @@ impl StorageManager { // Add a future for each value change for vc in value_changes { let this = self.clone(); - unord.push(async move { - if let Err(e) = this.send_value_change(vc).await { - log_stor!(debug "Failed to send value change: {}", e); + unord.push( + async move { + if let Err(e) = this.send_value_change(vc).await { + log_stor!(debug "Failed to send value change: {}", e); + } } - }); + .in_current_span(), + ); } while !unord.is_empty() { - match unord.next().timeout_at(stop_token.clone()).await { + match unord + .next() + .in_current_span() + .timeout_at(stop_token.clone()) + .in_current_span() + .await + { Ok(Some(_)) => { // Some ValueChanged completed } diff --git a/veilid-core/src/storage_manager/watch_value.rs b/veilid-core/src/storage_manager/watch_value.rs index 628f5b1f..e7a7f5aa 100644 --- a/veilid-core/src/storage_manager/watch_value.rs +++ b/veilid-core/src/storage_manager/watch_value.rs @@ -294,7 +294,7 @@ impl StorageManager { log_network_result!(debug "WatchValue fanout call returned peers {} ({})", wva.answer.peers.len(), next_node); Ok(NetworkResult::value(wva.answer.peers)) - }.in_current_span() + }.instrument(tracing::trace_span!("outbound_watch_value call routine")) }; // Routine to call to check if we're done at each step diff --git a/veilid-core/src/veilid_api/api.rs b/veilid-core/src/veilid_api/api.rs index a4179422..0bcf58e1 100644 --- a/veilid-core/src/veilid_api/api.rs +++ b/veilid-core/src/veilid_api/api.rs @@ -366,7 +366,6 @@ impl VeilidAPI { let rpc_processor = self.rpc_processor()?; rpc_processor .app_call_reply(call_id, message) - .await .map_err(|e| e.into()) } diff --git a/veilid-server/Cargo.toml b/veilid-server/Cargo.toml index 9feee64f..da53e02a 100644 --- a/veilid-server/Cargo.toml +++ b/veilid-server/Cargo.toml @@ -38,6 +38,7 @@ rt-tokio = [ ] tracking = ["veilid-core/tracking"] debug-json-api = [] +debug-locks = ["veilid-core/debug-locks"] [dependencies] veilid-core = { path = "../veilid-core", default-features = false } diff --git a/veilid-server/src/server.rs b/veilid-server/src/server.rs index 0a21dbc0..20820737 100644 --- a/veilid-server/src/server.rs +++ b/veilid-server/src/server.rs @@ -108,25 +108,29 @@ pub async fn run_veilid_server( let capi2 = capi.clone(); let update_receiver_shutdown = SingleShotEventual::new(Some(())); let mut update_receiver_shutdown_instance = update_receiver_shutdown.instance().fuse(); - let update_receiver_jh = spawn_local("update_receiver", async move { - loop { - select! { - res = receiver.recv_async() => { - if let Ok(change) = res { - if let Some(capi) = &capi2 { - // Handle state changes on main thread for capnproto rpc - capi.clone().handle_update(change); + let update_receiver_jh = spawn_local( + "update_receiver", + async move { + loop { + select! { + res = receiver.recv_async() => { + if let Ok(change) = res { + if let Some(capi) = &capi2 { + // Handle state changes on main thread for capnproto rpc + capi.clone().handle_update(change); + } + } else { + break; } - } else { + } + _ = update_receiver_shutdown_instance => { break; } - } - _ = update_receiver_shutdown_instance => { - break; - } - }; + }; + } } - }); + .in_current_span(), + ); // Auto-attach if desired let mut out = Ok(()); diff --git a/veilid-tools/src/eventual.rs b/veilid-tools/src/eventual.rs index a0a49a47..33d5c51b 100644 --- a/veilid-tools/src/eventual.rs +++ b/veilid-tools/src/eventual.rs @@ -104,7 +104,7 @@ where match out { None => task::Poll::::Pending, Some(wakers) => { - // Wake all EventualResolvedFutures + // Wake all other instance futures for w in wakers { w.wake(); } diff --git a/veilid-tools/src/eventual_value.rs b/veilid-tools/src/eventual_value.rs index 16650f31..8bf93aa6 100644 --- a/veilid-tools/src/eventual_value.rs +++ b/veilid-tools/src/eventual_value.rs @@ -81,7 +81,7 @@ impl Future for EventualValueFuture { match out { None => task::Poll::::Pending, Some(wakers) => { - // Wake all EventualResolvedFutures + // Wake all other instance futures for w in wakers { w.wake(); } diff --git a/veilid-tools/src/eventual_value_clone.rs b/veilid-tools/src/eventual_value_clone.rs index fdaa9cf8..f0c555fb 100644 --- a/veilid-tools/src/eventual_value_clone.rs +++ b/veilid-tools/src/eventual_value_clone.rs @@ -77,7 +77,7 @@ impl Future for EventualValueCloneFuture { match out { None => task::Poll::::Pending, Some(wakers) => { - // Wake all EventualResolvedFutures + // Wake all other instance futures for w in wakers { w.wake(); } diff --git a/veilid-tools/src/lib.rs b/veilid-tools/src/lib.rs index 78fd2c4c..4cb232ac 100644 --- a/veilid-tools/src/lib.rs +++ b/veilid-tools/src/lib.rs @@ -251,6 +251,7 @@ pub mod tests; cfg_if! { if #[cfg(feature = "tracing")] { use tracing::*; + #[macro_export] macro_rules! debug_target_enabled { ($target:expr) => { enabled!(target: $target, Level::DEBUG) } diff --git a/veilid-tools/src/must_join_single_future.rs b/veilid-tools/src/must_join_single_future.rs index e89f000d..858a6575 100644 --- a/veilid-tools/src/must_join_single_future.rs +++ b/veilid-tools/src/must_join_single_future.rs @@ -64,6 +64,7 @@ where } /// Check the result and take it if there is one + #[cfg_attr(feature = "tracing", instrument(level = "trace", skip_all))] pub async fn check(&self) -> Result, ()> { let mut out: Option = None; @@ -95,6 +96,7 @@ where } /// Wait for the result and take it + #[cfg_attr(feature = "tracing", instrument(level = "trace", skip_all))] pub async fn join(&self) -> Result, ()> { let mut out: Option = None; diff --git a/veilid-tools/src/startup_lock.rs b/veilid-tools/src/startup_lock.rs index 04095375..dccd37ae 100644 --- a/veilid-tools/src/startup_lock.rs +++ b/veilid-tools/src/startup_lock.rs @@ -34,20 +34,20 @@ impl<'a> StartupLockGuard<'a> { #[derive(Debug)] pub struct StartupLockEnterGuard<'a> { _guard: AsyncRwLockReadGuard<'a, bool>, - // #[cfg(feature = "debug-locks")] + #[cfg(feature = "debug-locks")] id: usize, - // #[cfg(feature = "debug-locks")] + #[cfg(feature = "debug-locks")] active_guards: Arc>>, } -//#[cfg(feature = "debug-locks")] +#[cfg(feature = "debug-locks")] impl<'a> Drop for StartupLockEnterGuard<'a> { fn drop(&mut self) { self.active_guards.lock().remove(&self.id); } } -//#[cfg(feature = "debug-locks")] +#[cfg(feature = "debug-locks")] static GUARD_ID: AtomicUsize = AtomicUsize::new(0); /// Synchronization mechanism that tracks the startup and shutdown of a region of code. @@ -59,16 +59,18 @@ static GUARD_ID: AtomicUsize = AtomicUsize::new(0); /// asynchronous shutdown to wait for operations to finish before proceeding. #[derive(Debug)] pub struct StartupLock { - rwlock: AsyncRwLock, - // #[cfg(feature = "debug-locks")] + startup_state: AsyncRwLock, + stop_source: Mutex>, + #[cfg(feature = "debug-locks")] active_guards: Arc>>, } impl StartupLock { pub fn new() -> Self { Self { - rwlock: AsyncRwLock::new(false), - // #[cfg(feature = "debug-locks")] + startup_state: AsyncRwLock::new(false), + stop_source: Mutex::new(None), + #[cfg(feature = "debug-locks")] active_guards: Arc::new(Mutex::new(HashMap::new())), } } @@ -77,20 +79,29 @@ impl StartupLock { /// One must call 'success()' on the returned startup lock guard if startup was successful /// otherwise the startup lock will not shift to the 'started' state. pub fn startup(&self) -> Result { - let guard = asyncrwlock_try_write!(self.rwlock).ok_or(StartupLockAlreadyStartedError)?; + let guard = + asyncrwlock_try_write!(self.startup_state).ok_or(StartupLockAlreadyStartedError)?; if *guard { return Err(StartupLockAlreadyStartedError); } + *self.stop_source.lock() = Some(StopSource::new()); + Ok(StartupLockGuard { guard, success_value: true, }) } + /// Get a stop token for this lock + /// One can wait on this to timeout operations when a shutdown is requested + pub fn stop_token(&self) -> Option { + self.stop_source.lock().as_ref().map(|ss| ss.token()) + } + /// Check if this StartupLock is currently in a started state /// Returns false is the state is in transition pub fn is_started(&self) -> bool { - let Some(guard) = asyncrwlock_try_read!(self.rwlock) else { + let Some(guard) = asyncrwlock_try_read!(self.startup_state) else { return false; }; *guard @@ -99,7 +110,7 @@ impl StartupLock { /// Check if this StartupLock is currently in a shut down state /// Returns false is the state is in transition pub fn is_shut_down(&self) -> bool { - let Some(guard) = asyncrwlock_try_read!(self.rwlock) else { + let Some(guard) = asyncrwlock_try_read!(self.startup_state) else { return false; }; !*guard @@ -109,18 +120,20 @@ impl StartupLock { /// One must call 'success()' on the returned startup lock guard if shutdown was successful /// otherwise the startup lock will not shift to the 'stopped' state. pub async fn shutdown(&self) -> Result { + // Drop the stop source to ensure we can detect shutdown has been requested + *self.stop_source.lock() = None; + cfg_if! { if #[cfg(feature = "debug-locks")] { - //let guard = self.rwlock.write().await; + let guard = match timeout(30000, self.startup_state.write()).await { + Ok(v) => v, + Err(_) => { + eprintln!("active guards: {:#?}", self.active_guards.lock().values().collect::>()); + panic!("shutdown deadlock"); + } + }; } else { - let guard = self.rwlock.write().await; - // let guard = match timeout(30000, self.rwlock.write()).await { - // Ok(v) => v, - // Err(_) => { - // eprintln!("active guards: {:#?}", self.active_guards.lock().values().collect::>()); - // panic!("shutdown deadlock"); - // } - // }; + let guard = self.startup_state.write().await; } } if !*guard { @@ -136,19 +149,23 @@ impl StartupLock { /// If this module has not yet started up or is in the process of startup or shutdown /// this will fail. pub fn enter(&self) -> Result { - let guard = asyncrwlock_try_read!(self.rwlock).ok_or(StartupLockNotStartedError)?; + let guard = asyncrwlock_try_read!(self.startup_state).ok_or(StartupLockNotStartedError)?; if !*guard { return Err(StartupLockNotStartedError); } let out = StartupLockEnterGuard { _guard: guard, - //#[cfg(feature = "debug-locks")] + #[cfg(feature = "debug-locks")] id: GUARD_ID.fetch_add(1, Ordering::AcqRel), + #[cfg(feature = "debug-locks")] active_guards: self.active_guards.clone(), }; + + #[cfg(feature = "debug-locks")] self.active_guards .lock() .insert(out.id, backtrace::Backtrace::new()); + Ok(out) } } diff --git a/veilid-tools/src/tick_task.rs b/veilid-tools/src/tick_task.rs index d0f5d9fe..a121a193 100644 --- a/veilid-tools/src/tick_task.rs +++ b/veilid-tools/src/tick_task.rs @@ -104,22 +104,29 @@ impl TickTask { return Ok(()); } - self.internal_tick(now, last_timestamp_us).await.map(drop) + let itick = self.internal_tick(now, last_timestamp_us); + + itick.await.map(drop) } pub async fn try_tick_now(&self) -> Result { let now = get_timestamp(); let last_timestamp_us = self.last_timestamp_us.load(Ordering::Acquire); - self.internal_tick(now, last_timestamp_us).await + let itick = self.internal_tick(now, last_timestamp_us); + + itick.await } async fn internal_tick(&self, now: u64, last_timestamp_us: u64) -> Result { // Lock the stop source, tells us if we have ever started this future - let opt_stop_source = &mut *self.stop_source.lock().await; + let opt_stop_source_fut = self.stop_source.lock(); + + let opt_stop_source = &mut *opt_stop_source_fut.await; + if opt_stop_source.is_some() { // See if the previous execution finished with an error - match self.single_future.check().await { + match self.single_future.check().in_current_span().await { Ok(Some(Err(e))) => { // We have an error result, which means the singlefuture ran but we need to propagate the error return Err(e); @@ -145,15 +152,18 @@ impl TickTask { let stop_token = stop_source.token(); let running = self.running.clone(); let routine = self.routine.get().unwrap()(stop_token, last_timestamp_us, now); + let wrapped_routine = Box::pin(async move { running.store(true, core::sync::atomic::Ordering::Release); let out = routine.await; running.store(false, core::sync::atomic::Ordering::Release); out }); + match self .single_future .single_spawn(&self.name, wrapped_routine) + .in_current_span() .await { // We should have already consumed the result of the last run, or there was none diff --git a/veilid-tools/src/timeout.rs b/veilid-tools/src/timeout.rs index 5203c416..514bb727 100644 --- a/veilid-tools/src/timeout.rs +++ b/veilid-tools/src/timeout.rs @@ -8,7 +8,9 @@ cfg_if! { where F: Future, { - match select(Box::pin(sleep(dur_ms)), Box::pin(f)).await { + let tout = select(Box::pin(sleep(dur_ms)), Box::pin(f)); + + match tout.await { Either::Left((_x, _b)) => Err(TimeoutError()), Either::Right((y, _a)) => Ok(y), } @@ -22,11 +24,13 @@ cfg_if! { { cfg_if! { if #[cfg(feature="rt-async-std")] { - async_std::future::timeout(Duration::from_millis(dur_ms as u64), f).await.map_err(|e| e.into()) + let tout = async_std::future::timeout(Duration::from_millis(dur_ms as u64), f); } else if #[cfg(feature="rt-tokio")] { - tokio::time::timeout(Duration::from_millis(dur_ms as u64), f).await.map_err(|e| e.into()) + let tout = tokio::time::timeout(Duration::from_millis(dur_ms as u64), f); } } + + tout.await.map_err(|e| e.into()) } } diff --git a/veilid-tools/src/tools.rs b/veilid-tools/src/tools.rs index 4536b5dd..b9a1da4c 100644 --- a/veilid-tools/src/tools.rs +++ b/veilid-tools/src/tools.rs @@ -506,20 +506,21 @@ pub fn map_to_string(arg: X) -> String { ////////////////////////////////////////////////////////////////////////////////////////////////////////////// pub struct DebugGuard { + name: &'static str, counter: &'static AtomicUsize, } impl DebugGuard { - pub fn new(counter: &'static AtomicUsize) -> Self { + pub fn new(name: &'static str, counter: &'static AtomicUsize) -> Self { let c = counter.fetch_add(1, Ordering::SeqCst); - eprintln!("DebugGuard Entered: {}", c + 1); - Self { counter } + eprintln!("{} entered: {}", name, c + 1); + Self { name, counter } } } impl Drop for DebugGuard { fn drop(&mut self) { let c = self.counter.fetch_sub(1, Ordering::SeqCst); - eprintln!("DebugGuard Exited: {}", c - 1); + eprintln!("{} exited: {}", self.name, c - 1); } } From 09bda21ee6b2f9e827bab3229f4d578318d7e0a3 Mon Sep 17 00:00:00 2001 From: John Smith Date: Sun, 21 Jul 2024 21:34:34 -0400 Subject: [PATCH 08/13] fix linux --- veilid-tools/src/network_interfaces/netlink.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/veilid-tools/src/network_interfaces/netlink.rs b/veilid-tools/src/network_interfaces/netlink.rs index f887d115..409e4fc4 100644 --- a/veilid-tools/src/network_interfaces/netlink.rs +++ b/veilid-tools/src/network_interfaces/netlink.rs @@ -317,7 +317,7 @@ impl PlatformSupportNetlink { let (connection, handle, _) = new_connection_with_socket::()?; // Spawn a connection handler - let connection_jh = spawn(connection); + let connection_jh = spawn("rtnetlink connection", connection); // Save the connection self.connection_jh = Some(connection_jh); From f0b1b6535caf84808fef0fd2552ccccfe899f14e Mon Sep 17 00:00:00 2001 From: Christien Rioux Date: Sun, 21 Jul 2024 22:14:04 -0400 Subject: [PATCH 09/13] spawn work --- veilid-tools/src/spawn.rs | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/veilid-tools/src/spawn.rs b/veilid-tools/src/spawn.rs index 5b494c1c..fcebb692 100644 --- a/veilid-tools/src/spawn.rs +++ b/veilid-tools/src/spawn.rs @@ -53,7 +53,7 @@ cfg_if! { { cfg_if! { if #[cfg(feature="rt-async-std")] { - MustJoinHandle::new(async_std::task::Builder::new().name(name).spawn(future).unwrap()) + MustJoinHandle::new(async_std::task::Builder::new().name(name.to_string()).spawn(future).unwrap()) } else if #[cfg(feature="rt-tokio")] { MustJoinHandle::new(tokio::task::Builder::new().name(name).spawn(future).unwrap()) } @@ -66,7 +66,7 @@ cfg_if! { { cfg_if! { if #[cfg(feature="rt-async-std")] { - MustJoinHandle::new(async_std::task::Builder::new().name(name).local(future).unwrap()) + MustJoinHandle::new(async_std::task::Builder::new().name(name.to_string()).local(future).unwrap()) } else if #[cfg(feature="rt-tokio")] { MustJoinHandle::new(tokio::task::Builder::new().name(name).spawn_local(future).unwrap()) } @@ -79,7 +79,7 @@ cfg_if! { { cfg_if! { if #[cfg(feature="rt-async-std")] { - drop(async_std::task::Builder::new().name(name).spawn(future).unwrap()); + drop(async_std::task::Builder::new().name(name.to_string()).spawn(future).unwrap()); } else if #[cfg(feature="rt-tokio")] { drop(tokio::task::Builder::new().name(name).spawn(future).unwrap()); } @@ -92,7 +92,7 @@ cfg_if! { { cfg_if! { if #[cfg(feature="rt-async-std")] { - drop(async_std::task::Builder::new().name(name).local(future).unwrap()); + drop(async_std::task::Builder::new().name(name.to_string()).local(future).unwrap()); } else if #[cfg(feature="rt-tokio")] { drop(tokio::task::Builder::new().name(name).spawn_local(future).unwrap()); } @@ -108,7 +108,9 @@ cfg_if! { // run blocking stuff in blocking thread cfg_if! { if #[cfg(feature="rt-async-std")] { - async_std::task::Builder::new().name(name).blocking(blocking_task) + let _name = name; + // async_std::task::Builder blocking doesn't work like spawn_blocking() + async_std::task::spawn_blocking(blocking_task).await } else if #[cfg(feature="rt-tokio")] { tokio::task::Builder::new().name(name).spawn_blocking(blocking_task).unwrap().await.unwrap_or(err_result) } else { From 859d2214f72e817c015ab46a15ae6605bc5c51cb Mon Sep 17 00:00:00 2001 From: Christien Rioux Date: Sun, 21 Jul 2024 22:23:55 -0400 Subject: [PATCH 10/13] fix features --- Cargo.lock | 110 +++++++++++++++++++++------------------- veilid-tools/Cargo.toml | 2 +- 2 files changed, 59 insertions(+), 53 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 21c7f3ec..c5393779 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -243,9 +243,9 @@ dependencies = [ [[package]] name = "arrayref" -version = "0.3.7" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b4930d2cb77ce62f89ee5d5289b4ac049559b1c45539271f5ed4fdc7db34545" +checksum = "9d151e35f61089500b617991b791fc8bfd237ae50cd5950803758a179b41e67a" [[package]] name = "arrayvec" @@ -470,7 +470,7 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -500,7 +500,7 @@ checksum = "6e0c28dcc82d7c8ead5cb13beb15405b57b8546e93215673ff8ca0349a028107" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -745,7 +745,7 @@ checksum = "e0b121a9fe0df916e362fb3271088d071159cdf11db0e4182d02152850756eff" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -831,6 +831,12 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" +[[package]] +name = "byteorder-lite" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f1fe948ff07f4bd06c30984e69f5b4899c516a3ef74f34df92a2df2ab535495" + [[package]] name = "bytes" version = "1.6.1" @@ -857,9 +863,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.1.5" +version = "1.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "324c74f2155653c90b04f25b2a47a8a631360cb908f92a772695f430c7e31052" +checksum = "2aba8f4e9906c7ce3c73463f62a7f0c65183ada1a2d47e397cc8810827f9694f" [[package]] name = "cesu8" @@ -1016,7 +1022,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -1375,7 +1381,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "edb49164822f3ee45b17acd4a208cfc1251410cf0cad9a833234c9890774dd9f" dependencies = [ "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -1488,7 +1494,7 @@ checksum = "f46882e17999c6cc590af592290432be3bce0428cb0d5f8b6715e4dc7b383eb3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -1544,7 +1550,7 @@ dependencies = [ "ident_case", "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -1566,7 +1572,7 @@ checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806" dependencies = [ "darling_core 0.20.10", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -1733,7 +1739,7 @@ dependencies = [ "heck 0.4.1", "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -1753,7 +1759,7 @@ checksum = "f282cfdfe92516eb26c2af8589c274c7c17681f5ecc03c18255fe741c6aa64eb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -1796,7 +1802,7 @@ dependencies = [ "darling 0.20.10", "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -2034,7 +2040,7 @@ checksum = "1a5c6c585bc94aaf2c7b51dd4c2ba22680844aba4c687be581871a6f518c5742" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -2146,7 +2152,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -2686,12 +2692,12 @@ dependencies = [ [[package]] name = "image" -version = "0.25.1" +version = "0.25.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd54d660e773627692c524beaad361aca785a4f9f5730ce91f42aabe5bce3d11" +checksum = "99314c8a2152b8ddb211f924cdae532d8c5e4c8bb54728e12fff1b0cd5963a10" dependencies = [ "bytemuck", - "byteorder", + "byteorder-lite", "num-traits", "png", "tiff", @@ -2975,9 +2981,9 @@ dependencies = [ [[package]] name = "libloading" -version = "0.8.4" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e310b3a6b5907f99202fcdb4960ff45b93735d7c7d96b760fcff8db2dc0e103d" +checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4" dependencies = [ "cfg-if 1.0.0", "windows-targets 0.52.6", @@ -4017,7 +4023,7 @@ dependencies = [ "pest_meta", "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -4068,7 +4074,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -4167,9 +4173,9 @@ dependencies = [ [[package]] name = "portable-atomic" -version = "1.6.0" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0" +checksum = "da544ee218f0d287a911e9c99a39a8c9bc8fcad3cb8db5959940044ecfc67265" [[package]] name = "portable-atomic-util" @@ -4199,7 +4205,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5f12335488a2f3b0a83b14edad48dca9879ce89b2edd10e80237e4e852dd645e" dependencies = [ "proc-macro2", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -4267,7 +4273,7 @@ dependencies = [ "prost 0.12.6", "prost-types", "regex", - "syn 2.0.71", + "syn 2.0.72", "tempfile", ] @@ -4294,7 +4300,7 @@ dependencies = [ "itertools 0.12.1", "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -4754,7 +4760,7 @@ dependencies = [ "proc-macro2", "quote", "serde_derive_internals 0.29.1", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -4781,9 +4787,9 @@ dependencies = [ [[package]] name = "sdd" -version = "1.6.0" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8eb0dde0ccd15e337a3cf738a9a38115c6d8e74795d074e73973dad3d229a897" +checksum = "85f05a494052771fc5bd0619742363b5e24e5ad72ab3111ec2e27925b8edc5f3" [[package]] name = "secret-service" @@ -4916,7 +4922,7 @@ checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -4927,7 +4933,7 @@ checksum = "e578a843d40b4189a4d66bba51d7684f57da5bd7c304c64e14bd63efbef49509" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -4938,7 +4944,7 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -4960,7 +4966,7 @@ checksum = "6c64451ba24fc7a6a2d60fc75dd9c83c90903b19028d4eff35e88fc1e86564e9" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -5021,7 +5027,7 @@ checksum = "91d129178576168c589c9ec973feedf7d3126c01ac2bf08795109aa35b69fb8f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -5032,7 +5038,7 @@ checksum = "82fe9db325bcef1fbcde82e078a5cc4efdf787e96b3b9cf45b50b529f2083d67" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -5284,9 +5290,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.71" +version = "2.0.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b146dcf730474b4bcd16c311627b31ede9ab149045db4d6088b3becaea046462" +checksum = "dc4b9b9bf2add8093d3f2c0204471e951b2285580335de42f9d2534f3ae7a8af" dependencies = [ "proc-macro2", "quote", @@ -5381,7 +5387,7 @@ checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -5510,7 +5516,7 @@ checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -5589,7 +5595,7 @@ dependencies = [ "serde", "serde_spanned", "toml_datetime", - "winnow 0.6.13", + "winnow 0.6.14", ] [[package]] @@ -5711,7 +5717,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -5914,7 +5920,7 @@ dependencies = [ "proc-macro2", "quote", "serde_derive_internals 0.28.0", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -6490,7 +6496,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", "wasm-bindgen-shared", ] @@ -6524,7 +6530,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -6557,7 +6563,7 @@ checksum = "b7f89739351a2e03cb94beb799d47fb2cac01759b40ec441f7de39b00cbf7ef0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -6977,9 +6983,9 @@ dependencies = [ [[package]] name = "winnow" -version = "0.6.13" +version = "0.6.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59b5e5f6c299a3c7890b876a2a587f3115162487e704907d9b6cd29473052ba1" +checksum = "374ec40a2d767a3c1b4972d9475ecd557356637be906f2cb3f7fe17a6eb5e22f" dependencies = [ "memchr", ] @@ -7124,7 +7130,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -7144,7 +7150,7 @@ checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] diff --git a/veilid-tools/Cargo.toml b/veilid-tools/Cargo.toml index f6c2d046..cc5b8011 100644 --- a/veilid-tools/Cargo.toml +++ b/veilid-tools/Cargo.toml @@ -33,7 +33,7 @@ rt-wasm-bindgen = ["async_executors/bindgen", "async_executors/timer"] veilid_tools_android_tests = ["dep:paranoid-android"] veilid_tools_ios_tests = ["dep:tracing", "dep:oslog", "dep:tracing-oslog"] -tracing = ["dep:tracing", "dep:tracing-subscriber"] +tracing = ["dep:tracing", "dep:tracing-subscriber", "tokio/tracing"] debug-locks = [] [dependencies] From e4ce42ec085d33405fed8bd7606cb0586c35a7a8 Mon Sep 17 00:00:00 2001 From: John Smith Date: Sun, 21 Jul 2024 23:27:38 -0400 Subject: [PATCH 11/13] use cmake from kitware --- Earthfile | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/Earthfile b/Earthfile index 02eafc22..282d65c7 100644 --- a/Earthfile +++ b/Earthfile @@ -16,6 +16,8 @@ VERSION 0.7 FROM ubuntu:18.04 ENV ZIG_VERSION=0.13.0-dev.46+3648d7df1 +ENV CMAKE_VERSION_MINOR=3.30 +ENV CMAKE_VERSION_PATCH=3.30.1 ENV RUSTUP_HOME=/usr/local/rustup ENV RUSTUP_DIST_SERVER=https://static.rust-lang.org ENV CARGO_HOME=/usr/local/cargo @@ -28,7 +30,11 @@ WORKDIR /veilid # Install build prerequisites & setup required directories deps-base: RUN apt-get -y update - RUN apt-get install -y iproute2 curl build-essential cmake libssl-dev openssl file git pkg-config libdbus-1-dev libdbus-glib-1-dev libgirepository1.0-dev libcairo2-dev checkinstall unzip libncursesw5-dev libncurses5-dev + RUN apt-get install -y iproute2 curl build-essential libssl-dev openssl file git pkg-config libdbus-1-dev libdbus-glib-1-dev libgirepository1.0-dev libcairo2-dev checkinstall unzip libncursesw5-dev libncurses5-dev + RUN curl -O https://cmake.org/files/v$CMAKE_VERSION_MINOR/cmake-$CMAKE_VERSION_PATCH-linux-$(arch).sh + RUN mkdir /opt/cmake + RUN sh cmake-$CMAKE_VERSION_PATCH-linux-$(arch).sh --skip-license --prefix=/opt/cmake + RUN ln -s /opt/cmake/bin/cmake /usr/local/bin/cmake # Install Rust deps-rust: From b091c7853909d7f13f834f90fc88ed93f678c9d4 Mon Sep 17 00:00:00 2001 From: Christien Rioux Date: Mon, 22 Jul 2024 09:22:44 -0400 Subject: [PATCH 12/13] one more socket error edge case --- veilid-tools/src/network_result.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/veilid-tools/src/network_result.rs b/veilid-tools/src/network_result.rs index 5fa8f93b..0dbfed4c 100644 --- a/veilid-tools/src/network_result.rs +++ b/veilid-tools/src/network_result.rs @@ -32,6 +32,7 @@ impl IoNetworkResultExt for io::Result { Err(e) => match e.kind() { io::ErrorKind::TimedOut => Ok(NetworkResult::Timeout), io::ErrorKind::UnexpectedEof + | io::ErrorKind::BrokenPipe | io::ErrorKind::ConnectionAborted | io::ErrorKind::ConnectionRefused | io::ErrorKind::ConnectionReset @@ -51,6 +52,7 @@ impl IoNetworkResultExt for io::Result { match e.kind() { io::ErrorKind::TimedOut => Ok(NetworkResult::Timeout), io::ErrorKind::UnexpectedEof + | io::ErrorKind::BrokenPipe | io::ErrorKind::ConnectionAborted | io::ErrorKind::ConnectionRefused | io::ErrorKind::ConnectionReset => Ok(NetworkResult::NoConnection(e)), From bcb9f2a96c0596fe69a850dcd2bc479b5e129bb7 Mon Sep 17 00:00:00 2001 From: Christien Rioux Date: Mon, 22 Jul 2024 09:54:10 -0400 Subject: [PATCH 13/13] remove unnecessary spans --- veilid-core/src/network_manager/native/network_udp.rs | 5 +++-- veilid-core/src/rpc_processor/mod.rs | 1 - veilid-tools/src/tick_task.rs | 3 +-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/veilid-core/src/network_manager/native/network_udp.rs b/veilid-core/src/network_manager/native/network_udp.rs index b7caa29f..a6fb9267 100644 --- a/veilid-core/src/network_manager/native/network_udp.rs +++ b/veilid-core/src/network_manager/native/network_udp.rs @@ -58,6 +58,7 @@ impl Network { match ph .recv_message(&mut data) .timeout_at(stop_token.clone()) + .in_current_span() .await { Ok(Ok((size, flow))) => { @@ -90,7 +91,7 @@ impl Network { // Now we wait for join handles to exit, // if any error out it indicates an error needing // us to completely restart the network - while let Some(v) = protocol_handlers_unordered.next().await { + while let Some(v) = protocol_handlers_unordered.next().in_current_span().await { // true = stopped, false = errored if !v { // If any protocol handler fails, our socket died and we need to restart the network @@ -99,7 +100,7 @@ impl Network { } log_net!("UDP listener task stopped"); - }); + }.instrument(trace_span!(parent: None, "UDP Listener"))); //////////////////////////////////////////////////////////// // Add to join handle diff --git a/veilid-core/src/rpc_processor/mod.rs b/veilid-core/src/rpc_processor/mod.rs index 8b342f7c..88e6677a 100644 --- a/veilid-core/src/rpc_processor/mod.rs +++ b/veilid-core/src/rpc_processor/mod.rs @@ -1674,7 +1674,6 @@ impl RPCProcessor { } } - #[instrument(level="trace", target="rpc", skip_all)] async fn rpc_worker( self, stop_token: StopToken, diff --git a/veilid-tools/src/tick_task.rs b/veilid-tools/src/tick_task.rs index a121a193..1b95f0c6 100644 --- a/veilid-tools/src/tick_task.rs +++ b/veilid-tools/src/tick_task.rs @@ -126,7 +126,7 @@ impl TickTask { if opt_stop_source.is_some() { // See if the previous execution finished with an error - match self.single_future.check().in_current_span().await { + match self.single_future.check().await { Ok(Some(Err(e))) => { // We have an error result, which means the singlefuture ran but we need to propagate the error return Err(e); @@ -163,7 +163,6 @@ impl TickTask { match self .single_future .single_spawn(&self.name, wrapped_routine) - .in_current_span() .await { // We should have already consumed the result of the last run, or there was none