mirror of
https://gitlab.com/veilid/veilid.git
synced 2025-01-11 15:29:30 -05:00
better race condition handling
This commit is contained in:
parent
4b2164a546
commit
7ed6b44d21
@ -289,28 +289,29 @@ impl ConnectionManager {
|
||||
.await;
|
||||
match result_net_res {
|
||||
Ok(net_res) => {
|
||||
if net_res.is_value() || retry_count == 0 {
|
||||
break net_res;
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
if retry_count == 0 {
|
||||
// Try one last time to return a connection from the table, in case
|
||||
// an 'accept' happened at literally the same time as our connect
|
||||
// If the connection 'already exists', then try one last time to return a connection from the table, in case
|
||||
// an 'accept' happened at literally the same time as our connect
|
||||
if net_res.is_already_exists() {
|
||||
if let Some(conn) = self
|
||||
.arc
|
||||
.connection_table
|
||||
.get_last_connection_by_remote(peer_address)
|
||||
{
|
||||
log_net!(
|
||||
"== Returning existing connection in race local_addr={:?} peer_address={:?}",
|
||||
local_addr.green(),
|
||||
peer_address.green()
|
||||
);
|
||||
"== Returning existing connection in race local_addr={:?} peer_address={:?}",
|
||||
local_addr.green(),
|
||||
peer_address.green()
|
||||
);
|
||||
|
||||
return Ok(NetworkResult::Value(conn));
|
||||
}
|
||||
|
||||
}
|
||||
if net_res.is_value() || retry_count == 0 {
|
||||
break net_res;
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
if retry_count == 0 {
|
||||
return Err(e).wrap_err("failed to connect");
|
||||
}
|
||||
}
|
||||
|
@ -223,7 +223,15 @@ impl NetworkManager {
|
||||
this.unlocked_inner
|
||||
.rolling_transfers_task
|
||||
.set_routine(move |s, l, t| {
|
||||
Box::pin(this2.clone().rolling_transfers_task_routine(s, l, t))
|
||||
Box::pin(
|
||||
this2
|
||||
.clone()
|
||||
.rolling_transfers_task_routine(s, l, t)
|
||||
.instrument(trace_span!(
|
||||
parent: None,
|
||||
"NetworkManager rolling transfers task routine"
|
||||
)),
|
||||
)
|
||||
});
|
||||
}
|
||||
// Set relay management tick task
|
||||
@ -232,7 +240,12 @@ impl NetworkManager {
|
||||
this.unlocked_inner
|
||||
.relay_management_task
|
||||
.set_routine(move |s, l, t| {
|
||||
Box::pin(this2.clone().relay_management_task_routine(s, l, t))
|
||||
Box::pin(
|
||||
this2
|
||||
.clone()
|
||||
.relay_management_task_routine(s, l, t)
|
||||
.instrument(trace_span!(parent: None, "relay management task routine")),
|
||||
)
|
||||
});
|
||||
}
|
||||
// Set bootstrap tick task
|
||||
@ -240,7 +253,14 @@ impl NetworkManager {
|
||||
let this2 = this.clone();
|
||||
this.unlocked_inner
|
||||
.bootstrap_task
|
||||
.set_routine(move |s, _l, _t| Box::pin(this2.clone().bootstrap_task_routine(s)));
|
||||
.set_routine(move |s, _l, _t| {
|
||||
Box::pin(
|
||||
this2
|
||||
.clone()
|
||||
.bootstrap_task_routine(s)
|
||||
.instrument(trace_span!(parent: None, "bootstrap task routine")),
|
||||
)
|
||||
});
|
||||
}
|
||||
// Set peer minimum refresh tick task
|
||||
{
|
||||
@ -248,7 +268,15 @@ impl NetworkManager {
|
||||
this.unlocked_inner
|
||||
.peer_minimum_refresh_task
|
||||
.set_routine(move |s, _l, _t| {
|
||||
Box::pin(this2.clone().peer_minimum_refresh_task_routine(s))
|
||||
Box::pin(
|
||||
this2
|
||||
.clone()
|
||||
.peer_minimum_refresh_task_routine(s)
|
||||
.instrument(trace_span!(
|
||||
parent: None,
|
||||
"peer minimum refresh task routine"
|
||||
)),
|
||||
)
|
||||
});
|
||||
}
|
||||
// Set ping validator tick task
|
||||
@ -257,7 +285,12 @@ impl NetworkManager {
|
||||
this.unlocked_inner
|
||||
.ping_validator_task
|
||||
.set_routine(move |s, l, t| {
|
||||
Box::pin(this2.clone().ping_validator_task_routine(s, l, t))
|
||||
Box::pin(
|
||||
this2
|
||||
.clone()
|
||||
.ping_validator_task_routine(s, l, t)
|
||||
.instrument(trace_span!(parent: None, "ping validator task routine")),
|
||||
)
|
||||
});
|
||||
}
|
||||
// Set public address check task
|
||||
@ -266,7 +299,15 @@ impl NetworkManager {
|
||||
this.unlocked_inner
|
||||
.public_address_check_task
|
||||
.set_routine(move |s, l, t| {
|
||||
Box::pin(this2.clone().public_address_check_task_routine(s, l, t))
|
||||
Box::pin(
|
||||
this2
|
||||
.clone()
|
||||
.public_address_check_task_routine(s, l, t)
|
||||
.instrument(trace_span!(
|
||||
parent: None,
|
||||
"public address check task routine"
|
||||
)),
|
||||
)
|
||||
});
|
||||
}
|
||||
this
|
||||
|
@ -462,11 +462,13 @@ impl Network {
|
||||
|
||||
// receive single response
|
||||
let mut out = vec![0u8; MAX_MESSAGE_SIZE];
|
||||
let (recv_len, recv_addr) =
|
||||
network_result_try!(timeout(timeout_ms, h.recv_message(&mut out))
|
||||
.await
|
||||
.into_network_result())
|
||||
.wrap_err("recv_message failure")?;
|
||||
let (recv_len, recv_addr) = network_result_try!(timeout(
|
||||
timeout_ms,
|
||||
h.recv_message(&mut out).instrument(Span::current())
|
||||
)
|
||||
.await
|
||||
.into_network_result())
|
||||
.wrap_err("recv_message failure")?;
|
||||
|
||||
let recv_socket_addr = recv_addr.remote_address().to_socket_addr();
|
||||
self.network_manager()
|
||||
|
@ -54,7 +54,7 @@ impl Network {
|
||||
|
||||
loop {
|
||||
match ph
|
||||
.recv_message(&mut data)
|
||||
.recv_message(&mut data).instrument(Span::current())
|
||||
.timeout_at(stop_token.clone())
|
||||
.await
|
||||
{
|
||||
|
@ -172,7 +172,8 @@ pub fn new_bound_first_tcp_socket(local_address: SocketAddr) -> io::Result<Socke
|
||||
}
|
||||
|
||||
// Non-blocking connect is tricky when you want to start with a prepared socket
|
||||
#[instrument(level = "trace", ret, err)]
|
||||
// Errors should not be logged as they are valid conditions for this function
|
||||
#[instrument(level = "trace", ret)]
|
||||
pub async fn nonblocking_connect(
|
||||
socket: Socket,
|
||||
addr: SocketAddr,
|
||||
@ -184,9 +185,6 @@ pub async fn nonblocking_connect(
|
||||
// Make socket2 SockAddr
|
||||
let socket2_addr = socket2::SockAddr::from(addr);
|
||||
|
||||
// XXX
|
||||
//let bind_local_addr = socket.local_addr().unwrap().as_socket().unwrap();
|
||||
|
||||
// Connect to the remote address
|
||||
match socket.connect(&socket2_addr) {
|
||||
Ok(()) => Ok(()),
|
||||
@ -194,28 +192,7 @@ pub async fn nonblocking_connect(
|
||||
Err(err) if err.raw_os_error() == Some(libc::EINPROGRESS) => Ok(()),
|
||||
Err(err) if err.kind() == std::io::ErrorKind::WouldBlock => Ok(()),
|
||||
Err(e) => Err(e),
|
||||
}
|
||||
.map_err(|e| {
|
||||
// XXX
|
||||
// warn!(
|
||||
// "DEBUGCONNECT XXXFAILXXX: bind={} local={} remote={}\nbacktrace={:?}",
|
||||
// bind_local_addr,
|
||||
// socket.local_addr().unwrap().as_socket().unwrap(),
|
||||
// addr,
|
||||
// backtrace::Backtrace::new(),
|
||||
// );
|
||||
e
|
||||
})?;
|
||||
|
||||
// XXX
|
||||
// warn!(
|
||||
// "DEBUGCONNECT: bind={} local={} remote={}\nbacktrace={:?}",
|
||||
// bind_local_addr,
|
||||
// socket.local_addr().unwrap().as_socket().unwrap(),
|
||||
// addr,
|
||||
// backtrace::Backtrace::new(),
|
||||
// );
|
||||
|
||||
}?;
|
||||
let async_stream = Async::new(std::net::TcpStream::from(socket))?;
|
||||
|
||||
// The stream becomes writable when connected
|
||||
|
@ -125,7 +125,15 @@ impl RoutingTable {
|
||||
this.unlocked_inner
|
||||
.rolling_transfers_task
|
||||
.set_routine(move |s, l, t| {
|
||||
Box::pin(this2.clone().rolling_transfers_task_routine(s, l, t))
|
||||
Box::pin(
|
||||
this2
|
||||
.clone()
|
||||
.rolling_transfers_task_routine(s, l, t)
|
||||
.instrument(trace_span!(
|
||||
parent: None,
|
||||
"RoutingTable rolling transfers task routine"
|
||||
)),
|
||||
)
|
||||
});
|
||||
}
|
||||
|
||||
@ -135,7 +143,12 @@ impl RoutingTable {
|
||||
this.unlocked_inner
|
||||
.kick_buckets_task
|
||||
.set_routine(move |s, l, t| {
|
||||
Box::pin(this2.clone().kick_buckets_task_routine(s, l, t))
|
||||
Box::pin(
|
||||
this2
|
||||
.clone()
|
||||
.kick_buckets_task_routine(s, l, t)
|
||||
.instrument(trace_span!(parent: None, "kick buckets task routine")),
|
||||
)
|
||||
});
|
||||
}
|
||||
this
|
||||
|
@ -406,6 +406,9 @@ impl VeilidAPI {
|
||||
NetworkResult::NoConnection(e) => {
|
||||
return Ok(format!("NoConnection({})", e));
|
||||
}
|
||||
NetworkResult::AlreadyExists(e) => {
|
||||
return Ok(format!("AlreadyExists({})", e));
|
||||
}
|
||||
NetworkResult::InvalidMessage(e) => {
|
||||
return Ok(format!("InvalidMessage({})", e));
|
||||
}
|
||||
|
@ -130,11 +130,12 @@ impl RoutingContext {
|
||||
let answer = match rpc_processor.rpc_call_app_call(dest, request).await {
|
||||
Ok(NetworkResult::Value(v)) => v,
|
||||
Ok(NetworkResult::Timeout) => return Err(VeilidAPIError::Timeout),
|
||||
Ok(NetworkResult::NoConnection(e)) => {
|
||||
Ok(NetworkResult::NoConnection(e)) | Ok(NetworkResult::AlreadyExists(e)) => {
|
||||
return Err(VeilidAPIError::NoConnection {
|
||||
message: e.to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
Ok(NetworkResult::InvalidMessage(message)) => {
|
||||
return Err(VeilidAPIError::Generic { message })
|
||||
}
|
||||
@ -159,7 +160,7 @@ impl RoutingContext {
|
||||
match rpc_processor.rpc_call_app_message(dest, message).await {
|
||||
Ok(NetworkResult::Value(())) => {}
|
||||
Ok(NetworkResult::Timeout) => return Err(VeilidAPIError::Timeout),
|
||||
Ok(NetworkResult::NoConnection(e)) => {
|
||||
Ok(NetworkResult::NoConnection(e)) | Ok(NetworkResult::AlreadyExists(e)) => {
|
||||
return Err(VeilidAPIError::NoConnection {
|
||||
message: e.to_string(),
|
||||
})
|
||||
|
@ -35,6 +35,7 @@ impl<T> IoNetworkResultExt<T> for io::Result<T> {
|
||||
| io::ErrorKind::ConnectionReset
|
||||
| io::ErrorKind::HostUnreachable
|
||||
| io::ErrorKind::NetworkUnreachable => Ok(NetworkResult::NoConnection(e)),
|
||||
io::ErrorKind::AddrNotAvailable => Ok(NetworkResult::AlreadyExists(e)),
|
||||
_ => Err(e),
|
||||
},
|
||||
#[cfg(not(feature = "io_error_more"))]
|
||||
@ -50,6 +51,7 @@ impl<T> IoNetworkResultExt<T> for io::Result<T> {
|
||||
io::ErrorKind::ConnectionAborted
|
||||
| io::ErrorKind::ConnectionRefused
|
||||
| io::ErrorKind::ConnectionReset => Ok(NetworkResult::NoConnection(e)),
|
||||
io::ErrorKind::AddrNotAvailable => Ok(NetworkResult::AlreadyExists(e)),
|
||||
_ => Err(e),
|
||||
}
|
||||
}
|
||||
@ -66,6 +68,7 @@ impl<T, E> NetworkResultResultExt<T, E> for NetworkResult<Result<T, E>> {
|
||||
match self {
|
||||
NetworkResult::Timeout => Ok(NetworkResult::<T>::Timeout),
|
||||
NetworkResult::NoConnection(e) => Ok(NetworkResult::<T>::NoConnection(e)),
|
||||
NetworkResult::AlreadyExists(e) => Ok(NetworkResult::<T>::AlreadyExists(e)),
|
||||
NetworkResult::InvalidMessage(s) => Ok(NetworkResult::<T>::InvalidMessage(s)),
|
||||
NetworkResult::Value(Ok(v)) => Ok(NetworkResult::<T>::Value(v)),
|
||||
NetworkResult::Value(Err(e)) => Err(e),
|
||||
@ -90,6 +93,7 @@ impl<T> FoldedNetworkResultExt<T> for io::Result<TimeoutOr<T>> {
|
||||
| io::ErrorKind::ConnectionReset
|
||||
| io::ErrorKind::HostUnreachable
|
||||
| io::ErrorKind::NetworkUnreachable => Ok(NetworkResult::NoConnection(e)),
|
||||
io::ErrorKind::AddrNotAvailable => Ok(NetworkResult::AlreadyExists(e)),
|
||||
_ => Err(e),
|
||||
},
|
||||
#[cfg(not(feature = "io_error_more"))]
|
||||
@ -105,6 +109,7 @@ impl<T> FoldedNetworkResultExt<T> for io::Result<TimeoutOr<T>> {
|
||||
io::ErrorKind::ConnectionAborted
|
||||
| io::ErrorKind::ConnectionRefused
|
||||
| io::ErrorKind::ConnectionReset => Ok(NetworkResult::NoConnection(e)),
|
||||
io::ErrorKind::AddrNotAvailable => Ok(NetworkResult::AlreadyExists(e)),
|
||||
_ => Err(e),
|
||||
}
|
||||
}
|
||||
@ -124,6 +129,7 @@ impl<T> FoldedNetworkResultExt<T> for io::Result<NetworkResult<T>> {
|
||||
| io::ErrorKind::ConnectionReset
|
||||
| io::ErrorKind::HostUnreachable
|
||||
| io::ErrorKind::NetworkUnreachable => Ok(NetworkResult::NoConnection(e)),
|
||||
io::ErrorKind::AddrNotAvailable => Ok(NetworkResult::AlreadyExists(e)),
|
||||
_ => Err(e),
|
||||
},
|
||||
#[cfg(not(feature = "io_error_more"))]
|
||||
@ -139,6 +145,7 @@ impl<T> FoldedNetworkResultExt<T> for io::Result<NetworkResult<T>> {
|
||||
io::ErrorKind::ConnectionAborted
|
||||
| io::ErrorKind::ConnectionRefused
|
||||
| io::ErrorKind::ConnectionReset => Ok(NetworkResult::NoConnection(e)),
|
||||
io::ErrorKind::AddrNotAvailable => Ok(NetworkResult::AlreadyExists(e)),
|
||||
_ => Err(e),
|
||||
}
|
||||
}
|
||||
@ -153,6 +160,7 @@ impl<T> FoldedNetworkResultExt<T> for io::Result<NetworkResult<T>> {
|
||||
pub enum NetworkResult<T> {
|
||||
Timeout,
|
||||
NoConnection(io::Error),
|
||||
AlreadyExists(io::Error),
|
||||
InvalidMessage(String),
|
||||
Value(T),
|
||||
}
|
||||
@ -170,7 +178,9 @@ impl<T> NetworkResult<T> {
|
||||
pub fn invalid_message<S: ToString>(s: S) -> Self {
|
||||
Self::InvalidMessage(s.to_string())
|
||||
}
|
||||
|
||||
pub fn already_exists(e: io::Error) -> Self {
|
||||
Self::AlreadyExists(e)
|
||||
}
|
||||
pub fn value(value: T) -> Self {
|
||||
Self::Value(value)
|
||||
}
|
||||
@ -181,6 +191,9 @@ impl<T> NetworkResult<T> {
|
||||
pub fn is_no_connection(&self) -> bool {
|
||||
matches!(self, Self::NoConnection(_))
|
||||
}
|
||||
pub fn is_already_exists(&self) -> bool {
|
||||
matches!(self, Self::AlreadyExists(_))
|
||||
}
|
||||
pub fn is_value(&self) -> bool {
|
||||
matches!(self, Self::Value(_))
|
||||
}
|
||||
@ -188,6 +201,7 @@ impl<T> NetworkResult<T> {
|
||||
match self {
|
||||
Self::Timeout => NetworkResult::<X>::Timeout,
|
||||
Self::NoConnection(e) => NetworkResult::<X>::NoConnection(e),
|
||||
Self::AlreadyExists(e) => NetworkResult::<X>::AlreadyExists(e),
|
||||
Self::InvalidMessage(s) => NetworkResult::<X>::InvalidMessage(s),
|
||||
Self::Value(v) => NetworkResult::<X>::Value(f(v)),
|
||||
}
|
||||
@ -196,6 +210,7 @@ impl<T> NetworkResult<T> {
|
||||
match self {
|
||||
Self::Timeout => Err(io::Error::new(io::ErrorKind::TimedOut, "Timed out")),
|
||||
Self::NoConnection(e) => Err(e),
|
||||
Self::AlreadyExists(e) => Err(e),
|
||||
Self::InvalidMessage(s) => Err(io::Error::new(
|
||||
io::ErrorKind::InvalidData,
|
||||
format!("Invalid message: {}", s),
|
||||
@ -230,6 +245,7 @@ impl<T: Debug> Debug for NetworkResult<T> {
|
||||
match self {
|
||||
Self::Timeout => write!(f, "Timeout"),
|
||||
Self::NoConnection(e) => f.debug_tuple("NoConnection").field(e).finish(),
|
||||
Self::AlreadyExists(e) => f.debug_tuple("AlreadyExists").field(e).finish(),
|
||||
Self::InvalidMessage(s) => f.debug_tuple("InvalidMessage").field(s).finish(),
|
||||
Self::Value(v) => f.debug_tuple("Value").field(v).finish(),
|
||||
}
|
||||
@ -241,6 +257,7 @@ impl<T> Display for NetworkResult<T> {
|
||||
match self {
|
||||
Self::Timeout => write!(f, "Timeout"),
|
||||
Self::NoConnection(e) => write!(f, "NoConnection({})", e.kind()),
|
||||
Self::AlreadyExists(e) => write!(f, "AlreadyExists({})", e.kind()),
|
||||
Self::InvalidMessage(s) => write!(f, "InvalidMessage({})", s),
|
||||
Self::Value(_) => write!(f, "Value"),
|
||||
}
|
||||
@ -258,6 +275,7 @@ macro_rules! network_result_try {
|
||||
match $r {
|
||||
NetworkResult::Timeout => return Ok(NetworkResult::Timeout),
|
||||
NetworkResult::NoConnection(e) => return Ok(NetworkResult::NoConnection(e)),
|
||||
NetworkResult::AlreadyExists(e) => return Ok(NetworkResult::AlreadyExists(e)),
|
||||
NetworkResult::InvalidMessage(s) => return Ok(NetworkResult::InvalidMessage(s)),
|
||||
NetworkResult::Value(v) => v,
|
||||
}
|
||||
@ -272,6 +290,10 @@ macro_rules! network_result_try {
|
||||
$f;
|
||||
return Ok(NetworkResult::NoConnection(e));
|
||||
}
|
||||
NetworkResult::AlreadyExists(e) => {
|
||||
$f;
|
||||
return Ok(NetworkResult::AlreadyExists(e));
|
||||
}
|
||||
NetworkResult::InvalidMessage(s) => {
|
||||
$f;
|
||||
return Ok(NetworkResult::InvalidMessage(s));
|
||||
@ -293,6 +315,10 @@ macro_rules! network_result_value_or_log {
|
||||
log_net!($level "{}({}) at {}@{}:{}", "No connection".green(), e.to_string(), file!(), line!(), column!());
|
||||
$f
|
||||
}
|
||||
NetworkResult::AlreadyExists(e) => {
|
||||
log_net!($level "{}({}) at {}@{}:{}", "Already exists".green(), e.to_string(), file!(), line!(), column!());
|
||||
$f
|
||||
}
|
||||
NetworkResult::InvalidMessage(s) => {
|
||||
log_net!($level "{}({}) at {}@{}:{}", "Invalid message".green(), s, file!(), line!(), column!());
|
||||
$f
|
||||
|
Loading…
Reference in New Issue
Block a user