mirror of
https://gitlab.com/veilid/veilid.git
synced 2024-10-01 01:26:08 -04:00
improve error handling for network errors
make relay requirements more aggressive
This commit is contained in:
parent
f7ce5f93d0
commit
e0c20ed4c9
@ -151,7 +151,7 @@ impl RawTcpProtocolHandler {
|
||||
ps,
|
||||
));
|
||||
|
||||
log_net!(debug "Connection accepted from: {} (TCP)", socket_addr);
|
||||
log_net!("Connection accepted from: {} (TCP)", socket_addr);
|
||||
|
||||
Ok(Some(conn))
|
||||
}
|
||||
|
@ -286,7 +286,11 @@ impl WebsocketProtocolHandler {
|
||||
ws_stream,
|
||||
));
|
||||
|
||||
log_net!(debug "Connection accepted from: {} ({})", socket_addr, if self.arc.tls { "WSS" } else { "WS" });
|
||||
log_net!(
|
||||
"Connection accepted from: {} ({})",
|
||||
socket_addr,
|
||||
if self.arc.tls { "WSS" } else { "WS" }
|
||||
);
|
||||
|
||||
Ok(Some(conn))
|
||||
}
|
||||
|
@ -279,7 +279,7 @@ impl NetworkConnection {
|
||||
) -> SendPinBoxFuture<()> {
|
||||
Box::pin(async move {
|
||||
log_net!(
|
||||
"== Starting process_connection loop for id={}, {:?}", connection_id,
|
||||
"Starting process_connection loop for id={}, {:?}", connection_id,
|
||||
flow
|
||||
);
|
||||
|
||||
@ -294,7 +294,7 @@ impl NetworkConnection {
|
||||
let new_timer = || {
|
||||
sleep(connection_manager.connection_inactivity_timeout_ms()).then(|_| async {
|
||||
// timeout
|
||||
log_net!("== Connection timeout on {:?}", flow);
|
||||
log_net!("Connection timeout on {:?}", flow);
|
||||
RecvLoopAction::Timeout
|
||||
})
|
||||
};
|
||||
@ -354,7 +354,7 @@ impl NetworkConnection {
|
||||
|
||||
// Check for connection close
|
||||
if v.is_no_connection() {
|
||||
log_net!(debug "Connection closed from: {} ({})", peer_address.socket_addr(), peer_address.protocol_type());
|
||||
log_net!("Connection closed from: {} ({})", peer_address.socket_addr(), peer_address.protocol_type());
|
||||
return RecvLoopAction::Finish;
|
||||
}
|
||||
|
||||
@ -428,7 +428,7 @@ impl NetworkConnection {
|
||||
}
|
||||
|
||||
log_net!(
|
||||
"== Connection loop finished flow={:?}",
|
||||
"Connection loop finished flow={:?}",
|
||||
flow
|
||||
);
|
||||
|
||||
|
@ -172,6 +172,12 @@ impl RoutingTable {
|
||||
return false;
|
||||
};
|
||||
|
||||
// Exclude any nodes that have 'failed to send' state indicating a
|
||||
// connection drop or inability to reach the node
|
||||
if e.peer_stats().rpc_stats.failed_to_send > 0 {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Until we have a way of reducing a SignedRelayedNodeInfo to a SignedDirectNodeInfo
|
||||
// See https://gitlab.com/veilid/veilid/-/issues/381
|
||||
// We should consider nodes with allocated relays as disqualified from being a relay themselves
|
||||
|
@ -1500,7 +1500,7 @@ impl RPCProcessor {
|
||||
},
|
||||
// Ignored messages that should be dropped
|
||||
RPCError::Ignore(_) | RPCError::Network(_) | RPCError::TryAgain(_) => {
|
||||
log_rpc!(debug "Dropping RPC Operation: {}", e);
|
||||
log_rpc!("Dropping RPC Operation: {}", e);
|
||||
},
|
||||
// Internal errors that deserve louder logging
|
||||
RPCError::Unimplemented(_) | RPCError::Internal(_) => {
|
||||
@ -1582,7 +1582,7 @@ impl RPCProcessor {
|
||||
Ok(v) => v,
|
||||
Err(e) => {
|
||||
// Debug on error
|
||||
log_rpc!(debug "Dropping RPC operation: {}", e);
|
||||
log_rpc!(debug "Dropping routed RPC: {}", e);
|
||||
|
||||
// XXX: Punish routes that send routed undecodable crap
|
||||
// address_filter.punish_route_id(xxx, PunishmentReason::FailedToDecodeRoutedMessage);
|
||||
@ -1666,7 +1666,21 @@ impl RPCProcessor {
|
||||
if let Err(e) = self.unlocked_inner
|
||||
.waiting_rpc_table
|
||||
.complete_op_waiter(op_id, msg) {
|
||||
log_rpc!(debug "Operation id {} did not complete: {}", op_id, e);
|
||||
match e {
|
||||
RPCError::Unimplemented(_) |
|
||||
RPCError::Internal(_) => {
|
||||
log_rpc!(error "Could not complete rpc operation: id = {}: {}", op_id, e);
|
||||
},
|
||||
RPCError::InvalidFormat(_) |
|
||||
RPCError::Protocol(_) |
|
||||
RPCError::Network(_) |
|
||||
RPCError::TryAgain(_) => {
|
||||
log_rpc!(debug "Could not complete rpc operation: id = {}: {}", op_id, e);
|
||||
},
|
||||
RPCError::Ignore(_) => {
|
||||
log_rpc!("Answer late: id = {}", op_id);
|
||||
},
|
||||
};
|
||||
// Don't throw an error here because it's okay if the original operation timed out
|
||||
}
|
||||
Ok(NetworkResult::value(()))
|
||||
|
@ -34,7 +34,7 @@ pub struct RPCStats {
|
||||
pub last_seen_ts: Option<Timestamp>, // when the peer was last seen for any reason, including when we first attempted to reach out to it
|
||||
pub first_consecutive_seen_ts: Option<Timestamp>, // the timestamp of the first consecutive proof-of-life for this node (an answer or received question)
|
||||
pub recent_lost_answers: u32, // number of answers that have been lost since we lost reliability
|
||||
pub failed_to_send: u32, // number of messages that have failed to send since we last successfully sent one
|
||||
pub failed_to_send: u32, // number of messages that have failed to send or connections dropped since we last successfully sent one
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
|
||||
|
@ -32,6 +32,7 @@ impl<T> IoNetworkResultExt<T> for io::Result<T> {
|
||||
// Err(e) => match e.kind() {
|
||||
// io::ErrorKind::TimedOut => Ok(NetworkResult::Timeout),
|
||||
// io::ErrorKind::UnexpectedEof
|
||||
// | io::ErrorKind::NotConnected
|
||||
// | io::ErrorKind::BrokenPipe
|
||||
// | io::ErrorKind::ConnectionAborted
|
||||
// | io::ErrorKind::ConnectionRefused
|
||||
@ -52,6 +53,7 @@ impl<T> IoNetworkResultExt<T> for io::Result<T> {
|
||||
match e.kind() {
|
||||
io::ErrorKind::TimedOut => Ok(NetworkResult::Timeout),
|
||||
io::ErrorKind::UnexpectedEof
|
||||
| io::ErrorKind::NotConnected
|
||||
| io::ErrorKind::BrokenPipe
|
||||
| io::ErrorKind::ConnectionAborted
|
||||
| io::ErrorKind::ConnectionRefused
|
||||
|
Loading…
Reference in New Issue
Block a user