mirror of
https://gitlab.com/veilid/veilid.git
synced 2025-08-09 07:02:41 -04:00
fix for incorrect reliable to dead state transition
This commit is contained in:
parent
4f9e19642c
commit
8aa5c8c5bb
1 changed files with 11 additions and 9 deletions
|
@ -24,7 +24,7 @@ const UNRELIABLE_PING_SPAN_SECS: u32 = 60;
|
||||||
const UNRELIABLE_PING_INTERVAL_SECS: u32 = 5;
|
const UNRELIABLE_PING_INTERVAL_SECS: u32 = 5;
|
||||||
|
|
||||||
/// How many times do we try to ping a never-reached node before we call it dead
|
/// How many times do we try to ping a never-reached node before we call it dead
|
||||||
const NEVER_REACHED_PING_COUNT: u32 = 3;
|
const NEVER_SEEN_PING_COUNT: u32 = 3;
|
||||||
|
|
||||||
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
|
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
|
||||||
pub(crate) enum BucketEntryDeadReason {
|
pub(crate) enum BucketEntryDeadReason {
|
||||||
|
@ -726,8 +726,8 @@ impl BucketEntryInner {
|
||||||
None => Some(BucketEntryUnreliableReason::NotSeenConsecutively),
|
None => Some(BucketEntryUnreliableReason::NotSeenConsecutively),
|
||||||
// If we have seen the node consistently for longer than UNRELIABLE_PING_SPAN_SECS then it is reliable
|
// If we have seen the node consistently for longer than UNRELIABLE_PING_SPAN_SECS then it is reliable
|
||||||
Some(ts) => {
|
Some(ts) => {
|
||||||
let is_reliable = cur_ts.saturating_sub(ts) >= TimestampDuration::new(UNRELIABLE_PING_SPAN_SECS as u64 * 1000000u64);
|
let seen_consecutively = cur_ts.saturating_sub(ts) >= TimestampDuration::new(UNRELIABLE_PING_SPAN_SECS as u64 * 1000000u64);
|
||||||
if is_reliable {
|
if seen_consecutively {
|
||||||
None
|
None
|
||||||
} else {
|
} else {
|
||||||
Some(BucketEntryUnreliableReason::InUnreliablePingSpan)
|
Some(BucketEntryUnreliableReason::InUnreliablePingSpan)
|
||||||
|
@ -738,7 +738,7 @@ impl BucketEntryInner {
|
||||||
pub(super) fn check_dead(&self, cur_ts: Timestamp) -> Option<BucketEntryDeadReason> {
|
pub(super) fn check_dead(&self, cur_ts: Timestamp) -> Option<BucketEntryDeadReason> {
|
||||||
|
|
||||||
// If we have failed to send NEVER_REACHED_PING_COUNT times in a row, the node is dead
|
// If we have failed to send NEVER_REACHED_PING_COUNT times in a row, the node is dead
|
||||||
if self.peer_stats.rpc_stats.failed_to_send >= NEVER_REACHED_PING_COUNT {
|
if self.peer_stats.rpc_stats.failed_to_send >= NEVER_SEEN_PING_COUNT {
|
||||||
return Some(BucketEntryDeadReason::FailedToSend);
|
return Some(BucketEntryDeadReason::FailedToSend);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -746,8 +746,8 @@ impl BucketEntryInner {
|
||||||
// a node is not dead if we haven't heard from it yet,
|
// a node is not dead if we haven't heard from it yet,
|
||||||
// but we give it NEVER_REACHED_PING_COUNT chances to ping before we say it's dead
|
// but we give it NEVER_REACHED_PING_COUNT chances to ping before we say it's dead
|
||||||
None => {
|
None => {
|
||||||
let is_dead = self.peer_stats.rpc_stats.recent_lost_answers >= NEVER_REACHED_PING_COUNT;
|
let no_answers = self.peer_stats.rpc_stats.recent_lost_answers >= NEVER_SEEN_PING_COUNT;
|
||||||
if is_dead {
|
if no_answers {
|
||||||
Some(BucketEntryDeadReason::TooManyLostAnswers)
|
Some(BucketEntryDeadReason::TooManyLostAnswers)
|
||||||
} else {
|
} else {
|
||||||
None
|
None
|
||||||
|
@ -755,9 +755,11 @@ impl BucketEntryInner {
|
||||||
}
|
}
|
||||||
|
|
||||||
// return dead if we have not heard from the node at all for the duration of the unreliable ping span
|
// return dead if we have not heard from the node at all for the duration of the unreliable ping span
|
||||||
|
// and we have tried to reach it and failed the entire time of unreliable ping span
|
||||||
Some(ts) => {
|
Some(ts) => {
|
||||||
let is_dead = cur_ts.saturating_sub(ts) >= TimestampDuration::new(UNRELIABLE_PING_SPAN_SECS as u64 * 1000000u64);
|
let not_seen = cur_ts.saturating_sub(ts) >= TimestampDuration::new(UNRELIABLE_PING_SPAN_SECS as u64 * 1000000u64);
|
||||||
if is_dead {
|
let no_answers = self.peer_stats.rpc_stats.recent_lost_answers >= (UNRELIABLE_PING_SPAN_SECS / UNRELIABLE_PING_INTERVAL_SECS);
|
||||||
|
if not_seen && no_answers {
|
||||||
Some(BucketEntryDeadReason::NoPingResponse)
|
Some(BucketEntryDeadReason::NoPingResponse)
|
||||||
} else {
|
} else {
|
||||||
None
|
None
|
||||||
|
@ -824,7 +826,7 @@ impl BucketEntryInner {
|
||||||
}
|
}
|
||||||
BucketEntryState::Unreliable => {
|
BucketEntryState::Unreliable => {
|
||||||
// If we are in an unreliable state, we need a ping every UNRELIABLE_PING_INTERVAL_SECS seconds
|
// If we are in an unreliable state, we need a ping every UNRELIABLE_PING_INTERVAL_SECS seconds
|
||||||
self.needs_constant_ping(cur_ts, TimestampDuration::new(UNRELIABLE_PING_INTERVAL_SECS as u64 * 1000000u64))
|
self.needs_constant_ping(cur_ts, TimestampDuration::new(UNRELIABLE_PING_INTERVAL_SECS as u64 * 1_000_000u64))
|
||||||
}
|
}
|
||||||
BucketEntryState::Dead => {
|
BucketEntryState::Dead => {
|
||||||
error!("Should not be asking this for dead nodes");
|
error!("Should not be asking this for dead nodes");
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue