mirror of
https://gitlab.com/veilid/veilid.git
synced 2024-12-28 00:39:25 -05:00
fix for incorrect reliable to dead state transition
This commit is contained in:
parent
4f9e19642c
commit
8aa5c8c5bb
@ -24,7 +24,7 @@ const UNRELIABLE_PING_SPAN_SECS: u32 = 60;
|
||||
const UNRELIABLE_PING_INTERVAL_SECS: u32 = 5;
|
||||
|
||||
/// How many times do we try to ping a never-reached node before we call it dead
|
||||
const NEVER_REACHED_PING_COUNT: u32 = 3;
|
||||
const NEVER_SEEN_PING_COUNT: u32 = 3;
|
||||
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
|
||||
pub(crate) enum BucketEntryDeadReason {
|
||||
@ -726,8 +726,8 @@ impl BucketEntryInner {
|
||||
None => Some(BucketEntryUnreliableReason::NotSeenConsecutively),
|
||||
// If we have seen the node consistently for longer than UNRELIABLE_PING_SPAN_SECS then it is reliable
|
||||
Some(ts) => {
|
||||
let is_reliable = cur_ts.saturating_sub(ts) >= TimestampDuration::new(UNRELIABLE_PING_SPAN_SECS as u64 * 1000000u64);
|
||||
if is_reliable {
|
||||
let seen_consecutively = cur_ts.saturating_sub(ts) >= TimestampDuration::new(UNRELIABLE_PING_SPAN_SECS as u64 * 1000000u64);
|
||||
if seen_consecutively {
|
||||
None
|
||||
} else {
|
||||
Some(BucketEntryUnreliableReason::InUnreliablePingSpan)
|
||||
@ -738,7 +738,7 @@ impl BucketEntryInner {
|
||||
pub(super) fn check_dead(&self, cur_ts: Timestamp) -> Option<BucketEntryDeadReason> {
|
||||
|
||||
// If we have failed to send NEVER_REACHED_PING_COUNT times in a row, the node is dead
|
||||
if self.peer_stats.rpc_stats.failed_to_send >= NEVER_REACHED_PING_COUNT {
|
||||
if self.peer_stats.rpc_stats.failed_to_send >= NEVER_SEEN_PING_COUNT {
|
||||
return Some(BucketEntryDeadReason::FailedToSend);
|
||||
}
|
||||
|
||||
@ -746,8 +746,8 @@ impl BucketEntryInner {
|
||||
// a node is not dead if we haven't heard from it yet,
|
||||
// but we give it NEVER_REACHED_PING_COUNT chances to ping before we say it's dead
|
||||
None => {
|
||||
let is_dead = self.peer_stats.rpc_stats.recent_lost_answers >= NEVER_REACHED_PING_COUNT;
|
||||
if is_dead {
|
||||
let no_answers = self.peer_stats.rpc_stats.recent_lost_answers >= NEVER_SEEN_PING_COUNT;
|
||||
if no_answers {
|
||||
Some(BucketEntryDeadReason::TooManyLostAnswers)
|
||||
} else {
|
||||
None
|
||||
@ -755,9 +755,11 @@ impl BucketEntryInner {
|
||||
}
|
||||
|
||||
// return dead if we have not heard from the node at all for the duration of the unreliable ping span
|
||||
// and we have tried to reach it and failed the entire time of unreliable ping span
|
||||
Some(ts) => {
|
||||
let is_dead = cur_ts.saturating_sub(ts) >= TimestampDuration::new(UNRELIABLE_PING_SPAN_SECS as u64 * 1000000u64);
|
||||
if is_dead {
|
||||
let not_seen = cur_ts.saturating_sub(ts) >= TimestampDuration::new(UNRELIABLE_PING_SPAN_SECS as u64 * 1000000u64);
|
||||
let no_answers = self.peer_stats.rpc_stats.recent_lost_answers >= (UNRELIABLE_PING_SPAN_SECS / UNRELIABLE_PING_INTERVAL_SECS);
|
||||
if not_seen && no_answers {
|
||||
Some(BucketEntryDeadReason::NoPingResponse)
|
||||
} else {
|
||||
None
|
||||
@ -824,7 +826,7 @@ impl BucketEntryInner {
|
||||
}
|
||||
BucketEntryState::Unreliable => {
|
||||
// If we are in an unreliable state, we need a ping every UNRELIABLE_PING_INTERVAL_SECS seconds
|
||||
self.needs_constant_ping(cur_ts, TimestampDuration::new(UNRELIABLE_PING_INTERVAL_SECS as u64 * 1000000u64))
|
||||
self.needs_constant_ping(cur_ts, TimestampDuration::new(UNRELIABLE_PING_INTERVAL_SECS as u64 * 1_000_000u64))
|
||||
}
|
||||
BucketEntryState::Dead => {
|
||||
error!("Should not be asking this for dead nodes");
|
||||
|
Loading…
Reference in New Issue
Block a user