add StartupDisposition to handle waiting for binding without reporting errors

This commit is contained in:
Christien Rioux 2024-07-08 18:26:26 -04:00
parent 8c297acdee
commit 1048fc6bb9
8 changed files with 441 additions and 350 deletions

View File

@ -219,16 +219,27 @@ impl AttachmentManager {
let netman = self.network_manager(); let netman = self.network_manager();
let mut restart; let mut restart;
loop { let mut restart_delay;
while self.inner.lock().maintain_peers {
restart = false; restart = false;
if let Err(err) = netman.startup().await { restart_delay = 1;
match netman.startup().await {
Err(err) => {
error!("network startup failed: {}", err); error!("network startup failed: {}", err);
netman.shutdown().await;
restart = true; restart = true;
} else { }
Ok(StartupDisposition::BindRetry) => {
info!("waiting for network to bind...");
restart = true;
restart_delay = 10;
}
Ok(StartupDisposition::Success) => {
log_net!(debug "started maintaining peers"); log_net!(debug "started maintaining peers");
while self.inner.lock().maintain_peers { while self.inner.lock().maintain_peers {
// tick network manager // tick network manager
let next_tick_ts = get_timestamp() + 1_000_000u64;
if let Err(err) = netman.tick().await { if let Err(err) = netman.tick().await {
error!("Error in network manager: {}", err); error!("Error in network manager: {}", err);
self.inner.lock().maintain_peers = false; self.inner.lock().maintain_peers = false;
@ -248,7 +259,10 @@ impl AttachmentManager {
self.update_attachment(); self.update_attachment();
// sleep should be at the end in case maintain_peers changes state // sleep should be at the end in case maintain_peers changes state
sleep(1000).await; let wait_duration = next_tick_ts
.saturating_sub(get_timestamp())
.clamp(0, 1_000_000u64);
sleep((wait_duration / 1_000) as u32).await;
} }
log_net!(debug "stopped maintaining peers"); log_net!(debug "stopped maintaining peers");
@ -260,15 +274,22 @@ impl AttachmentManager {
log_net!(debug "stopping network"); log_net!(debug "stopping network");
netman.shutdown().await; netman.shutdown().await;
} }
}
if !restart { if !restart {
break; break;
} }
log_net!(debug "completely restarting attachment"); log_net!(debug "completely restarting attachment");
// chill out for a second first, give network stack time to settle out // chill out for a second first, give network stack time to settle out
for _ in 0..restart_delay {
if !self.inner.lock().maintain_peers {
break;
}
sleep(1000).await; sleep(1000).await;
} }
}
self.update_attaching_detaching_state(AttachmentState::Detached); self.update_attaching_detaching_state(AttachmentState::Detached);
log_net!(debug "attachment stopped"); log_net!(debug "attachment stopped");

View File

@ -136,6 +136,12 @@ enum SendDataToExistingFlowResult {
NotSent(Vec<u8>), NotSent(Vec<u8>),
} }
/// Non-error outcome of a network startup attempt.
///
/// Startup functions return this inside `EyreResult`, so a bind that is
/// not yet possible can be reported to the caller without raising an error.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum StartupDisposition {
/// Startup ran to completion.
Success,
/// A listener could not bind to its requested address, so startup
/// stopped early; the caller is expected to try again later.
BindRetry,
}
// The mutable state of the network manager // The mutable state of the network manager
struct NetworkManagerInner { struct NetworkManagerInner {
stats: NetworkManagerStats, stats: NetworkManagerStats,
@ -388,10 +394,10 @@ impl NetworkManager {
} }
#[instrument(level = "debug", skip_all, err)] #[instrument(level = "debug", skip_all, err)]
pub async fn internal_startup(&self) -> EyreResult<()> { pub async fn internal_startup(&self) -> EyreResult<StartupDisposition> {
if self.unlocked_inner.components.read().is_some() { if self.unlocked_inner.components.read().is_some() {
log_net!(debug "NetworkManager::internal_startup already started"); log_net!(debug "NetworkManager::internal_startup already started");
return Ok(()); return Ok(StartupDisposition::Success);
} }
// Clean address filter for things that should not be persistent // Clean address filter for things that should not be persistent
@ -423,26 +429,37 @@ impl NetworkManager {
// Start network components // Start network components
connection_manager.startup().await; connection_manager.startup().await;
net.startup().await?; match net.startup().await? {
StartupDisposition::Success => {}
StartupDisposition::BindRetry => {
return Ok(StartupDisposition::BindRetry);
}
}
rpc_processor.startup().await?; rpc_processor.startup().await?;
receipt_manager.startup().await?; receipt_manager.startup().await?;
log_net!("NetworkManager::internal_startup end"); log_net!("NetworkManager::internal_startup end");
Ok(()) Ok(StartupDisposition::Success)
} }
#[instrument(level = "debug", skip_all, err)] #[instrument(level = "debug", skip_all, err)]
pub async fn startup(&self) -> EyreResult<()> { pub async fn startup(&self) -> EyreResult<StartupDisposition> {
if let Err(e) = self.internal_startup().await { match self.internal_startup().await {
self.shutdown().await; Ok(StartupDisposition::Success) => {
return Err(e);
}
// Inform api clients that things have changed // Inform api clients that things have changed
self.send_network_update(); self.send_network_update();
Ok(StartupDisposition::Success)
Ok(()) }
Ok(StartupDisposition::BindRetry) => {
self.shutdown().await;
Ok(StartupDisposition::BindRetry)
}
Err(e) => {
self.shutdown().await;
Err(e)
}
}
} }
#[instrument(level = "debug", skip_all)] #[instrument(level = "debug", skip_all)]

View File

@ -709,10 +709,7 @@ impl Network {
///////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////
#[instrument(level = "debug", err, skip_all)] pub async fn startup_internal(&self) -> EyreResult<StartupDisposition> {
pub async fn startup(&self) -> EyreResult<()> {
self.inner.lock().network_started = None;
let startup_func = async {
// initialize interfaces // initialize interfaces
self.unlocked_inner.interfaces.refresh().await?; self.unlocked_inner.interfaces.refresh().await?;
@ -861,23 +858,36 @@ impl Network {
// start listeners // start listeners
if protocol_config.inbound.contains(ProtocolType::UDP) { if protocol_config.inbound.contains(ProtocolType::UDP) {
self.bind_udp_protocol_handlers( let res = self
&mut editor_public_internet, .bind_udp_protocol_handlers(&mut editor_public_internet, &mut editor_local_network)
&mut editor_local_network, .await;
) if !matches!(res, Ok(StartupDisposition::Success)) {
.await?; return res;
}
} }
if protocol_config.inbound.contains(ProtocolType::WS) { if protocol_config.inbound.contains(ProtocolType::WS) {
self.start_ws_listeners(&mut editor_public_internet, &mut editor_local_network) let res = self
.await?; .start_ws_listeners(&mut editor_public_internet, &mut editor_local_network)
.await;
if !matches!(res, Ok(StartupDisposition::Success)) {
return res;
}
} }
if protocol_config.inbound.contains(ProtocolType::WSS) { if protocol_config.inbound.contains(ProtocolType::WSS) {
self.start_wss_listeners(&mut editor_public_internet, &mut editor_local_network) let res = self
.await?; .start_wss_listeners(&mut editor_public_internet, &mut editor_local_network)
.await;
if !matches!(res, Ok(StartupDisposition::Success)) {
return res;
}
} }
if protocol_config.inbound.contains(ProtocolType::TCP) { if protocol_config.inbound.contains(ProtocolType::TCP) {
self.start_tcp_listeners(&mut editor_public_internet, &mut editor_local_network) let res = self
.await?; .start_tcp_listeners(&mut editor_public_internet, &mut editor_local_network)
.await;
if !matches!(res, Ok(StartupDisposition::Success)) {
return res;
}
} }
editor_public_internet.setup_network( editor_public_internet.setup_network(
@ -907,18 +917,30 @@ impl Network {
editor_public_internet.commit(true).await; editor_public_internet.commit(true).await;
editor_local_network.commit(true).await; editor_local_network.commit(true).await;
Ok(()) Ok(StartupDisposition::Success)
};
let res = startup_func.await;
if res.is_err() {
info!("network failed to start");
self.inner.lock().network_started = Some(false);
return res;
} }
#[instrument(level = "debug", err, skip_all)]
pub async fn startup(&self) -> EyreResult<StartupDisposition> {
self.inner.lock().network_started = None;
match self.startup_internal().await {
Ok(StartupDisposition::Success) => {
info!("network started"); info!("network started");
self.inner.lock().network_started = Some(true); self.inner.lock().network_started = Some(true);
Ok(()) Ok(StartupDisposition::Success)
}
Ok(StartupDisposition::BindRetry) => {
debug!("network bind retry");
self.inner.lock().network_started = Some(false);
Ok(StartupDisposition::BindRetry)
}
Err(e) => {
debug!("network failed to start");
self.inner.lock().network_started = Some(false);
Err(e)
}
}
} }
pub fn needs_restart(&self) -> bool { pub fn needs_restart(&self) -> bool {

View File

@ -349,7 +349,7 @@ impl Network {
bind_set: NetworkBindSet, bind_set: NetworkBindSet,
is_tls: bool, is_tls: bool,
new_protocol_accept_handler: Box<NewProtocolAcceptHandler>, new_protocol_accept_handler: Box<NewProtocolAcceptHandler>,
) -> EyreResult<Vec<SocketAddress>> { ) -> EyreResult<Option<Vec<SocketAddress>>> {
let mut out = Vec::<SocketAddress>::new(); let mut out = Vec::<SocketAddress>::new();
for ip_addr in bind_set.addrs { for ip_addr in bind_set.addrs {
@ -404,7 +404,8 @@ impl Network {
} }
if !bind_set.search { if !bind_set.search {
bail!("unable to bind to tcp {}", addr); log_net!(debug "unable to bind to tcp {}", addr);
return Ok(None);
} }
if port == 65535u16 { if port == 65535u16 {
@ -419,6 +420,6 @@ impl Network {
} }
} }
Ok(out) Ok(Some(out))
} }
} }

View File

@ -151,7 +151,7 @@ impl Network {
pub(super) async fn create_udp_protocol_handlers( pub(super) async fn create_udp_protocol_handlers(
&self, &self,
bind_set: NetworkBindSet, bind_set: NetworkBindSet,
) -> EyreResult<Vec<DialInfo>> { ) -> EyreResult<Option<Vec<DialInfo>>> {
let mut out = Vec::<DialInfo>::new(); let mut out = Vec::<DialInfo>::new();
for ip_addr in bind_set.addrs { for ip_addr in bind_set.addrs {
@ -175,7 +175,8 @@ impl Network {
} }
if !bind_set.search { if !bind_set.search {
bail!("unable to bind to udp {}", addr); log_net!(debug "unable to bind to udp {}", addr);
return Ok(None);
} }
if port == 65535u16 { if port == 65535u16 {
@ -189,7 +190,7 @@ impl Network {
} }
} }
} }
Ok(out) Ok(Some(out))
} }
///////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////

View File

@ -140,7 +140,7 @@ impl Network {
&self, &self,
editor_public_internet: &mut RoutingDomainEditor, editor_public_internet: &mut RoutingDomainEditor,
editor_local_network: &mut RoutingDomainEditor, editor_local_network: &mut RoutingDomainEditor,
) -> EyreResult<()> { ) -> EyreResult<StartupDisposition> {
log_net!("UDP: binding protocol handlers"); log_net!("UDP: binding protocol handlers");
let routing_table = self.routing_table(); let routing_table = self.routing_table();
let (listen_address, public_address, detect_address_changes) = { let (listen_address, public_address, detect_address_changes) = {
@ -170,7 +170,10 @@ impl Network {
); );
} }
let mut local_dial_info_list = self.create_udp_protocol_handlers(bind_set).await?; let Some(mut local_dial_info_list) = self.create_udp_protocol_handlers(bind_set).await?
else {
return Ok(StartupDisposition::BindRetry);
};
local_dial_info_list.sort(); local_dial_info_list.sort();
let mut static_public = false; let mut static_public = false;
@ -241,14 +244,16 @@ impl Network {
} }
// Now create tasks for udp listeners // Now create tasks for udp listeners
self.create_udp_listener_tasks().await self.create_udp_listener_tasks().await?;
Ok(StartupDisposition::Success)
} }
pub(super) async fn start_ws_listeners( pub(super) async fn start_ws_listeners(
&self, &self,
editor_public_internet: &mut RoutingDomainEditor, editor_public_internet: &mut RoutingDomainEditor,
editor_local_network: &mut RoutingDomainEditor, editor_local_network: &mut RoutingDomainEditor,
) -> EyreResult<()> { ) -> EyreResult<StartupDisposition> {
log_net!("WS: binding protocol handlers"); log_net!("WS: binding protocol handlers");
let routing_table = self.routing_table(); let routing_table = self.routing_table();
let (listen_address, url, path, detect_address_changes) = { let (listen_address, url, path, detect_address_changes) = {
@ -277,13 +282,16 @@ impl Network {
bind_set.port, bind_set.addrs bind_set.port, bind_set.addrs
); );
} }
let socket_addresses = self let Some(socket_addresses) = self
.start_tcp_listener( .start_tcp_listener(
bind_set, bind_set,
false, false,
Box::new(|c, t| Box::new(WebsocketProtocolHandler::new(c, t))), Box::new(|c, t| Box::new(WebsocketProtocolHandler::new(c, t))),
) )
.await?; .await?
else {
return Ok(StartupDisposition::BindRetry);
};
log_net!("WS: protocol handlers started on {:#?}", socket_addresses); log_net!("WS: protocol handlers started on {:#?}", socket_addresses);
let mut static_public = false; let mut static_public = false;
@ -353,14 +361,14 @@ impl Network {
Self::add_preferred_local_address(&mut inner, PeerAddress::new(sa, ProtocolType::WS)); Self::add_preferred_local_address(&mut inner, PeerAddress::new(sa, ProtocolType::WS));
} }
Ok(()) Ok(StartupDisposition::Success)
} }
pub(super) async fn start_wss_listeners( pub(super) async fn start_wss_listeners(
&self, &self,
editor_public_internet: &mut RoutingDomainEditor, editor_public_internet: &mut RoutingDomainEditor,
editor_local_network: &mut RoutingDomainEditor, editor_local_network: &mut RoutingDomainEditor,
) -> EyreResult<()> { ) -> EyreResult<StartupDisposition> {
log_net!("WSS: binding protocol handlers"); log_net!("WSS: binding protocol handlers");
let (listen_address, url, _detect_address_changes) = { let (listen_address, url, _detect_address_changes) = {
@ -389,13 +397,17 @@ impl Network {
); );
} }
let socket_addresses = self let Some(socket_addresses) = self
.start_tcp_listener( .start_tcp_listener(
bind_set, bind_set,
true, true,
Box::new(|c, t| Box::new(WebsocketProtocolHandler::new(c, t))), Box::new(|c, t| Box::new(WebsocketProtocolHandler::new(c, t))),
) )
.await?; .await?
else {
return Ok(StartupDisposition::BindRetry);
};
log_net!("WSS: protocol handlers started on {:#?}", socket_addresses); log_net!("WSS: protocol handlers started on {:#?}", socket_addresses);
// NOTE: No interface dial info for WSS, as there is no way to connect to a local dialinfo via TLS // NOTE: No interface dial info for WSS, as there is no way to connect to a local dialinfo via TLS
@ -448,14 +460,14 @@ impl Network {
Self::add_preferred_local_address(&mut inner, PeerAddress::new(sa, ProtocolType::WSS)); Self::add_preferred_local_address(&mut inner, PeerAddress::new(sa, ProtocolType::WSS));
} }
Ok(()) Ok(StartupDisposition::Success)
} }
pub(super) async fn start_tcp_listeners( pub(super) async fn start_tcp_listeners(
&self, &self,
editor_public_internet: &mut RoutingDomainEditor, editor_public_internet: &mut RoutingDomainEditor,
editor_local_network: &mut RoutingDomainEditor, editor_local_network: &mut RoutingDomainEditor,
) -> EyreResult<()> { ) -> EyreResult<StartupDisposition> {
log_net!("TCP: binding protocol handlers"); log_net!("TCP: binding protocol handlers");
let routing_table = self.routing_table(); let routing_table = self.routing_table();
@ -484,13 +496,17 @@ impl Network {
bind_set.port, bind_set.addrs bind_set.port, bind_set.addrs
); );
} }
let socket_addresses = self let Some(socket_addresses) = self
.start_tcp_listener( .start_tcp_listener(
bind_set, bind_set,
false, false,
Box::new(|c, _| Box::new(RawTcpProtocolHandler::new(c))), Box::new(|c, _| Box::new(RawTcpProtocolHandler::new(c))),
) )
.await?; .await?
else {
return Ok(StartupDisposition::BindRetry);
};
log_net!("TCP: protocol handlers started on {:#?}", socket_addresses); log_net!("TCP: protocol handlers started on {:#?}", socket_addresses);
let mut static_public = false; let mut static_public = false;
@ -546,6 +562,6 @@ impl Network {
Self::add_preferred_local_address(&mut inner, PeerAddress::new(sa, ProtocolType::TCP)); Self::add_preferred_local_address(&mut inner, PeerAddress::new(sa, ProtocolType::TCP));
} }
Ok(()) Ok(StartupDisposition::Success)
} }
} }

View File

@ -333,9 +333,7 @@ impl Network {
///////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////
pub async fn startup(&self) -> EyreResult<()> { pub async fn startup_internal(&self) -> EyreResult<StartupDisposition> {
self.inner.lock().network_started = None;
let startup_func = async {
log_net!(debug "starting network"); log_net!(debug "starting network");
// get protocol config // get protocol config
let protocol_config = { let protocol_config = {
@ -397,19 +395,30 @@ impl Network {
// commit routing table edits // commit routing table edits
editor_public_internet.commit(true).await; editor_public_internet.commit(true).await;
Ok(())
};
let res = startup_func.await; Ok(StartupDisposition::Success)
if res.is_err() {
info!("network failed to start");
self.inner.lock().network_started = Some(false);
return res;
} }
pub async fn startup(&self) -> EyreResult<StartupDisposition> {
self.inner.lock().network_started = None;
match self.startup_internal().await {
Ok(StartupDisposition::Success) => {
info!("network started"); info!("network started");
self.inner.lock().network_started = Some(true); self.inner.lock().network_started = Some(true);
Ok(()) Ok(StartupDisposition::Success)
}
Ok(StartupDisposition::BindRetry) => {
debug!("network bind retry");
self.inner.lock().network_started = Some(false);
Ok(StartupDisposition::BindRetry)
}
Err(e) => {
debug!("network failed to start");
self.inner.lock().network_started = Some(false);
Err(e)
}
}
} }
pub fn needs_restart(&self) -> bool { pub fn needs_restart(&self) -> bool {

View File

@ -149,6 +149,7 @@ impl RoutingDomainEditor {
address_type, address_type,
protocol_type, protocol_type,
} => { } => {
if !detail.common_mut().dial_info_details().is_empty() {
if address_type.is_some() || protocol_type.is_some() { if address_type.is_some() || protocol_type.is_some() {
info!( info!(
"[{:?}] cleared dial info: {}:{}", "[{:?}] cleared dial info: {}:{}",
@ -163,13 +164,16 @@ impl RoutingDomainEditor {
} else { } else {
info!("[{:?}] cleared all dial info", self.routing_domain); info!("[{:?}] cleared all dial info", self.routing_domain);
} }
}
detail detail
.common_mut() .common_mut()
.clear_dial_info_details(address_type, protocol_type); .clear_dial_info_details(address_type, protocol_type);
peer_info_changed = true; peer_info_changed = true;
} }
RoutingDomainChange::ClearRelayNode => { RoutingDomainChange::ClearRelayNode => {
if detail.common_mut().relay_node().is_some() {
info!("[{:?}] cleared relay node", self.routing_domain); info!("[{:?}] cleared relay node", self.routing_domain);
}
detail.common_mut().set_relay_node(None); detail.common_mut().set_relay_node(None);
peer_info_changed = true; peer_info_changed = true;
} }