bootstrapper: move fixing & waiting for Cilium to earlier stage

This commit is contained in:
Nils Hanke 2023-03-10 18:27:01 +01:00 committed by Nils Hanke
parent 122b5ff0a0
commit 97d95bd48c
4 changed files with 67 additions and 25 deletions

View file

@ -189,6 +189,26 @@ func (k *KubeWrapper) InitCluster(
return nil, fmt.Errorf("installing pod network: %w", err)
}
// TODO: The timeout here is high as ghcr.io can be slow sometimes. Reduce this later when we move the repository.
// Also remove the logging later.
log.Infof("Waiting for Cilium to become healthy")
timeToStartWaiting := time.Now()
// TODO(Nirusu): Reduce the timeout once we have switched the package repository - it is only this high because I once
// saw polling times of ~16 minutes when hitting a slow PoP from Fastly (GitHub's / ghcr.io CDN).
waitCtx, cancel = context.WithTimeout(ctx, 20*time.Minute)
defer cancel()
if err := k.clusterUtil.WaitForCilium(waitCtx, log); err != nil {
return nil, fmt.Errorf("waiting for Cilium to become healthy: %w", err)
}
timeUntilFinishedWaiting := time.Since(timeToStartWaiting)
log.Infof("Cilium took %s to become healthy", timeUntilFinishedWaiting.Round(time.Second).String())
log.Infof("Restart Cilium")
if err := k.clusterUtil.FixCilium(ctx); err != nil {
log.With(zap.Error(err)).Errorf("FixCilium failed")
// Continue and don't throw an error here - things might be okay.
}
var controlPlaneIP string
if strings.Contains(controlPlaneEndpoint, ":") {
controlPlaneIP, _, err = net.SplitHostPort(controlPlaneEndpoint)
@ -239,8 +259,6 @@ func (k *KubeWrapper) InitCluster(
return nil, fmt.Errorf("installing operators: %w", err)
}
k.clusterUtil.FixCilium(log)
return kubeConfig, nil
}
@ -297,7 +315,16 @@ func (k *KubeWrapper) JoinCluster(ctx context.Context, args *kubeadm.BootstrapTo
return fmt.Errorf("joining cluster: %v; %w ", string(joinConfigYAML), err)
}
k.clusterUtil.FixCilium(log)
log.Infof("Waiting for Cilium to become healthy")
if err := k.clusterUtil.WaitForCilium(context.Background(), log); err != nil {
return fmt.Errorf("waiting for Cilium to become healthy: %w", err)
}
log.Infof("Restart Cilium")
if err := k.clusterUtil.FixCilium(context.Background()); err != nil {
log.With(zap.Error(err)).Errorf("FixCilium failed")
// Continue and don't throw an error here - things might be okay.
}
return nil
}
@ -358,7 +385,17 @@ func (k *KubeWrapper) StartKubelet(log *logger.Logger) error {
return fmt.Errorf("starting kubelet: %w", err)
}
k.clusterUtil.FixCilium(log)
log.Infof("Waiting for Cilium to become healthy")
if err := k.clusterUtil.WaitForCilium(context.Background(), log); err != nil {
return fmt.Errorf("waiting for Cilium to become healthy: %w", err)
}
log.Infof("Restart Cilium")
if err := k.clusterUtil.FixCilium(context.Background()); err != nil {
log.With(zap.Error(err)).Errorf("FixCilium failed")
// Continue and don't throw an error here - things might be okay.
}
return nil
}