mirror of
https://github.com/edgelesssys/constellation.git
synced 2025-08-02 20:16:15 -04:00
AB#2260 Refactor disk-mapper recovery (#82)
* Refactor disk-mapper recovery * Adapt constellation recover command to use new disk-mapper recovery API * Fix Cilium connectivity on rebooting nodes (#89) * Lower CoreDNS reschedule timeout to 10 seconds (#93) Signed-off-by: Daniel Weiße <dw@edgeless.systems>
This commit is contained in:
parent
a7b20b2a11
commit
8cb155d5c5
40 changed files with 1600 additions and 1130 deletions
|
@ -13,30 +13,6 @@ import (
|
|||
"github.com/coreos/go-systemd/v22/dbus"
|
||||
)
|
||||
|
||||
func restartSystemdUnit(ctx context.Context, unit string) error {
|
||||
conn, err := dbus.NewSystemdConnectionContext(ctx)
|
||||
if err != nil {
|
||||
return fmt.Errorf("establishing systemd connection: %w", err)
|
||||
}
|
||||
|
||||
restartChan := make(chan string)
|
||||
if _, err := conn.RestartUnitContext(ctx, unit, "replace", restartChan); err != nil {
|
||||
return fmt.Errorf("restarting systemd unit %q: %w", unit, err)
|
||||
}
|
||||
|
||||
// Wait for the restart to finish and actually check if it was
|
||||
// successful or not.
|
||||
result := <-restartChan
|
||||
|
||||
switch result {
|
||||
case "done":
|
||||
return nil
|
||||
|
||||
default:
|
||||
return fmt.Errorf("restarting systemd unit %q failed: expected %v but received %v", unit, "done", result)
|
||||
}
|
||||
}
|
||||
|
||||
func startSystemdUnit(ctx context.Context, unit string) error {
|
||||
conn, err := dbus.NewSystemdConnectionContext(ctx)
|
||||
if err != nil {
|
||||
|
|
|
@ -264,12 +264,19 @@ func (k *KubernetesUtil) deployCiliumGCP(ctx context.Context, helmClient *action
|
|||
return err
|
||||
}
|
||||
|
||||
timeoutS := int64(10)
|
||||
// allow coredns to run on uninitialized nodes (required by cloud-controller-manager)
|
||||
tolerations := []corev1.Toleration{
|
||||
{
|
||||
Key: "node.cloudprovider.kubernetes.io/uninitialized",
|
||||
Value: "true",
|
||||
Effect: "NoSchedule",
|
||||
Effect: corev1.TaintEffectNoSchedule,
|
||||
},
|
||||
{
|
||||
Key: "node.kubernetes.io/unreachable",
|
||||
Operator: corev1.TolerationOpExists,
|
||||
Effect: corev1.TaintEffectNoExecute,
|
||||
TolerationSeconds: &timeoutS,
|
||||
},
|
||||
}
|
||||
if err = kubectl.AddTolerationsToDeployment(ctx, tolerations, "coredns", "kube-system"); err != nil {
|
||||
|
@ -305,7 +312,7 @@ func (k *KubernetesUtil) deployCiliumGCP(ctx context.Context, helmClient *action
|
|||
|
||||
// FixCilium fixes https://github.com/cilium/cilium/issues/19958 but instead of a rollout restart of
|
||||
// the cilium daemonset, it only restarts the local cilium pod.
|
||||
func (k *KubernetesUtil) FixCilium(nodeNameK8s string, log *logger.Logger) {
|
||||
func (k *KubernetesUtil) FixCilium(log *logger.Logger) {
|
||||
// wait for cilium pod to be healthy
|
||||
client := http.Client{}
|
||||
for {
|
||||
|
@ -487,13 +494,6 @@ func (k *KubernetesUtil) StartKubelet() error {
|
|||
return startSystemdUnit(ctx, "kubelet.service")
|
||||
}
|
||||
|
||||
// RestartKubelet restarts a kubelet.
|
||||
func (k *KubernetesUtil) RestartKubelet() error {
|
||||
ctx, cancel := context.WithTimeout(context.TODO(), kubeletStartTimeout)
|
||||
defer cancel()
|
||||
return restartSystemdUnit(ctx, "kubelet.service")
|
||||
}
|
||||
|
||||
// createSignedKubeletCert manually creates a Kubernetes CA signed kubelet certificate for the bootstrapper node.
|
||||
// This is necessary because this node does not request a certificate from the join service.
|
||||
func (k *KubernetesUtil) createSignedKubeletCert(nodeName string, ips []net.IP) error {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue