fix cilium WireGuard Pod2Pod connectivity

This commit is contained in:
Leonard Cohnen 2022-06-13 16:01:21 +02:00 committed by 3u13r
parent f7ba87135d
commit 766182b7e7
4 changed files with 53 additions and 6 deletions

View File

@ -4,9 +4,11 @@ import (
"context" "context"
"errors" "errors"
"fmt" "fmt"
"net/http"
"os" "os"
"os/exec" "os/exec"
"regexp" "regexp"
"strings"
"time" "time"
"github.com/edgelesssys/constellation/coordinator/kubernetes/k8sapi/resources" "github.com/edgelesssys/constellation/coordinator/kubernetes/k8sapi/resources"
@ -160,21 +162,57 @@ func (k *KubernetesUtil) setupGCPPodNetwork(ctx context.Context, nodeName, nodeP
return err return err
} }
err = exec.CommandContext(ctx, kubectlPath, "--kubeconfig", kubeConfig, "-n", "kube-system", "patch", "deployment", "coredns", "--type", "json", "-p", "[{\"op\":\"add\",\"path\":\"/spec/template/spec/tolerations/-\",\"value\":{\"key\":\"node.kubernetes.io/network-unavailable\",\"value\":\"\",\"effect\":\"NoSchedule\"}}]").Run() ciliumInstall := exec.CommandContext(ctx, "cilium", "install", "--ipam", "kubernetes", "--ipv4-native-routing-cidr", subnetworkPodCIDR,
if err != nil { "--helm-set", "endpointRoutes.enabled=true,tunnel=disabled,encryption.enabled=true,encryption.type=wireguard,l7Proxy=false")
return err
}
ciliumInstall := exec.CommandContext(ctx, "cilium", "install", "--ipam", "kubernetes", "--ipv4-native-routing-cidr", subnetworkPodCIDR, "--helm-set", "endpointRoutes.enabled=true,tunnel=disabled,encryption.enabled=true,encryption.type=wireguard,l7Proxy=false")
ciliumInstall.Env = append(os.Environ(), "KUBECONFIG="+kubeConfig) ciliumInstall.Env = append(os.Environ(), "KUBECONFIG="+kubeConfig)
out, err = ciliumInstall.CombinedOutput() out, err = ciliumInstall.CombinedOutput()
if err != nil { if err != nil {
err = errors.New(string(out)) err = errors.New(string(out))
return err return err
} }
return nil return nil
} }
// FixCilium works around https://github.com/cilium/cilium/issues/19958. Instead of a
// rollout restart of the whole cilium daemonset, it only restarts the local cilium pod.
//
// The function blocks until the local health endpoint (127.0.0.1:9876/healthz) answers
// with HTTP 200. It then looks up the pod sandbox of the "cilium-agent" container via
// crictl and stops and removes that sandbox. Errors are printed and not returned.
//
// nodeNameK8s is currently not used by the implementation.
func (k *KubernetesUtil) FixCilium(nodeNameK8s string) {
	// Bound every health probe so that a hanging connection cannot stall the loop forever;
	// the default client used by http.Get has no timeout.
	client := &http.Client{Timeout: 5 * time.Second}

	// wait for cilium pod to be healthy
	for {
		time.Sleep(5 * time.Second)
		resp, err := client.Get("http://127.0.0.1:9876/healthz")
		if err != nil {
			fmt.Printf("waiting for local cilium daemonset pod not healthy: %v\n", err)
			continue
		}
		resp.Body.Close()
		if resp.StatusCode == http.StatusOK {
			break
		}
	}

	// get cilium pod name
	out, err := exec.CommandContext(context.Background(), "/bin/bash", "-c", "/run/state/bin/crictl ps -o json | jq -r '.containers[] | select(.metadata.name == \"cilium-agent\") | .podSandboxId'").CombinedOutput()
	if err != nil {
		fmt.Printf("getting pod id failed: %v: %v\n", err, string(out))
		return
	}

	// The sandbox ID is the last non-empty line of the output. Trimming first removes the
	// trailing newline, and strings.Split always returns at least one element, so the index
	// below cannot go out of range even when no matching container was listed.
	outLines := strings.Split(strings.TrimSpace(string(out)), "\n")
	fmt.Println(outLines)
	podID := strings.TrimSpace(outLines[len(outLines)-1])
	if podID == "" {
		fmt.Printf("getting pod id failed: no cilium-agent container found\n")
		return
	}

	// stop and delete pod
	out, err = exec.CommandContext(context.Background(), "/run/state/bin/crictl", "stopp", podID).CombinedOutput()
	if err != nil {
		fmt.Printf("stopping cilium agent pod failed: %v: %v\n", err, string(out))
		return
	}
	out, err = exec.CommandContext(context.Background(), "/run/state/bin/crictl", "rmp", podID).CombinedOutput()
	if err != nil {
		fmt.Printf("removing cilium agent pod failed: %v: %v\n", err, string(out))
	}
}
func (k *KubernetesUtil) setupQemuPodNetwork(ctx context.Context) error { func (k *KubernetesUtil) setupQemuPodNetwork(ctx context.Context) error {
ciliumInstall := exec.CommandContext(ctx, "cilium", "install", "--encryption", "wireguard", "--helm-set", "ipam.operator.clusterPoolIPv4PodCIDRList=10.244.0.0/16,endpointRoutes.enabled=true") ciliumInstall := exec.CommandContext(ctx, "cilium", "install", "--encryption", "wireguard", "--helm-set", "ipam.operator.clusterPoolIPv4PodCIDRList=10.244.0.0/16,endpointRoutes.enabled=true")
ciliumInstall.Env = append(os.Environ(), "KUBECONFIG="+kubeConfig) ciliumInstall.Env = append(os.Environ(), "KUBECONFIG="+kubeConfig)
@ -248,6 +286,7 @@ func (k *KubernetesUtil) JoinCluster(ctx context.Context, joinConfig []byte) err
} }
return fmt.Errorf("kubeadm join failed: %w", err) return fmt.Errorf("kubeadm join failed: %w", err)
} }
return nil return nil
} }

View File

@ -23,4 +23,5 @@ type clusterUtil interface {
RestartKubelet() error RestartKubelet() error
GetControlPlaneJoinCertificateKey(ctx context.Context) (string, error) GetControlPlaneJoinCertificateKey(ctx context.Context) (string, error)
CreateJoinToken(ctx context.Context, ttl time.Duration) (*kubeadm.BootstrapTokenDiscovery, error) CreateJoinToken(ctx context.Context, ttl time.Duration) (*kubeadm.BootstrapTokenDiscovery, error)
FixCilium(nodeNameK8s string)
} }

View File

@ -157,6 +157,8 @@ func (k *KubeWrapper) InitCluster(ctx context.Context, autoscalingNodeGroups []s
return fmt.Errorf("failed to setup access-manager: %w", err) return fmt.Errorf("failed to setup access-manager: %w", err)
} }
go k.clusterUtil.FixCilium(nodeName)
return nil return nil
} }
@ -210,6 +212,8 @@ func (k *KubeWrapper) JoinCluster(ctx context.Context, args *kubeadm.BootstrapTo
return fmt.Errorf("joining cluster failed: %v %w ", string(joinConfigYAML), err) return fmt.Errorf("joining cluster failed: %v %w ", string(joinConfigYAML), err)
} }
go k.clusterUtil.FixCilium(nodeName)
return nil return nil
} }

View File

@ -566,6 +566,9 @@ func (s *stubClusterUtil) CreateJoinToken(ctx context.Context, ttl time.Duration
return s.createJoinTokenResponse, s.createJoinTokenErr return s.createJoinTokenResponse, s.createJoinTokenErr
} }
// FixCilium is a no-op on the stub; it ignores nodeName and does nothing.
func (s *stubClusterUtil) FixCilium(nodeName string) {}
type stubConfigProvider struct { type stubConfigProvider struct {
InitConfig k8sapi.KubeadmInitYAML InitConfig k8sapi.KubeadmInitYAML
JoinConfig k8sapi.KubeadmJoinYAML JoinConfig k8sapi.KubeadmJoinYAML