diff --git a/cli/internal/helm/charts/edgeless/operators/charts/constellation-operator/crds/nodeversion-crd.yaml b/cli/internal/helm/charts/edgeless/operators/charts/constellation-operator/crds/nodeversion-crd.yaml index 90a07f684..9c46b695c 100644 --- a/cli/internal/helm/charts/edgeless/operators/charts/constellation-operator/crds/nodeversion-crd.yaml +++ b/cli/internal/helm/charts/edgeless/operators/charts/constellation-operator/crds/nodeversion-crd.yaml @@ -52,6 +52,74 @@ spec: status: description: NodeVersionStatus defines the observed state of NodeVersion. properties: + activeclusterversionupgrade: + description: ActiveClusterVersionUpgrade indicates whether the cluster + is currently upgrading. + type: boolean + awaitingAnnotation: + description: AwaitingAnnotation is a list of nodes that are waiting + for the operator to annotate them. + items: + description: "ObjectReference contains enough information to let + you inspect or modify the referred object. --- New uses of this + type are discouraged because of difficulty describing its usage + when embedded in APIs. 1. Ignored fields. It includes many fields + which are not generally honored. For instance, ResourceVersion + and FieldPath are both very rarely valid in actual usage. 2. Invalid + usage help. It is impossible to add specific help for individual + usage. In most embedded usages, there are particular restrictions + like, \"must refer only to types A and B\" or \"UID not honored\" + or \"name must be restricted\". Those cannot be well described + when embedded. 3. Inconsistent validation. Because the usages + are different, the validation rules are different by usage, which + makes it hard for users to predict what will happen. 4. The fields + are both imprecise and overly precise. Kind is not a precise + mapping to a URL. This can produce ambiguity during interpretation + and require a REST mapping. 
In most cases, the dependency is + on the group,resource tuple and the version of the actual struct + is irrelevant. 5. We cannot easily change it. Because this type + is embedded in many locations, updates to this type will affect + numerous schemas. Don't make new APIs embed an underspecified + API type they do not control. \n Instead of using this type, create + a locally provided and used type that is well-focused on your + reference. For example, ServiceReferences for admission registration: + https://github.com/kubernetes/api/blob/release-1.17/admissionregistration/v1/types.go#L533 + ." + properties: + apiVersion: + description: API version of the referent. + type: string + fieldPath: + description: 'If referring to a piece of an object instead of + an entire object, this string should contain a valid JSON/Go + field access statement, such as desiredState.manifest.containers[2]. + For example, if the object reference is to a container within + a pod, this would take on a value like: "spec.containers{name}" + (where "name" refers to the name of the container that triggered + the event) or if no container name is specified "spec.containers[2]" + (container with index 2 in this pod). This syntax is chosen + only to have some well-defined way of referencing a part of + an object. TODO: this design is not final and this field is + subject to change in the future.' + type: string + kind: + description: 'Kind of the referent. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + namespace: + description: 'Namespace of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/' + type: string + resourceVersion: + description: 'Specific resourceVersion to which this reference + is made, if any. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency' + type: string + uid: + description: 'UID of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids' + type: string + type: object + type: array budget: description: Budget is the amount of extra nodes that can be created as replacements for outdated nodes. diff --git a/cli/internal/helm/charts/edgeless/operators/charts/constellation-operator/templates/manager-rbac.yaml b/cli/internal/helm/charts/edgeless/operators/charts/constellation-operator/templates/manager-rbac.yaml index 23ff52eaa..45dddbdd9 100644 --- a/cli/internal/helm/charts/edgeless/operators/charts/constellation-operator/templates/manager-rbac.yaml +++ b/cli/internal/helm/charts/edgeless/operators/charts/constellation-operator/templates/manager-rbac.yaml @@ -6,6 +6,13 @@ metadata: labels: {{- include "chart.labels" . | nindent 4 }} rules: +- apiGroups: + - "" + resources: + - configmaps + verbs: + - get + - list - apiGroups: - "" resources: diff --git a/cli/internal/helm/testdata/Azure/constellation-operators/charts/constellation-operator/templates/manager-rbac.yaml b/cli/internal/helm/testdata/Azure/constellation-operators/charts/constellation-operator/templates/manager-rbac.yaml index 967edf44c..358a59ac8 100644 --- a/cli/internal/helm/testdata/Azure/constellation-operators/charts/constellation-operator/templates/manager-rbac.yaml +++ b/cli/internal/helm/testdata/Azure/constellation-operators/charts/constellation-operator/templates/manager-rbac.yaml @@ -9,6 +9,13 @@ metadata: app.kubernetes.io/instance: testRelease app.kubernetes.io/managed-by: Helm rules: +- apiGroups: + - "" + resources: + - configmaps + verbs: + - get + - list - apiGroups: - "" resources: diff --git a/cli/internal/helm/testdata/GCP/constellation-operators/charts/constellation-operator/templates/manager-rbac.yaml 
b/cli/internal/helm/testdata/GCP/constellation-operators/charts/constellation-operator/templates/manager-rbac.yaml index 967edf44c..358a59ac8 100644 --- a/cli/internal/helm/testdata/GCP/constellation-operators/charts/constellation-operator/templates/manager-rbac.yaml +++ b/cli/internal/helm/testdata/GCP/constellation-operators/charts/constellation-operator/templates/manager-rbac.yaml @@ -9,6 +9,13 @@ metadata: app.kubernetes.io/instance: testRelease app.kubernetes.io/managed-by: Helm rules: +- apiGroups: + - "" + resources: + - configmaps + verbs: + - get + - list - apiGroups: - "" resources: diff --git a/cli/internal/helm/testdata/QEMU/constellation-operators/charts/constellation-operator/templates/manager-rbac.yaml b/cli/internal/helm/testdata/QEMU/constellation-operators/charts/constellation-operator/templates/manager-rbac.yaml index 967edf44c..358a59ac8 100644 --- a/cli/internal/helm/testdata/QEMU/constellation-operators/charts/constellation-operator/templates/manager-rbac.yaml +++ b/cli/internal/helm/testdata/QEMU/constellation-operators/charts/constellation-operator/templates/manager-rbac.yaml @@ -9,6 +9,13 @@ metadata: app.kubernetes.io/instance: testRelease app.kubernetes.io/managed-by: Helm rules: +- apiGroups: + - "" + resources: + - configmaps + verbs: + - get + - list - apiGroups: - "" resources: diff --git a/internal/constants/constants.go b/internal/constants/constants.go index cae3a72e0..e13d01301 100644 --- a/internal/constants/constants.go +++ b/internal/constants/constants.go @@ -83,6 +83,8 @@ const ( KubectlPath = "/run/state/bin/kubectl" // UpgradeAgentSocketPath is the path to the UDS that is used for the gRPC connection to the upgrade agent. UpgradeAgentSocketPath = "/run/constellation-upgrade-agent.sock" + // UpgradeAgentMountPath is the path inside the operator container where the UDS is mounted. + UpgradeAgentMountPath = "/etc/constellation-upgrade-agent.sock" // CniPluginsDir path directory for CNI plugins. 
CniPluginsDir = "/opt/cni/bin" // BinDir install path for CNI config. diff --git a/operators/constellation-node-operator/api/v1alpha1/nodeversion_types.go b/operators/constellation-node-operator/api/v1alpha1/nodeversion_types.go index 21150ec0f..f5d81dfed 100644 --- a/operators/constellation-node-operator/api/v1alpha1/nodeversion_types.go +++ b/operators/constellation-node-operator/api/v1alpha1/nodeversion_types.go @@ -35,6 +35,8 @@ type NodeVersionStatus struct { Heirs []corev1.ObjectReference `json:"heirs,omitempty"` // Mints is a list of up to date nodes that will become heirs. Mints []corev1.ObjectReference `json:"mints,omitempty"` + // AwaitingAnnotation is a list of nodes that are waiting for the operator to annotate them. + AwaitingAnnotation []corev1.ObjectReference `json:"awaitingAnnotation,omitempty"` // Pending is a list of pending nodes (joining or leaving the cluster). Pending []corev1.ObjectReference `json:"pending,omitempty"` // Obsolete is a list of obsolete nodes (nodes that have been created by the operator but are no longer needed). @@ -45,6 +47,8 @@ type NodeVersionStatus struct { Budget uint32 `json:"budget"` // Conditions represent the latest available observations of an object's state Conditions []metav1.Condition `json:"conditions"` + // ActiveClusterVersionUpgrade indicates whether the cluster is currently upgrading. 
+ ActiveClusterVersionUpgrade bool `json:"activeclusterversionupgrade"` } //+kubebuilder:object:root=true diff --git a/operators/constellation-node-operator/api/v1alpha1/zz_generated.deepcopy.go b/operators/constellation-node-operator/api/v1alpha1/zz_generated.deepcopy.go index 11be751ab..198fc055c 100644 --- a/operators/constellation-node-operator/api/v1alpha1/zz_generated.deepcopy.go +++ b/operators/constellation-node-operator/api/v1alpha1/zz_generated.deepcopy.go @@ -302,6 +302,11 @@ func (in *NodeVersionStatus) DeepCopyInto(out *NodeVersionStatus) { *out = make([]v1.ObjectReference, len(*in)) copy(*out, *in) } + if in.AwaitingAnnotation != nil { + in, out := &in.AwaitingAnnotation, &out.AwaitingAnnotation + *out = make([]v1.ObjectReference, len(*in)) + copy(*out, *in) + } if in.Pending != nil { in, out := &in.Pending, &out.Pending *out = make([]v1.ObjectReference, len(*in)) diff --git a/operators/constellation-node-operator/config/crd/bases/update.edgeless.systems_nodeversions.yaml b/operators/constellation-node-operator/config/crd/bases/update.edgeless.systems_nodeversions.yaml index 6a99cd144..e4c435ec1 100644 --- a/operators/constellation-node-operator/config/crd/bases/update.edgeless.systems_nodeversions.yaml +++ b/operators/constellation-node-operator/config/crd/bases/update.edgeless.systems_nodeversions.yaml @@ -54,6 +54,74 @@ spec: status: description: NodeVersionStatus defines the observed state of NodeVersion. properties: + activeclusterversionupgrade: + description: ActiveClusterVersionUpgrade indicates whether the cluster + is currently upgrading. + type: boolean + awaitingAnnotation: + description: AwaitingAnnotation is a list of nodes that are waiting + for the operator to annotate them. + items: + description: "ObjectReference contains enough information to let + you inspect or modify the referred object. --- New uses of this + type are discouraged because of difficulty describing its usage + when embedded in APIs. 1. Ignored fields. 
It includes many fields + which are not generally honored. For instance, ResourceVersion + and FieldPath are both very rarely valid in actual usage. 2. Invalid + usage help. It is impossible to add specific help for individual + usage. In most embedded usages, there are particular restrictions + like, \"must refer only to types A and B\" or \"UID not honored\" + or \"name must be restricted\". Those cannot be well described + when embedded. 3. Inconsistent validation. Because the usages + are different, the validation rules are different by usage, which + makes it hard for users to predict what will happen. 4. The fields + are both imprecise and overly precise. Kind is not a precise + mapping to a URL. This can produce ambiguity during interpretation + and require a REST mapping. In most cases, the dependency is + on the group,resource tuple and the version of the actual struct + is irrelevant. 5. We cannot easily change it. Because this type + is embedded in many locations, updates to this type will affect + numerous schemas. Don't make new APIs embed an underspecified + API type they do not control. \n Instead of using this type, create + a locally provided and used type that is well-focused on your + reference. For example, ServiceReferences for admission registration: + https://github.com/kubernetes/api/blob/release-1.17/admissionregistration/v1/types.go#L533 + ." + properties: + apiVersion: + description: API version of the referent. + type: string + fieldPath: + description: 'If referring to a piece of an object instead of + an entire object, this string should contain a valid JSON/Go + field access statement, such as desiredState.manifest.containers[2]. + For example, if the object reference is to a container within + a pod, this would take on a value like: "spec.containers{name}" + (where "name" refers to the name of the container that triggered + the event) or if no container name is specified "spec.containers[2]" + (container with index 2 in this pod). 
This syntax is chosen + only to have some well-defined way of referencing a part of + an object. TODO: this design is not final and this field is + subject to change in the future.' + type: string + kind: + description: 'Kind of the referent. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + namespace: + description: 'Namespace of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/' + type: string + resourceVersion: + description: 'Specific resourceVersion to which this reference + is made, if any. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency' + type: string + uid: + description: 'UID of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids' + type: string + type: object + type: array budget: description: Budget is the amount of extra nodes that can be created as replacements for outdated nodes. 
@@ -643,6 +711,7 @@ spec: type: object type: array required: + - activeclusterversionupgrade - budget - conditions type: object diff --git a/operators/constellation-node-operator/config/rbac/role.yaml b/operators/constellation-node-operator/config/rbac/role.yaml index 670813d50..8700132c4 100644 --- a/operators/constellation-node-operator/config/rbac/role.yaml +++ b/operators/constellation-node-operator/config/rbac/role.yaml @@ -5,6 +5,13 @@ metadata: creationTimestamp: null name: manager-role rules: +- apiGroups: + - "" + resources: + - configmaps + verbs: + - get + - list - apiGroups: - "" resources: diff --git a/operators/constellation-node-operator/controllers/nodeversion_controller.go b/operators/constellation-node-operator/controllers/nodeversion_controller.go index 4f9c39f3b..1814403a6 100644 --- a/operators/constellation-node-operator/controllers/nodeversion_controller.go +++ b/operators/constellation-node-operator/controllers/nodeversion_controller.go @@ -8,18 +8,23 @@ package controllers import ( "context" + "encoding/json" + "errors" "reflect" "strings" "time" mainconstants "github.com/edgelesssys/constellation/v2/internal/constants" + "github.com/edgelesssys/constellation/v2/internal/versions/components" nodeutil "github.com/edgelesssys/constellation/v2/operators/constellation-node-operator/v2/internal/node" "github.com/edgelesssys/constellation/v2/operators/constellation-node-operator/v2/internal/patch" + "golang.org/x/mod/semver" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/version" ref "k8s.io/client-go/tools/reference" "k8s.io/client-go/util/retry" ctrl "sigs.k8s.io/controller-runtime" @@ -55,17 +60,21 @@ const ( type NodeVersionReconciler struct { nodeReplacer etcdRemover + clusterUpgrader + kubernetesServerVersionGetter client.Client Scheme *runtime.Scheme } // NewNodeVersionReconciler creates 
a new NodeVersionReconciler. -func NewNodeVersionReconciler(nodeReplacer nodeReplacer, etcdRemover etcdRemover, client client.Client, scheme *runtime.Scheme) *NodeVersionReconciler { +func NewNodeVersionReconciler(nodeReplacer nodeReplacer, etcdRemover etcdRemover, clusterUpgrader clusterUpgrader, k8sVerGetter kubernetesServerVersionGetter, client client.Client, scheme *runtime.Scheme) *NodeVersionReconciler { return &NodeVersionReconciler{ - nodeReplacer: nodeReplacer, - etcdRemover: etcdRemover, - Client: client, - Scheme: scheme, + nodeReplacer: nodeReplacer, + etcdRemover: etcdRemover, + clusterUpgrader: clusterUpgrader, + kubernetesServerVersionGetter: k8sVerGetter, + Client: client, + Scheme: scheme, } } @@ -75,6 +84,7 @@ func NewNodeVersionReconciler(nodeReplacer nodeReplacer, etcdRemover etcdRemover //+kubebuilder:rbac:groups=nodemaintenance.medik8s.io,resources=nodemaintenances,verbs=get;list;watch;create;update;patch;delete //+kubebuilder:rbac:groups="",resources=nodes,verbs=get;list;watch;create;update;patch;delete //+kubebuilder:rbac:groups="",resources=nodes/status,verbs=get +//+kubebuilder:rbac:groups="",resources=configmaps,verbs=list;get // Reconcile replaces outdated nodes (using an old image) with new nodes (using a new image) as specified in the NodeVersion spec. func (r *NodeVersionReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { @@ -85,6 +95,17 @@ func (r *NodeVersionReconciler) Reconcile(ctx context.Context, req ctrl.Request) if err := r.Get(ctx, req.NamespacedName, &desiredNodeVersion); err != nil { return ctrl.Result{}, client.IgnoreNotFound(err) } + + // Check if we need to upgrade the cluster version. + serverVer, err := r.ServerVersion() + if err != nil { + return ctrl.Result{}, err + } + // GitVersion is the semantic version of the Kubernetes server e.g. 
"v1.24.9" + if semver.Compare(serverVer.GitVersion, desiredNodeVersion.Spec.KubernetesClusterVersion) != 0 { + r.tryStartClusterVersionUpgrade(ctx, req.NamespacedName) + } + // get list of autoscaling strategies // there should be exactly one autoscaling strategy but we do not specify its name. // if there is no autoscaling strategy, it is assumed that autoscaling is disabled. @@ -140,7 +161,7 @@ func (r *NodeVersionReconciler) Reconcile(ctx context.Context, req ctrl.Request) // - being created (joining) // - being destroyed (leaving) // - heirs to outdated nodes - extraNodes := len(groups.Heirs) + len(pendingNodeList.Items) + extraNodes := len(groups.Heirs) + len(groups.AwaitingAnnotation) + len(pendingNodeList.Items) // newNodesBudget is the maximum number of new nodes that can be created in this Reconcile call. var newNodesBudget int if extraNodes < nodeOverprovisionLimit { @@ -153,7 +174,7 @@ func (r *NodeVersionReconciler) Reconcile(ctx context.Context, req ctrl.Request) logr.Error(err, "Updating status") } - allNodesUpToDate := len(groups.Outdated)+len(groups.Heirs)+len(pendingNodeList.Items)+len(groups.Obsolete) == 0 + allNodesUpToDate := len(groups.Outdated)+len(groups.Heirs)+len(groups.AwaitingAnnotation)+len(pendingNodeList.Items)+len(groups.Obsolete) == 0 if err := r.ensureAutoscaling(ctx, autoscalingEnabled, allNodesUpToDate); err != nil { logr.Error(err, "Ensure autoscaling", "autoscalingEnabledIs", autoscalingEnabled, "autoscalingEnabledWant", allNodesUpToDate) return ctrl.Result{}, err @@ -235,6 +256,11 @@ func (r *NodeVersionReconciler) SetupWithManager(mgr ctrl.Manager) error { handler.EnqueueRequestsFromMapFunc(r.findAllNodeVersions), builder.WithPredicates(nodeMaintenanceSucceededPredicate()), ). + Watches( + &source.Kind{Type: &updatev1alpha1.JoiningNode{}}, + handler.EnqueueRequestsFromMapFunc(r.findAllNodeVersions), + builder.WithPredicates(joiningNodeDeletedPredicate()), + ). Owns(&updatev1alpha1.PendingNode{}). 
Complete(r) } @@ -284,6 +310,84 @@ func (r *NodeVersionReconciler) annotateNodes(ctx context.Context, nodes []corev return annotatedNodes, invalidNodes } +func (r *NodeVersionReconciler) tryStartClusterVersionUpgrade(ctx context.Context, nodeVersionName types.NamespacedName) { + // try to set the cluster version upgrade status to "in progress" + + // lock the node version for cluster upgrades + if err := retry.RetryOnConflict(retry.DefaultRetry, func() error { + nodeVersion := &updatev1alpha1.NodeVersion{} + if err := r.Get(ctx, nodeVersionName, nodeVersion); err != nil { + log.FromContext(ctx).Error(err, "Unable to get node version") + return err + } + if nodeVersion.Status.ActiveClusterVersionUpgrade { + return errors.New("cluster version upgrade already in progress") + } + nodeVersion.Status.ActiveClusterVersionUpgrade = true + if err := r.Status().Update(ctx, nodeVersion); err != nil { + log.FromContext(ctx).Error(err, "Unable to update node version status") + return err + } + return nil + }); err != nil { + return + } + + // get clusterKubernetesVersion from nodeVersion + nodeVersion := &updatev1alpha1.NodeVersion{} + if err := r.Get(ctx, nodeVersionName, nodeVersion); err != nil { + log.FromContext(ctx).Error(err, "Unable to get node version") + return + } + + // get components configmap + componentsConfigMap := &corev1.ConfigMap{} + if err := r.Get(ctx, types.NamespacedName{Name: nodeVersion.Spec.KubernetesComponentsReference, Namespace: "kube-system"}, componentsConfigMap); err != nil { + log.FromContext(ctx).Error(err, "Unable to get components configmap") + return + } + + // unmarshal components from configmap + componentsRaw := componentsConfigMap.Data[mainconstants.ComponentsListKey] + var clusterComponents components.Components + if err := json.Unmarshal([]byte(componentsRaw), &clusterComponents); err != nil { + log.FromContext(ctx).Error(err, "Unable to unmarshal components") + return + } + + log.FromContext(ctx).Info("Starting cluster upgrade", 
"clusterVersion", nodeVersion.Spec.KubernetesClusterVersion) + + kubeadm, err := clusterComponents.GetKubeadmComponent() + if err != nil { + log.FromContext(ctx).Error(err, "Unable to get kubeadm component") + return + } + + // talk to the upgrade-agent to start the upgrade + if err := r.Upgrade(ctx, kubeadm.URL, kubeadm.Hash, nodeVersion.Spec.KubernetesClusterVersion); err != nil { + log.FromContext(ctx).Error(err, "Unable to upgrade cluster") + return + } + + // set the cluster version upgrade status to "completed" + if err := retry.RetryOnConflict(retry.DefaultRetry, func() error { + nodeVersion := &updatev1alpha1.NodeVersion{} + if err := r.Get(ctx, nodeVersionName, nodeVersion); err != nil { + log.FromContext(ctx).Error(err, "Unable to get node version") + return err + } + nodeVersion.Status.ActiveClusterVersionUpgrade = false + if err := r.Status().Update(ctx, nodeVersion); err != nil { + log.FromContext(ctx).Error(err, "Unable to update node version status") + return err + } + return nil + }); err != nil { + log.FromContext(ctx).Error(err, "Unable to set cluster version upgrade status to completed") + return + } +} + // pairDonorsAndHeirs takes a list of outdated nodes (that do not yet have a heir node) and a list of mint nodes (nodes using the latest image) and pairs matching nodes to become donor and heir. // outdatedNodes is also updated with heir annotations. 
func (r *NodeVersionReconciler) pairDonorsAndHeirs(ctx context.Context, controller metav1.Object, outdatedNodes []corev1.Node, mintNodes []mintNode) []replacementPair { @@ -690,6 +794,13 @@ func nodeVersionStatus(scheme *runtime.Scheme, groups nodeGroups, pendingNodes [ } status.Heirs = append(status.Heirs, *nodeRef) } + for _, node := range groups.AwaitingAnnotation { + nodeRef, err := ref.GetReference(scheme, &node) + if err != nil { + continue + } + status.AwaitingAnnotation = append(status.AwaitingAnnotation, *nodeRef) + } for _, node := range groups.Obsolete { nodeRef, err := ref.GetReference(scheme, &node) if err != nil { @@ -756,6 +867,9 @@ type nodeGroups struct { // use the most recent version AND // are paired up with an outdated donor node Heirs, + // AwaitingAnnotation nodes are nodes that + // are missing annotations. + AwaitingAnnotation, // Obsolete nodes are nodes that // were created by the operator as replacements (heirs) // but could not get paired up with a donor node. @@ -776,6 +890,10 @@ func groupNodes(nodes []corev1.Node, pendingNodes []updatev1alpha1.PendingNode, groups.Obsolete = append(groups.Obsolete, node) continue } + if node.Annotations[nodeImageAnnotation] == "" || node.Annotations[mainconstants.NodeKubernetesComponentsAnnotationKey] == "" { + groups.AwaitingAnnotation = append(groups.AwaitingAnnotation, node) + continue + } if !strings.EqualFold(node.Annotations[nodeImageAnnotation], latestImageReference) || !strings.EqualFold(node.Annotations[mainconstants.NodeKubernetesComponentsAnnotationKey], latestK8sComponentsReference) { if heir := node.Annotations[heirAnnotation]; heir != "" { @@ -785,7 +903,7 @@ func groupNodes(nodes []corev1.Node, pendingNodes []updatev1alpha1.PendingNode, } continue } - if donor := node.Annotations[donorAnnotation]; donor != "" { + if node.Annotations[donorAnnotation] != "" { groups.Heirs = append(groups.Heirs, node) continue } @@ -817,6 +935,15 @@ type etcdRemover interface { RemoveEtcdMemberFromCluster(ctx 
context.Context, vpcIP string) error } +type clusterUpgrader interface { + // UpgradeCluster upgrades the cluster to the specified version. + Upgrade(ctx context.Context, KubeadmURL, KubeadmHash, WantedKubernetesVersion string) error +} + +type kubernetesServerVersionGetter interface { + ServerVersion() (*version.Info, error) +} + type newNodeConfig struct { desiredNodeVersion updatev1alpha1.NodeVersion outdatedNodes []corev1.Node diff --git a/operators/constellation-node-operator/controllers/nodeversion_controller_env_test.go b/operators/constellation-node-operator/controllers/nodeversion_controller_env_test.go index 1e8d2ce27..c79d49ee3 100644 --- a/operators/constellation-node-operator/controllers/nodeversion_controller_env_test.go +++ b/operators/constellation-node-operator/controllers/nodeversion_controller_env_test.go @@ -10,6 +10,7 @@ package controllers import ( "context" + "fmt" "time" . "github.com/onsi/ginkgo" @@ -19,6 +20,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" + mainconstants "github.com/edgelesssys/constellation/v2/internal/constants" updatev1alpha1 "github.com/edgelesssys/constellation/v2/operators/constellation-node-operator/v2/api/v1alpha1" nodemaintenancev1beta1 "github.com/medik8s/node-maintenance-operator/api/v1beta1" ) @@ -29,14 +31,16 @@ var _ = Describe("NodeVersion controller", func() { nodeVersionResourceName = "nodeversion" firstNodeName = "node-1" secondNodeName = "node-2" - firstVersion = "version-1" - secondVersion = "version-2" scalingGroupID = "scaling-group" timeout = time.Second * 20 duration = time.Second * 2 interval = time.Millisecond * 250 ) + firstVersionSpec := updatev1alpha1.NodeVersionSpec{ + ImageReference: "version-1", + KubernetesComponentsReference: "ref-1", + } firstNodeLookupKey := types.NamespacedName{Name: firstNodeName} secondNodeLookupKey := types.NamespacedName{Name: secondNodeName} @@ -46,9 +50,9 @@ var _ = Describe("NodeVersion controller", func() { 
nodeMaintenanceLookupKey := types.NamespacedName{Name: firstNodeName} Context("When updating the cluster-wide node version", func() { - It("Should update every node in the cluster", func() { + testNodeVersionUpdate := func(newNodeVersionSpec updatev1alpha1.NodeVersionSpec) { By("creating a node version resource specifying the first node version") - Expect(fakes.scalingGroupUpdater.SetScalingGroupImage(ctx, scalingGroupID, firstVersion)).Should(Succeed()) + Expect(fakes.scalingGroupUpdater.SetScalingGroupImage(ctx, scalingGroupID, firstVersionSpec.ImageReference)).Should(Succeed()) nodeVersion := &updatev1alpha1.NodeVersion{ TypeMeta: metav1.TypeMeta{ APIVersion: "update.edgeless.systems/v1alpha1", @@ -57,12 +61,12 @@ var _ = Describe("NodeVersion controller", func() { ObjectMeta: metav1.ObjectMeta{ Name: nodeVersionResourceName, }, - Spec: updatev1alpha1.NodeVersionSpec{ImageReference: firstVersion}, + Spec: firstVersionSpec, } Expect(k8sClient.Create(ctx, nodeVersion)).Should(Succeed()) By("creating a node resource using the first node image") - fakes.nodeReplacer.setNodeImage(firstNodeName, firstVersion) + fakes.nodeReplacer.setNodeImage(firstNodeName, firstVersionSpec.ImageReference) fakes.nodeReplacer.setScalingGroupID(firstNodeName, scalingGroupID) firstNode := &corev1.Node{ TypeMeta: metav1.TypeMeta{ @@ -74,6 +78,9 @@ var _ = Describe("NodeVersion controller", func() { Labels: map[string]string{ "custom-node-label": "custom-node-label-value", }, + Annotations: map[string]string{ + mainconstants.NodeKubernetesComponentsAnnotationKey: firstVersionSpec.KubernetesComponentsReference, + }, }, Spec: corev1.NodeSpec{ ProviderID: firstNodeName, @@ -82,7 +89,7 @@ var _ = Describe("NodeVersion controller", func() { Expect(k8sClient.Create(ctx, firstNode)).Should(Succeed()) By("creating a scaling group resource using the first node image") - Expect(fakes.scalingGroupUpdater.SetScalingGroupImage(ctx, scalingGroupID, firstVersion)).Should(Succeed()) + 
Expect(fakes.scalingGroupUpdater.SetScalingGroupImage(ctx, scalingGroupID, firstVersionSpec.ImageReference)).Should(Succeed()) scalingGroup := &updatev1alpha1.ScalingGroup{ ObjectMeta: metav1.ObjectMeta{ Name: scalingGroupID, @@ -152,10 +159,10 @@ var _ = Describe("NodeVersion controller", func() { return len(nodeVersion.Status.UpToDate) }, timeout, interval).Should(Equal(1)) - By("updating the node image to the second image") + By("updating the node version") fakes.nodeStateGetter.setNodeState(updatev1alpha1.NodeStateReady) fakes.nodeReplacer.setCreatedNode(secondNodeName, secondNodeName, nil) - nodeVersion.Spec.ImageReference = secondVersion + nodeVersion.Spec = newNodeVersionSpec Expect(k8sClient.Update(ctx, nodeVersion)).Should(Succeed()) By("checking that there is an outdated node in the status") @@ -164,7 +171,7 @@ var _ = Describe("NodeVersion controller", func() { return 0 } return len(nodeVersion.Status.Outdated) - }, timeout, interval).Should(Equal(1)) + }, timeout, interval).Should(Equal(1), "outdated nodes should be 1") By("checking that the scaling group is up to date") Eventually(func() string { @@ -172,7 +179,7 @@ var _ = Describe("NodeVersion controller", func() { return "" } return scalingGroup.Status.ImageReference - }, timeout, interval).Should(Equal(secondVersion)) + }, timeout, interval).Should(Equal(newNodeVersionSpec.ImageReference)) By("checking that a pending node is created") pendingNode := &updatev1alpha1.PendingNode{} @@ -190,8 +197,8 @@ var _ = Describe("NodeVersion controller", func() { return len(nodeVersion.Status.Pending) }, timeout, interval).Should(Equal(1)) - By("creating a new node resource using the second node image") - fakes.nodeReplacer.setNodeImage(secondNodeName, secondVersion) + By("creating a new node resource using the image from the new node version") + fakes.nodeReplacer.setNodeImage(secondNodeName, newNodeVersionSpec.ImageReference) fakes.nodeReplacer.setScalingGroupID(secondNodeName, scalingGroupID) secondNode := 
&corev1.Node{ TypeMeta: metav1.TypeMeta{ @@ -207,14 +214,52 @@ var _ = Describe("NodeVersion controller", func() { } Expect(k8sClient.Create(ctx, secondNode)).Should(Succeed()) - By("checking that the new node is properly annotated") - Eventually(func() map[string]string { - if err := k8sClient.Get(ctx, secondNodeLookupKey, secondNode); err != nil { - return nil + By("marking the new node as AwaitingAnnotation") + Eventually(func() int { + err := k8sClient.Get(ctx, nodeVersionLookupKey, nodeVersion) + if err != nil { + return 0 } - return secondNode.Annotations - }, timeout, interval).Should(HaveKeyWithValue(scalingGroupAnnotation, scalingGroupID)) - Expect(secondNode.Annotations).Should(HaveKeyWithValue(nodeImageAnnotation, secondVersion)) + return len(nodeVersion.Status.AwaitingAnnotation) + }, timeout, interval).Should(Equal(1)) + // add a JoiningNode CR for the new node + joiningNode := &updatev1alpha1.JoiningNode{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "update.edgeless.systems/v1alpha1", + Kind: "JoiningNode", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: secondNodeName, + }, + Spec: updatev1alpha1.JoiningNodeSpec{ + Name: secondNodeName, + ComponentsReference: newNodeVersionSpec.KubernetesComponentsReference, + }, + } + Expect(k8sClient.Create(ctx, joiningNode)).Should(Succeed()) + Eventually(func() int { + err := k8sClient.Get(ctx, nodeVersionLookupKey, nodeVersion) + if err != nil { + return 0 + } + return len(nodeVersion.Status.AwaitingAnnotation) + }, timeout, interval).Should(Equal(0)) + + By("checking that the new node is properly annotated") + Eventually(func() error { + if err := k8sClient.Get(ctx, secondNodeLookupKey, secondNode); err != nil { + return err + } + // check nodeImageAnnotation annotation + if _, ok := secondNode.Annotations[nodeImageAnnotation]; !ok { + return fmt.Errorf("node %s is missing %s annotation", secondNode.Name, nodeImageAnnotation) + } + // check mainconstants.NodeKubernetesComponentsAnnotationKey annotation + if _, ok 
:= secondNode.Annotations[mainconstants.NodeKubernetesComponentsAnnotationKey]; !ok { + return fmt.Errorf("node %s is missing %s annotation", secondNode.Name, mainconstants.NodeKubernetesComponentsAnnotationKey) + } + return nil + }, timeout, interval).Should(Succeed()) By("checking that the nodes are paired as donor and heir") Eventually(func() map[string]string { @@ -281,6 +326,22 @@ var _ = Describe("NodeVersion controller", func() { Expect(k8sClient.Delete(ctx, autoscalerDeployment)).Should(Succeed()) Expect(k8sClient.Delete(ctx, strategy)).Should(Succeed()) Expect(k8sClient.Delete(ctx, secondNode)).Should(Succeed()) + } + When("Updating the image reference", func() { + It("Should update every node in the cluster", func() { + testNodeVersionUpdate(updatev1alpha1.NodeVersionSpec{ + ImageReference: "version-2", + KubernetesComponentsReference: "ref-1", + }) + }) + }) + When("Updating the Kubernetes components reference", func() { + It("Should update every node in the cluster", func() { + testNodeVersionUpdate(updatev1alpha1.NodeVersionSpec{ + ImageReference: "version-1", + KubernetesComponentsReference: "ref-2", + }) + }) }) }) }) diff --git a/operators/constellation-node-operator/controllers/nodeversion_controller_test.go b/operators/constellation-node-operator/controllers/nodeversion_controller_test.go index dd13485e1..4760f90ab 100644 --- a/operators/constellation-node-operator/controllers/nodeversion_controller_test.go +++ b/operators/constellation-node-operator/controllers/nodeversion_controller_test.go @@ -17,6 +17,7 @@ import ( corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/version" mainconstants "github.com/edgelesssys/constellation/v2/internal/constants" updatev1alpha1 "github.com/edgelesssys/constellation/v2/operators/constellation-node-operator/v2/api/v1alpha1" @@ -675,6 +676,28 @@ func TestGroupNodes(t *testing.T) { }, }, }, + AwaitingAnnotation: []corev1.Node{ + { 
+ ObjectMeta: metav1.ObjectMeta{ + Name: "awaiting-annotation-missing-components-ref", + Annotations: map[string]string{ + scalingGroupAnnotation: scalingGroup, + nodeImageAnnotation: latestImageReference, + donorAnnotation: "donor", + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "awaiting-annotation-missing-image-ref", + Annotations: map[string]string{ + scalingGroupAnnotation: scalingGroup, + mainconstants.NodeKubernetesComponentsAnnotationKey: latestK8sComponentsReference, + donorAnnotation: "donor", + }, + }, + }, + }, Obsolete: []corev1.Node{ { ObjectMeta: metav1.ObjectMeta{ @@ -717,6 +740,7 @@ func TestGroupNodes(t *testing.T) { nodes = append(nodes, wantNodeGroups.UpToDate...) nodes = append(nodes, wantNodeGroups.Donors...) nodes = append(nodes, wantNodeGroups.Heirs...) + nodes = append(nodes, wantNodeGroups.AwaitingAnnotation...) nodes = append(nodes, wantNodeGroups.Obsolete...) nodes = append(nodes, wantNodeGroups.Mint[0].node) pendingNodes := []updatev1alpha1.PendingNode{ @@ -792,6 +816,15 @@ func (r *stubNodeReplacer) setCreatedNode(nodeName, providerID string, err error r.createErr = err } +type stubKubernetesServerVersionGetter struct { + version string + err error +} + +func (g *stubKubernetesServerVersionGetter) ServerVersion() (*version.Info, error) { + return &version.Info{GitVersion: g.version}, g.err +} + type stubNodeReplacerReader struct { nodeImage string scalingGroupID string diff --git a/operators/constellation-node-operator/controllers/nodeversion_watches.go b/operators/constellation-node-operator/controllers/nodeversion_watches.go index 9c5965ff1..6cddc4cf2 100644 --- a/operators/constellation-node-operator/controllers/nodeversion_watches.go +++ b/operators/constellation-node-operator/controllers/nodeversion_watches.go @@ -93,6 +93,16 @@ func nodeMaintenanceSucceededPredicate() predicate.Predicate { } } +// joiningNodeDeletedPredicate checks if a joining node was deleted. 
+func joiningNodeDeletedPredicate() predicate.Predicate { + return predicate.Funcs{ + DeleteFunc: func(e event.DeleteEvent) bool { + _, ok := e.Object.(*updatev1alpha1.JoiningNode) + return ok + }, + } +} + // findObjectsForScalingGroup requests a reconcile call for the node image referenced by a scaling group. func (r *NodeVersionReconciler) findObjectsForScalingGroup(rawScalingGroup client.Object) []reconcile.Request { scalingGroup := rawScalingGroup.(*updatev1alpha1.ScalingGroup) diff --git a/operators/constellation-node-operator/controllers/suite_test.go b/operators/constellation-node-operator/controllers/suite_test.go index c3ea4a367..0195b3e96 100644 --- a/operators/constellation-node-operator/controllers/suite_test.go +++ b/operators/constellation-node-operator/controllers/suite_test.go @@ -116,9 +116,10 @@ var _ = BeforeSuite(func() { Expect(err).ToNot(HaveOccurred()) err = (&NodeVersionReconciler{ - nodeReplacer: fakes.nodeReplacer, - Client: k8sManager.GetClient(), - Scheme: k8sManager.GetScheme(), + kubernetesServerVersionGetter: fakes.k8sVerGetter, + nodeReplacer: fakes.nodeReplacer, + Client: k8sManager.GetClient(), + Scheme: k8sManager.GetScheme(), }).SetupWithManager(k8sManager) Expect(err).ToNot(HaveOccurred()) @@ -140,6 +141,7 @@ type fakeCollection struct { scalingGroupUpdater *fakeScalingGroupUpdater nodeStateGetter *stubNodeStateGetter nodeReplacer *stubNodeReplacer + k8sVerGetter *stubKubernetesServerVersionGetter clock *testclock.FakeClock } @@ -148,6 +150,7 @@ func newFakes() fakeCollection { scalingGroupUpdater: newFakeScalingGroupUpdater(), nodeStateGetter: &stubNodeStateGetter{}, nodeReplacer: &stubNodeReplacer{}, + k8sVerGetter: &stubKubernetesServerVersionGetter{}, clock: testclock.NewFakeClock(time.Now()), } } diff --git a/operators/constellation-node-operator/go.mod b/operators/constellation-node-operator/go.mod index 643d03260..ea201061c 100644 --- a/operators/constellation-node-operator/go.mod +++ 
b/operators/constellation-node-operator/go.mod @@ -19,6 +19,7 @@ require ( go.etcd.io/etcd/client/pkg/v3 v3.5.6 go.etcd.io/etcd/client/v3 v3.5.6 go.uber.org/multierr v1.9.0 + golang.org/x/mod v0.7.0 google.golang.org/api v0.106.0 google.golang.org/protobuf v1.28.1 k8s.io/api v0.25.4 @@ -93,7 +94,7 @@ require ( gomodules.xyz/jsonpatch/v2 v2.2.0 // indirect google.golang.org/appengine v1.6.7 // indirect google.golang.org/genproto v0.0.0-20221227171554-f9683d7f8bef // indirect - google.golang.org/grpc v1.51.0 // indirect + google.golang.org/grpc v1.51.0 gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect diff --git a/operators/constellation-node-operator/go.sum b/operators/constellation-node-operator/go.sum index 4065bddff..4fee94200 100644 --- a/operators/constellation-node-operator/go.sum +++ b/operators/constellation-node-operator/go.sum @@ -448,6 +448,8 @@ golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.1/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.7.0 h1:LapD9S96VoQRhi/GrNTqeBJFrUjs5UHCAtTlgwA5oZA= +golang.org/x/mod v0.7.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= diff --git a/operators/constellation-node-operator/internal/upgrade/upgrade.go b/operators/constellation-node-operator/internal/upgrade/upgrade.go new file mode 100644 index 000000000..ac35741f7 --- /dev/null +++ 
b/operators/constellation-node-operator/internal/upgrade/upgrade.go @@ -0,0 +1,61 @@ +/* +Copyright (c) Edgeless Systems GmbH + +SPDX-License-Identifier: AGPL-3.0-only +*/ + +package upgrade + +import ( + "context" + "fmt" + "net" + + mainconstants "github.com/edgelesssys/constellation/v2/internal/constants" + + "github.com/edgelesssys/constellation/v2/upgrade-agent/upgradeproto" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" +) + +// Client is a client for the upgrade agent. +type Client struct { + dialer Dialer +} + +// NewClient creates a new upgrade agent client. +func NewClient() *Client { + return &Client{ + dialer: &net.Dialer{}, + } +} + +// Upgrade upgrades the Constellation node to the given Kubernetes version. +func (c *Client) Upgrade(ctx context.Context, KubeadmURL, KubeadmHash, WantedKubernetesVersion string) error { + conn, err := grpc.DialContext(ctx, mainconstants.UpgradeAgentMountPath, grpc.WithTransportCredentials(insecure.NewCredentials()), + grpc.WithContextDialer( + func(ctx context.Context, addr string) (net.Conn, error) { + return c.dialer.DialContext(ctx, "unix", addr) + }, + )) + if err != nil { + return fmt.Errorf("failed to dial: %w", err) + } + defer conn.Close() + + protoClient := upgradeproto.NewUpdateClient(conn) + _, err = protoClient.ExecuteUpdate(ctx, &upgradeproto.ExecuteUpdateRequest{ + KubeadmUrl: KubeadmURL, + KubeadmHash: KubeadmHash, + WantedKubernetesVersion: WantedKubernetesVersion, + }) + if err != nil { + return fmt.Errorf("failed to execute update: %w", err) + } + return nil +} + +// Dialer is a dialer for the upgrade agent. 
+type Dialer interface { + DialContext(ctx context.Context, network string, address string) (net.Conn, error) +} diff --git a/operators/constellation-node-operator/main.go b/operators/constellation-node-operator/main.go index 8df1ef6ff..57c1b004f 100644 --- a/operators/constellation-node-operator/main.go +++ b/operators/constellation-node-operator/main.go @@ -14,6 +14,7 @@ import ( // Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.) // to ensure that exec-entrypoint and run can make use of them. + "k8s.io/client-go/discovery" _ "k8s.io/client-go/plugin/pkg/client/auth" "k8s.io/apimachinery/pkg/runtime" @@ -28,6 +29,7 @@ import ( cloudfake "github.com/edgelesssys/constellation/v2/operators/constellation-node-operator/v2/internal/cloud/fake/client" gcpclient "github.com/edgelesssys/constellation/v2/operators/constellation-node-operator/v2/internal/cloud/gcp/client" "github.com/edgelesssys/constellation/v2/operators/constellation-node-operator/v2/internal/deploy" + "github.com/edgelesssys/constellation/v2/operators/constellation-node-operator/v2/internal/upgrade" updatev1alpha1 "github.com/edgelesssys/constellation/v2/operators/constellation-node-operator/v2/api/v1alpha1" "github.com/edgelesssys/constellation/v2/operators/constellation-node-operator/v2/controllers" @@ -122,6 +124,11 @@ func main() { setupLog.Error(err, "Unable to create k8s client") os.Exit(1) } + discoveryClient, err := discovery.NewDiscoveryClientForConfig(ctrl.GetConfigOrDie()) + if err != nil { + setupLog.Error(err, "Unable to create discovery client") + os.Exit(1) + } etcdClient, err := etcd.New(k8sClient) if err != nil { setupLog.Error(err, "Unable to create etcd client") @@ -137,7 +144,7 @@ func main() { // Create Controllers if csp == "azure" || csp == "gcp" { if err = controllers.NewNodeVersionReconciler( - cspClient, etcdClient, mgr.GetClient(), mgr.GetScheme(), + cspClient, etcdClient, upgrade.NewClient(), discoveryClient, mgr.GetClient(), mgr.GetScheme(), 
).SetupWithManager(mgr); err != nil { setupLog.Error(err, "Unable to create controller", "controller", "NodeVersion") os.Exit(1)