From e301f575df0e5ba5648e3588955516a26cbaba80 Mon Sep 17 00:00:00 2001 From: katexochen <49727155+katexochen@users.noreply.github.com> Date: Thu, 15 Sep 2022 16:48:32 +0200 Subject: [PATCH] Let operator manage autoscaling of node groups --- .../api/v1alpha1/autoscalingstrategy_types.go | 2 + .../api/v1alpha1/scalinggroup_types.go | 17 ++ .../api/v1alpha1/zz_generated.deepcopy.go | 9 +- ...dgeless.systems_autoscalingstrategies.yaml | 6 + ...update.edgeless.systems_scalinggroups.yaml | 17 ++ .../autoscalingstrategy_controller.go | 139 +++++++++++++- ...autoscalingstrategy_controller_env_test.go | 180 ++++++++++++++++++ .../controllers/pendingnode_controller.go | 2 +- .../internal/azure/client/autoscaler.go | 12 ++ .../internal/azure/client/scalinggroup.go | 13 +- .../azure/client/scalinggroup_test.go | 6 +- .../internal/deploy/deploy.go | 47 +++-- .../internal/deploy/deploy_test.go | 46 ++++- .../internal/gcp/client/autocaler.go | 12 ++ .../internal/gcp/client/disks.go | 9 + .../internal/gcp/client/scalinggroup.go | 16 +- .../internal/gcp/client/scalinggroup_test.go | 2 +- operators/constellation-node-operator/main.go | 6 +- 18 files changed, 499 insertions(+), 42 deletions(-) create mode 100644 operators/constellation-node-operator/internal/azure/client/autoscaler.go create mode 100644 operators/constellation-node-operator/internal/gcp/client/autocaler.go diff --git a/operators/constellation-node-operator/api/v1alpha1/autoscalingstrategy_types.go b/operators/constellation-node-operator/api/v1alpha1/autoscalingstrategy_types.go index b68fe686d..590450583 100644 --- a/operators/constellation-node-operator/api/v1alpha1/autoscalingstrategy_types.go +++ b/operators/constellation-node-operator/api/v1alpha1/autoscalingstrategy_types.go @@ -18,6 +18,8 @@ type AutoscalingStrategySpec struct { DeploymentName string `json:"deploymentName"` // DeploymentNamespace defines the namespace of the autoscaler deployment. DeploymentNamespace string `json:"deploymentNamespace"` + // AutoscalerExtraArgs defines extra arguments to be passed to the autoscaler. + AutoscalerExtraArgs map[string]string `json:"autoscalerExtraArgs,omitempty"` } // AutoscalingStrategyStatus defines the observed state of AutoscalingStrategy. diff --git a/operators/constellation-node-operator/api/v1alpha1/scalinggroup_types.go b/operators/constellation-node-operator/api/v1alpha1/scalinggroup_types.go index 7fa358ad6..9645854c8 100644 --- a/operators/constellation-node-operator/api/v1alpha1/scalinggroup_types.go +++ b/operators/constellation-node-operator/api/v1alpha1/scalinggroup_types.go @@ -13,6 +13,11 @@ import ( const ( // ConditionOutdated is used to signal outdated scaling groups. ConditionOutdated = "Outdated" + + // WorkerRole is used to signal worker scaling groups. + WorkerRole NodeRole = "Worker" + // ControlPlaneRole is used to signal control plane scaling groups. + ControlPlaneRole NodeRole = "ControlPlane" ) // ScalingGroupSpec defines the desired state of ScalingGroup. @@ -21,10 +26,22 @@ type ScalingGroupSpec struct { NodeImage string `json:"nodeImage,omitempty"` // GroupID is the CSP specific, canonical identifier of a scaling group. GroupID string `json:"groupId,omitempty"` + // AutoscalerGroupName is name that is expected by the autoscaler. + AutoscalerGroupName string `json:"autoscalerGroupName,omitempty"` // Autoscaling specifies wether the scaling group should automatically scale using the cluster-autoscaler. Autoscaling bool `json:"autoscaling,omitempty"` + // Min is the minimum number of nodes in the scaling group (used by cluster-autoscaler). + Min int32 `json:"min,omitempty"` + // Max is the maximum number of autoscaled nodes in the scaling group (used by cluster-autoscaler). + Max int32 `json:"max,omitempty"` + // Role is the role of the nodes in the scaling group. + Role NodeRole `json:"role,omitempty"` } +// NodeRole is the role of a node. +// +kubebuilder:validation:Enum=Worker;ControlPlane +type NodeRole string + // ScalingGroupStatus defines the observed state of ScalingGroup. type ScalingGroupStatus struct { // ImageReference is the image currently used for newly created nodes in this scaling group. diff --git a/operators/constellation-node-operator/api/v1alpha1/zz_generated.deepcopy.go b/operators/constellation-node-operator/api/v1alpha1/zz_generated.deepcopy.go index 785a1dcb8..3f33a89ee 100644 --- a/operators/constellation-node-operator/api/v1alpha1/zz_generated.deepcopy.go +++ b/operators/constellation-node-operator/api/v1alpha1/zz_generated.deepcopy.go @@ -16,7 +16,7 @@ func (in *AutoscalingStrategy) DeepCopyInto(out *AutoscalingStrategy) { *out = *in out.TypeMeta = in.TypeMeta in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) - out.Spec = in.Spec + in.Spec.DeepCopyInto(&out.Spec) out.Status = in.Status } @@ -73,6 +73,13 @@ func (in *AutoscalingStrategyList) DeepCopyObject() runtime.Object { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *AutoscalingStrategySpec) DeepCopyInto(out *AutoscalingStrategySpec) { *out = *in + if in.AutoscalerExtraArgs != nil { + in, out := &in.AutoscalerExtraArgs, &out.AutoscalerExtraArgs + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AutoscalingStrategySpec. diff --git a/operators/constellation-node-operator/config/crd/bases/update.edgeless.systems_autoscalingstrategies.yaml b/operators/constellation-node-operator/config/crd/bases/update.edgeless.systems_autoscalingstrategies.yaml index de2496f10..6a014a394 100644 --- a/operators/constellation-node-operator/config/crd/bases/update.edgeless.systems_autoscalingstrategies.yaml +++ b/operators/constellation-node-operator/config/crd/bases/update.edgeless.systems_autoscalingstrategies.yaml @@ -36,6 +36,12 @@ spec: spec: description: AutoscalingStrategySpec defines the desired state of AutoscalingStrategy. properties: + autoscalerExtraArgs: + additionalProperties: + type: string + description: AutoscalerExtraArgs defines extra arguments to be passed + to the autoscaler. + type: object deploymentName: description: DeploymentName defines the name of the autoscaler deployment. type: string diff --git a/operators/constellation-node-operator/config/crd/bases/update.edgeless.systems_scalinggroups.yaml b/operators/constellation-node-operator/config/crd/bases/update.edgeless.systems_scalinggroups.yaml index 828320226..7d55e273d 100644 --- a/operators/constellation-node-operator/config/crd/bases/update.edgeless.systems_scalinggroups.yaml +++ b/operators/constellation-node-operator/config/crd/bases/update.edgeless.systems_scalinggroups.yaml @@ -35,6 +35,9 @@ spec: spec: description: ScalingGroupSpec defines the desired state of ScalingGroup. properties: + autoscalerGroupName: + description: AutoscalerGroupName is name that is expected by the autoscaler. + type: string autoscaling: description: Autoscaling specifies wether the scaling group should automatically scale using the cluster-autoscaler. @@ -43,9 +46,23 @@ spec: description: GroupID is the CSP specific, canonical identifier of a scaling group. type: string + max: + description: Max is the maximum number of nodes in the scaling group. + format: int32 + type: integer + min: + description: Min is the minimum number of nodes in the scaling group. + format: int32 + type: integer nodeImage: description: NodeImage is the name of the NodeImage resource. type: string + role: + description: Role is the role of the nodes in the scaling group. + enum: + - Worker + - ControlPlane + type: string type: object status: description: ScalingGroupStatus defines the observed state of ScalingGroup. diff --git a/operators/constellation-node-operator/controllers/autoscalingstrategy_controller.go b/operators/constellation-node-operator/controllers/autoscalingstrategy_controller.go index 1d3a75be7..24a55603e 100644 --- a/operators/constellation-node-operator/controllers/autoscalingstrategy_controller.go +++ b/operators/constellation-node-operator/controllers/autoscalingstrategy_controller.go @@ -8,14 +8,22 @@ package controllers import ( "context" + "fmt" + "sort" appsv1 "k8s.io/api/apps/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/util/retry" ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/builder" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/event" + "sigs.k8s.io/controller-runtime/pkg/handler" "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/predicate" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + "sigs.k8s.io/controller-runtime/pkg/source" updatev1alpha1 "github.com/edgelesssys/constellation/operators/constellation-node-operator/api/v1alpha1" ) @@ -29,6 +37,7 @@ type AutoscalingStrategyReconciler struct { //+kubebuilder:rbac:groups=update.edgeless.systems,resources=autoscalingstrategies,verbs=get;list;watch;create;update;patch;delete //+kubebuilder:rbac:groups=update.edgeless.systems,resources=autoscalingstrategies/status,verbs=get;update;patch //+kubebuilder:rbac:groups=update.edgeless.systems,resources=autoscalingstrategies/finalizers,verbs=update +//+kubebuilder:rbac:groups=update.edgeless.systems,resources=scalinggroups,verbs=get;list;watch //+kubebuilder:rbac:groups=apps,resources=deployments,verbs=get;list;watch;create;update;delete // Reconcile enabled or disables the cluster-autoscaler based on the AutoscalingStrategy spec @@ -46,8 +55,32 @@ func (r *AutoscalingStrategyReconciler) Reconcile(ctx context.Context, req ctrl. expectedReplicas = 1 } + var scalingGroups updatev1alpha1.ScalingGroupList + if err := r.List(ctx, &scalingGroups); err != nil { + logr.Error(err, "Unable to fetch ScalingGroups") + return ctrl.Result{}, err + } + + autoscalerArgs := []string{"./cluster-autoscaler"} + for key, val := range desiredAutoscalingStrategy.Spec.AutoscalerExtraArgs { + autoscalerArgs = append(autoscalerArgs, "--"+key+"="+val) + } + const nodeGroupFmt = "--nodes=%d:%d:%s" + for _, group := range scalingGroups.Items { + // Don't autoscale control plane nodes for safety reasons. + if group.Spec.Autoscaling && group.Spec.Role != updatev1alpha1.ControlPlaneRole { + groupArg := fmt.Sprintf(nodeGroupFmt, group.Spec.Min, group.Spec.Max, group.Spec.AutoscalerGroupName) + autoscalerArgs = append(autoscalerArgs, groupArg) + } + } + sort.Strings(autoscalerArgs[1:]) + var autoscalerDeployment appsv1.Deployment - if err := r.Get(ctx, client.ObjectKey{Namespace: desiredAutoscalingStrategy.Spec.DeploymentNamespace, Name: desiredAutoscalingStrategy.Spec.DeploymentName}, &autoscalerDeployment); err != nil { + deploymentKey := client.ObjectKey{ + Namespace: desiredAutoscalingStrategy.Spec.DeploymentNamespace, + Name: desiredAutoscalingStrategy.Spec.DeploymentName, + } + if err := r.Get(ctx, deploymentKey, &autoscalerDeployment); err != nil { logr.Error(err, "Unable to fetch autoscaler Deployment") return ctrl.Result{}, client.IgnoreNotFound(err) } @@ -65,26 +98,116 @@ func (r *AutoscalingStrategyReconciler) Reconcile(ctx context.Context, req ctrl. return ctrl.Result{}, err } + var needUpdate bool if autoscalerDeployment.Spec.Replicas == nil || *autoscalerDeployment.Spec.Replicas != expectedReplicas { - logr.Info("Updating autoscaling replicas", "expectedReplicas", expectedReplicas) - autoscalerDeployment.Spec.Replicas = &expectedReplicas - if err := r.Update(ctx, &autoscalerDeployment); err != nil { - logr.Error(err, "Unable to update autoscaler Deployment") - return ctrl.Result{}, err + needUpdate = needUpdate || true + } + containers := autoscalerDeployment.Spec.Template.Spec.Containers + if len(containers) != 0 && containers[0].Command == nil { // uninitialized + needUpdate = needUpdate || true + } + if len(containers) != 0 && containers[0].Command != nil { // args updated + if len(containers[0].Command) != len(autoscalerArgs) { + needUpdate = needUpdate || true + } else { + for i, arg := range containers[0].Command { + if arg != autoscalerArgs[i] { + needUpdate = needUpdate || true + break + } + } } - return ctrl.Result{Requeue: true}, nil } - return ctrl.Result{}, nil + if !needUpdate { + return ctrl.Result{}, nil + } + + logr.Info("Updating autoscaling replicas and command", "expectedReplicas", expectedReplicas, "autoscalerArgs", autoscalerArgs) + err := retry.RetryOnConflict(retry.DefaultRetry, func() error { + if err := r.Get(ctx, deploymentKey, &autoscalerDeployment); err != nil { + return err + } + autoscalerDeployment.Spec.Replicas = &expectedReplicas + if len(containers) != 0 { + logr.Info("Updating autoscaler command", "old", containers[0].Command, "new", autoscalerArgs) + autoscalerDeployment.Spec.Template.Spec.Containers[0].Command = autoscalerArgs + } + return r.Update(ctx, &autoscalerDeployment) + }) + if err != nil { + logr.Error(err, "Unable to update autoscaler Deployment") + return ctrl.Result{}, err + } + + return ctrl.Result{Requeue: true}, nil } // SetupWithManager sets up the controller with the Manager. func (r *AutoscalingStrategyReconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). For(&updatev1alpha1.AutoscalingStrategy{}). + Watches( + &source.Kind{Type: &updatev1alpha1.ScalingGroup{}}, + handler.EnqueueRequestsFromMapFunc(r.findObjectsForDeployment), + builder.WithPredicates(scalingGroupChangePredicate()), + ). Complete(r) } +func (r *AutoscalingStrategyReconciler) findObjectsForDeployment(_ client.Object) []reconcile.Request { + var autoscalingStrats updatev1alpha1.AutoscalingStrategyList + err := r.List(context.Background(), &autoscalingStrats) + if err != nil { + return []reconcile.Request{} + } + + requests := make([]reconcile.Request, len(autoscalingStrats.Items)) + for i, item := range autoscalingStrats.Items { + requests[i] = reconcile.Request{ + NamespacedName: types.NamespacedName{ + Name: item.GetName(), + }, + } + } + return requests +} + +// scalingGroupChangePredicate filters events on scaling group resources. +func scalingGroupChangePredicate() predicate.Predicate { + return predicate.Funcs{ + CreateFunc: func(e event.CreateEvent) bool { + return true + }, + UpdateFunc: func(e event.UpdateEvent) bool { + oldGroup, ok := e.ObjectOld.(*updatev1alpha1.ScalingGroup) + if !ok { + return false + } + newGroup, ok := e.ObjectNew.(*updatev1alpha1.ScalingGroup) + if !ok { + return false + } + switch { + case oldGroup.Spec.Min != newGroup.Spec.Min: + return true + case oldGroup.Spec.Max != newGroup.Spec.Max: + return true + case oldGroup.Spec.Autoscaling != newGroup.Spec.Autoscaling: + return true + default: + return false + } + }, + DeleteFunc: func(e event.DeleteEvent) bool { + return true + }, + GenericFunc: func(e event.GenericEvent) bool { + return false + }, + } +} + // tryUpdateStatus attempts to update the AutoscalingStrategy status field in a retry loop. func (r *AutoscalingStrategyReconciler) tryUpdateStatus(ctx context.Context, name types.NamespacedName, status updatev1alpha1.AutoscalingStrategyStatus) error { return retry.RetryOnConflict(retry.DefaultRetry, func() error { diff --git a/operators/constellation-node-operator/controllers/autoscalingstrategy_controller_env_test.go b/operators/constellation-node-operator/controllers/autoscalingstrategy_controller_env_test.go index 81c9c4976..7957e854e 100644 --- a/operators/constellation-node-operator/controllers/autoscalingstrategy_controller_env_test.go +++ b/operators/constellation-node-operator/controllers/autoscalingstrategy_controller_env_test.go @@ -28,6 +28,8 @@ var _ = Describe("AutoscalingStrategy controller", func() { ClusterAutoscalerDeploymentName = "cluster-autoscaler" ClusterAutoscalerDeploymentNamespace = "kube-system" AutoscalingStrategyName = "test-strategy" + ScalingGroupNameWorker = "worker-group" + ScalingGroupNameControlPlane = "control-plane-group" timeout = time.Second * 20 duration = time.Second * 2 @@ -162,4 +164,182 @@ var _ = Describe("AutoscalingStrategy controller", func() { Expect(k8sClient.Delete(ctx, strategy)).Should(Succeed()) }) }) + + Context("When changing autoscaling of node groups", func() { + It("Should update the autoscaler deployment command", func() { + By("creating a cluster-autoscaler deployment") + ctx := context.Background() + autoscalerDeployment := &appsv1.Deployment{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "apps/v1", + Kind: "Deployment", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: ClusterAutoscalerDeploymentName, + Namespace: ClusterAutoscalerDeploymentNamespace, + }, + Spec: appsv1.DeploymentSpec{ + Replicas: &ClusterAutoscalerStartingReplicas, + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "app.kubernetes.io/name": "cluster-autoscaler", + }, + }, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + "app.kubernetes.io/name": "cluster-autoscaler", + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + {Image: "cluster-autoscaler", Name: "cluster-autoscaler"}, + }, + }, + }, + }, + } + Expect(k8sClient.Create(ctx, autoscalerDeployment)).Should(Succeed()) + createdDeployment := &appsv1.Deployment{} + Eventually(func() error { + return k8sClient.Get(ctx, DeploymentLookupKey, createdDeployment) + }, timeout, interval).Should(Succeed()) + + Expect(createdDeployment.Spec.Template.Spec.Containers).NotTo(BeEmpty()) + Expect(createdDeployment.Spec.Template.Spec.Containers[0].Command).To(BeEmpty()) + + By("creating an autoscaling strategy") + strategy := &updatev1alpha1.AutoscalingStrategy{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "update.edgeless.systems/v1alpha1", + Kind: "AutoscalingStrategy", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: AutoscalingStrategyName, + }, + Spec: updatev1alpha1.AutoscalingStrategySpec{ + DeploymentName: ClusterAutoscalerDeploymentName, + DeploymentNamespace: ClusterAutoscalerDeploymentNamespace, + AutoscalerExtraArgs: map[string]string{ + "foo": "bar", + "baz": "qux", + }, + }, + } + Expect(k8sClient.Create(ctx, strategy)).Should(Succeed()) + strategyLookupKey := types.NamespacedName{Name: AutoscalingStrategyName} + createdStrategy := &updatev1alpha1.AutoscalingStrategy{} + Eventually(func() error { + return k8sClient.Get(ctx, strategyLookupKey, createdStrategy) + }, timeout, interval).Should(Succeed()) + + By("checking the autoscaling deployment eventually has the correct command") + Eventually(func() []string { + err := k8sClient.Get(ctx, DeploymentLookupKey, createdDeployment) + if err != nil { + return nil + } + return createdDeployment.Spec.Template.Spec.Containers[0].Command + }, timeout, interval).Should(Equal([]string{ + "./cluster-autoscaler", + "--baz=qux", + "--foo=bar", + })) + + By("creating a new worker scaling group") + scalingGroup := &updatev1alpha1.ScalingGroup{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "update.edgeless.systems/v1alpha1", + Kind: "ScalingGroup", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: ScalingGroupNameWorker, + }, + Spec: updatev1alpha1.ScalingGroupSpec{ + GroupID: ScalingGroupNameWorker, + AutoscalerGroupName: ScalingGroupNameWorker, + Autoscaling: true, + Min: 1, + Max: 10, + Role: updatev1alpha1.WorkerRole, + }, + } + Expect(k8sClient.Create(ctx, scalingGroup)).Should(Succeed()) + scalingGroupLookupKey := types.NamespacedName{Name: ScalingGroupNameWorker} + Eventually(func() error { + return k8sClient.Get(ctx, scalingGroupLookupKey, scalingGroup) + }, timeout, interval).Should(Succeed()) + By("checking the controller eventually updates the autoscaler deployment command") + Eventually(func() []string { + err := k8sClient.Get(ctx, DeploymentLookupKey, createdDeployment) + if err != nil { + return nil + } + return createdDeployment.Spec.Template.Spec.Containers[0].Command + }, timeout, interval).Should(Equal([]string{ + "./cluster-autoscaler", + "--baz=qux", + "--foo=bar", + "--nodes=1:10:worker-group", + })) + + By("Disabling autoscaling for the worker scaling group") + Eventually(func() error { + scalingGroup.Spec.Autoscaling = false + return k8sClient.Update(ctx, scalingGroup) + }, timeout, interval).Should(Succeed()) + By("checking the controller eventually updates the autoscaler deployment command") + Eventually(func() []string { + err := k8sClient.Get(ctx, DeploymentLookupKey, createdDeployment) + if err != nil { + return nil + } + return createdDeployment.Spec.Template.Spec.Containers[0].Command + }, timeout, interval).Should(Equal([]string{ + "./cluster-autoscaler", + "--baz=qux", + "--foo=bar", + })) + + By("creating a new control plane scaling group") + scalingGroup = &updatev1alpha1.ScalingGroup{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "update.edgeless.systems/v1alpha1", + Kind: "ScalingGroup", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: ScalingGroupNameControlPlane, + }, + Spec: updatev1alpha1.ScalingGroupSpec{ + GroupID: ScalingGroupNameControlPlane, + AutoscalerGroupName: ScalingGroupNameControlPlane, + Autoscaling: true, + Min: 1, + Max: 10, + Role: updatev1alpha1.ControlPlaneRole, + }, + } + Expect(k8sClient.Create(ctx, scalingGroup)).Should(Succeed()) + scalingGroupLookupKey = types.NamespacedName{Name: ScalingGroupNameControlPlane} + Eventually(func() error { + return k8sClient.Get(ctx, scalingGroupLookupKey, scalingGroup) + }, timeout, interval).Should(Succeed()) + By("checking the controller doesn't update the control plane node group autoscaling") + Consistently(func() []string { + err := k8sClient.Get(ctx, DeploymentLookupKey, createdDeployment) + if err != nil { + return nil + } + return createdDeployment.Spec.Template.Spec.Containers[0].Command + }, timeout, interval).Should(Equal([]string{ + "./cluster-autoscaler", + "--baz=qux", + "--foo=bar", + })) + + By("cleaning up all resources") + Expect(k8sClient.Delete(ctx, autoscalerDeployment)).Should(Succeed()) + Expect(k8sClient.Delete(ctx, strategy)).Should(Succeed()) + }) + }) }) diff --git a/operators/constellation-node-operator/controllers/pendingnode_controller.go b/operators/constellation-node-operator/controllers/pendingnode_controller.go index 7bcb0a688..c511225b8 100644 --- a/operators/constellation-node-operator/controllers/pendingnode_controller.go +++ b/operators/constellation-node-operator/controllers/pendingnode_controller.go @@ -193,7 +193,7 @@ func nodeStateChangePredicate() predicate.Predicate { // findObjectsForNode requests reconciliation for PendingNode whenever the corresponding Node state changes. func (r *PendingNodeReconciler) findObjectsForNode(rawNode client.Object) []reconcile.Request { var pendingNodesList updatev1alpha1.PendingNodeList - err := r.List(context.TODO(), &pendingNodesList, client.MatchingFields{nodeNameKey: rawNode.GetName()}) + err := r.List(context.Background(), &pendingNodesList, client.MatchingFields{nodeNameKey: rawNode.GetName()}) if err != nil { return []reconcile.Request{} } diff --git a/operators/constellation-node-operator/internal/azure/client/autoscaler.go b/operators/constellation-node-operator/internal/azure/client/autoscaler.go new file mode 100644 index 000000000..d3c268f93 --- /dev/null +++ b/operators/constellation-node-operator/internal/azure/client/autoscaler.go @@ -0,0 +1,12 @@ +/* +Copyright (c) Edgeless Systems GmbH + +SPDX-License-Identifier: AGPL-3.0-only +*/ + +package client + +// AutoscalingCloudProvider returns the cloud-provider name as used by k8s cluster-autoscaler. +func (c *Client) AutoscalingCloudProvider() string { + return "azure" +} diff --git a/operators/constellation-node-operator/internal/azure/client/scalinggroup.go b/operators/constellation-node-operator/internal/azure/client/scalinggroup.go index 3fece382d..e2f63f6de 100644 --- a/operators/constellation-node-operator/internal/azure/client/scalinggroup.go +++ b/operators/constellation-node-operator/internal/azure/client/scalinggroup.go @@ -63,13 +63,20 @@ func (c *Client) SetScalingGroupImage(ctx context.Context, scalingGroupID, image return nil } -// GetScalingGroupName retrieves the name of a scaling group. -func (c *Client) GetScalingGroupName(ctx context.Context, scalingGroupID string) (string, error) { +// GetScalingGroupName retrieves the name of a scaling group, as expected by Kubernetes. +// This keeps the casing of the original name, but Kubernetes requires the name to be lowercase, +// so use strings.ToLower() on the result if using the name in a Kubernetes context. +func (c *Client) GetScalingGroupName(scalingGroupID string) (string, error) { _, _, scaleSet, err := splitVMSSID(scalingGroupID) if err != nil { return "", fmt.Errorf("getting scaling group name: %w", err) } - return strings.ToLower(scaleSet), nil + return scaleSet, nil +} + +// GetScalingGroupName retrieves the name of a scaling group as needed by the cluster-autoscaler. +func (c *Client) GetAutoscalingGroupName(scalingGroupID string) (string, error) { + return c.GetScalingGroupName(scalingGroupID) } // ListScalingGroups retrieves a list of scaling groups for the cluster. diff --git a/operators/constellation-node-operator/internal/azure/client/scalinggroup_test.go b/operators/constellation-node-operator/internal/azure/client/scalinggroup_test.go index fb8887c5e..fb2985691 100644 --- a/operators/constellation-node-operator/internal/azure/client/scalinggroup_test.go +++ b/operators/constellation-node-operator/internal/azure/client/scalinggroup_test.go @@ -155,9 +155,9 @@ func TestGetScalingGroupName(t *testing.T) { scalingGroupID: "/subscriptions/subscription-id/resourceGroups/resource-group/providers/Microsoft.Compute/virtualMachineScaleSets/scale-set-name", wantName: "scale-set-name", }, - "uppercase name is lowercased": { + "uppercase name isn't lowercased": { scalingGroupID: "/subscriptions/subscription-id/resourceGroups/resource-group/providers/Microsoft.Compute/virtualMachineScaleSets/SCALE-SET-NAME", - wantName: "scale-set-name", + wantName: "SCALE-SET-NAME", }, "splitting scalingGroupID fails": { scalingGroupID: "invalid", @@ -171,7 +171,7 @@ func TestGetScalingGroupName(t *testing.T) { require := require.New(t) client := Client{} - gotName, err := client.GetScalingGroupName(context.Background(), tc.scalingGroupID) + gotName, err := client.GetScalingGroupName(tc.scalingGroupID) if tc.wantErr { assert.Error(err) return diff --git a/operators/constellation-node-operator/internal/deploy/deploy.go b/operators/constellation-node-operator/internal/deploy/deploy.go index 55719231f..bd790a032 100644 --- a/operators/constellation-node-operator/internal/deploy/deploy.go +++ b/operators/constellation-node-operator/internal/deploy/deploy.go @@ -11,6 +11,7 @@ import ( "context" "errors" "fmt" + "strings" updatev1alpha1 "github.com/edgelesssys/constellation/operators/constellation-node-operator/api/v1alpha1" "github.com/edgelesssys/constellation/operators/constellation-node-operator/internal/constants" @@ -32,7 +33,7 @@ func InitialResources(ctx context.Context, k8sClient client.Writer, scalingGroup return errors.New("determining initial node image: no worker scaling group found") } - if err := createAutoscalingStrategy(ctx, k8sClient); err != nil { + if err := createAutoscalingStrategy(ctx, k8sClient, scalingGroupGetter.AutoscalingCloudProvider()); err != nil { return fmt.Errorf("creating initial autoscaling strategy: %w", err) } imageReference, err := scalingGroupGetter.GetScalingGroupImage(ctx, controlPlaneGroupIDs[0]) @@ -43,20 +44,28 @@ func InitialResources(ctx context.Context, k8sClient client.Writer, scalingGroup return fmt.Errorf("creating initial node image %q: %w", imageReference, err) } for _, groupID := range controlPlaneGroupIDs { - groupName, err := scalingGroupGetter.GetScalingGroupName(ctx, groupID) + groupName, err := scalingGroupGetter.GetScalingGroupName(groupID) if err != nil { return fmt.Errorf("determining scaling group name of %q: %w", groupID, err) } - if err := createScalingGroup(ctx, k8sClient, groupID, groupName, false); err != nil { + autoscalingGroupName, err := scalingGroupGetter.GetAutoscalingGroupName(groupID) + if err != nil { + return fmt.Errorf("determining autoscaling group name of %q: %w", groupID, err) + } + if err := createScalingGroup(ctx, k8sClient, groupID, groupName, autoscalingGroupName, updatev1alpha1.ControlPlaneRole); err != nil { return fmt.Errorf("creating initial control plane scaling group: %w", err) } } for _, groupID := range workerGroupIDs { - groupName, err := scalingGroupGetter.GetScalingGroupName(ctx, groupID) + groupName, err := scalingGroupGetter.GetScalingGroupName(groupID) if err != nil { return fmt.Errorf("determining scaling group name of %q: %w", groupID, err) } - if err := createScalingGroup(ctx, k8sClient, groupID, groupName, true); err != nil { + autoscalingGroupName, err := scalingGroupGetter.GetAutoscalingGroupName(groupID) + if err != nil { + return fmt.Errorf("determining autoscaling group name of %q: %w", groupID, err) + } + if err := createScalingGroup(ctx, k8sClient, groupID, groupName, autoscalingGroupName, updatev1alpha1.WorkerRole); err != nil { return fmt.Errorf("creating initial worker scaling group: %w", err) } } @@ -64,7 +73,7 @@ func InitialResources(ctx context.Context, k8sClient client.Writer, scalingGroup } // createAutoscalingStrategy creates the autoscaling strategy resource if it does not exist yet. -func createAutoscalingStrategy(ctx context.Context, k8sClient client.Writer) error { +func createAutoscalingStrategy(ctx context.Context, k8sClient client.Writer, provider string) error { err := k8sClient.Create(ctx, &updatev1alpha1.AutoscalingStrategy{ TypeMeta: metav1.TypeMeta{APIVersion: "update.edgeless.systems/v1alpha1", Kind: "AutoscalingStrategy"}, ObjectMeta: metav1.ObjectMeta{ @@ -74,6 +83,13 @@ func createAutoscalingStrategy(ctx context.Context, k8sClient client.Writer) err Enabled: true, DeploymentName: "constellation-cluster-autoscaler", DeploymentNamespace: "kube-system", + AutoscalerExtraArgs: map[string]string{ + "cloud-provider": provider, + "logtostderr": "true", + "stderrthreshold": "info", + "v": "2", + "namespace": "kube-system", + }, }, }) if k8sErrors.IsAlreadyExists(err) { @@ -100,16 +116,19 @@ func createNodeImage(ctx context.Context, k8sClient client.Writer, imageReferenc } // createScalingGroup creates an initial scaling group resource if it does not exist yet. -func createScalingGroup(ctx context.Context, k8sClient client.Writer, groupID, groupName string, autoscaling bool) error { +func createScalingGroup(ctx context.Context, k8sClient client.Writer, groupID, groupName, autoscalingGroupName string, role updatev1alpha1.NodeRole) error { err := k8sClient.Create(ctx, &updatev1alpha1.ScalingGroup{ TypeMeta: metav1.TypeMeta{APIVersion: "update.edgeless.systems/v1alpha1", Kind: "ScalingGroup"}, ObjectMeta: metav1.ObjectMeta{ - Name: groupName, + Name: strings.ToLower(groupName), }, Spec: updatev1alpha1.ScalingGroupSpec{ - NodeImage: constants.NodeImageResourceName, - GroupID: groupID, - Autoscaling: autoscaling, + NodeImage: constants.NodeImageResourceName, + GroupID: groupID, + AutoscalerGroupName: autoscalingGroupName, + Min: 1, + Max: 10, + Role: role, }, }) if k8sErrors.IsAlreadyExists(err) { @@ -122,7 +141,11 @@ type scalingGroupGetter interface { // GetScalingGroupImage retrieves the image currently used by a scaling group. GetScalingGroupImage(ctx context.Context, scalingGroupID string) (string, error) // GetScalingGroupName retrieves the name of a scaling group. - GetScalingGroupName(ctx context.Context, scalingGroupID string) (string, error) + GetScalingGroupName(scalingGroupID string) (string, error) + // GetScalingGroupName retrieves the name of a scaling group as needed by the cluster-autoscaler. + GetAutoscalingGroupName(scalingGroupID string) (string, error) // ListScalingGroups retrieves a list of scaling groups for the cluster. ListScalingGroups(ctx context.Context, uid string) (controlPlaneGroupIDs []string, workerGroupIDs []string, err error) + // AutoscalingCloudProvider returns the cloud-provider name as used by k8s cluster-autoscaler. + AutoscalingCloudProvider() string } diff --git a/operators/constellation-node-operator/internal/deploy/deploy_test.go b/operators/constellation-node-operator/internal/deploy/deploy_test.go index 4f5a25fb1..fc87f9c1d 100644 --- a/operators/constellation-node-operator/internal/deploy/deploy_test.go +++ b/operators/constellation-node-operator/internal/deploy/deploy_test.go @@ -114,6 +114,13 @@ func TestCreateAutoscalingStrategy(t *testing.T) { Enabled: true, DeploymentName: "constellation-cluster-autoscaler", DeploymentNamespace: "kube-system", + AutoscalerExtraArgs: map[string]string{ + "cloud-provider": "stub", + "logtostderr": "true", + "stderrthreshold": "info", + "v": "2", + "namespace": "kube-system", + }, }, }, }, @@ -132,6 +139,13 @@ func TestCreateAutoscalingStrategy(t *testing.T) { Enabled: true, DeploymentName: "constellation-cluster-autoscaler", DeploymentNamespace: "kube-system", + AutoscalerExtraArgs: map[string]string{ + "cloud-provider": "stub", + "logtostderr": "true", + "stderrthreshold": "info", + "v": "2", + "namespace": "kube-system", + }, }, }, }, @@ -143,7 +157,7 @@ func TestCreateAutoscalingStrategy(t *testing.T) { require := require.New(t) k8sClient := &stubK8sClient{createErr: tc.createErr} - err := createAutoscalingStrategy(context.Background(), k8sClient) + err := createAutoscalingStrategy(context.Background(), k8sClient, "stub") if tc.wantErr { assert.Error(err) return @@ -221,9 +235,12 @@ func TestCreateScalingGroup(t *testing.T) { Name: "group-name", }, Spec: updatev1alpha1.ScalingGroupSpec{ - NodeImage: constants.NodeImageResourceName, - GroupID: "group-id", - Autoscaling: true, + NodeImage: constants.NodeImageResourceName, + GroupID: "group-id", + AutoscalerGroupName: "group-Name", + Min: 1, + Max: 10, + Role: updatev1alpha1.WorkerRole, }, }, }, @@ -239,9 +256,12 @@ func TestCreateScalingGroup(t *testing.T) { Name: "group-name", }, Spec: updatev1alpha1.ScalingGroupSpec{ - NodeImage: constants.NodeImageResourceName, - GroupID: "group-id", - Autoscaling: true, + NodeImage: constants.NodeImageResourceName, + GroupID: "group-id", + AutoscalerGroupName: "group-Name", + Min: 1, + Max: 10, + Role: updatev1alpha1.WorkerRole, }, }, }, @@ -253,7 +273,7 @@ func TestCreateScalingGroup(t *testing.T) { require := require.New(t) k8sClient := &stubK8sClient{createErr: tc.createErr} - err := createScalingGroup(context.Background(), k8sClient, "group-id", "group-name", true) + err := createScalingGroup(context.Background(), k8sClient, "group-id", "group-Name", "group-Name", updatev1alpha1.WorkerRole) if tc.wantErr { assert.Error(err) return @@ -300,7 +320,11 @@ func (g *stubScalingGroupGetter) GetScalingGroupImage(ctx context.Context, scali return g.store[scalingGroupID].image, g.imageErr } -func (g *stubScalingGroupGetter) GetScalingGroupName(ctx context.Context, scalingGroupID string) (string, error) { +func (g *stubScalingGroupGetter) GetScalingGroupName(scalingGroupID string) (string, error) { + return g.store[scalingGroupID].name, g.nameErr +} + +func (g *stubScalingGroupGetter) GetAutoscalingGroupName(scalingGroupID string) (string, error) { return g.store[scalingGroupID].name, g.nameErr } @@ -315,6 +339,10 @@ func (g *stubScalingGroupGetter) ListScalingGroups(ctx context.Context, uid stri return controlPlaneGroupIDs, workerGroupIDs, g.listErr } +func (g *stubScalingGroupGetter) AutoscalingCloudProvider() string { + return "stub" +} + type scalingGroupStoreItem struct { groupID string name string diff --git a/operators/constellation-node-operator/internal/gcp/client/autocaler.go b/operators/constellation-node-operator/internal/gcp/client/autocaler.go new file mode 100644 index 000000000..6ac106353 --- /dev/null +++ b/operators/constellation-node-operator/internal/gcp/client/autocaler.go @@ -0,0 +1,12 @@ +/* +Copyright (c) Edgeless Systems GmbH + +SPDX-License-Identifier: AGPL-3.0-only +*/ + +package client + +// AutoscalingCloudProvider returns the cloud-provider name as used by k8s cluster-autoscaler. +func (c *Client) AutoscalingCloudProvider() string { + return "gce" +} diff --git a/operators/constellation-node-operator/internal/gcp/client/disks.go b/operators/constellation-node-operator/internal/gcp/client/disks.go index 5fc18af74..14ab90e7f 100644 --- a/operators/constellation-node-operator/internal/gcp/client/disks.go +++ b/operators/constellation-node-operator/internal/gcp/client/disks.go @@ -40,3 +40,12 @@ func uriNormalize(imageURI string) string { } return matches[1] } + +// ensureURIPrefixed ensures that a compute API URI is prefixed with the optional URI prefix. +func ensureURIPrefixed(uri string) string { + matches := computeAPIBase.FindStringSubmatch(uri) + if len(matches) == 2 { + return uri + } + return "https://www.googleapis.com/compute/v1/" + uri +} diff --git a/operators/constellation-node-operator/internal/gcp/client/scalinggroup.go b/operators/constellation-node-operator/internal/gcp/client/scalinggroup.go index 803ea2f26..41aba3470 100644 --- a/operators/constellation-node-operator/internal/gcp/client/scalinggroup.go +++ b/operators/constellation-node-operator/internal/gcp/client/scalinggroup.go @@ -10,7 +10,6 @@ import ( "context" "errors" "fmt" - "strings" "google.golang.org/api/iterator" computepb "google.golang.org/genproto/googleapis/cloud/compute/v1" @@ -87,12 +86,23 @@ func (c *Client) SetScalingGroupImage(ctx context.Context, scalingGroupID, image } // GetScalingGroupName retrieves the name of a scaling group. -func (c *Client) GetScalingGroupName(ctx context.Context, scalingGroupID string) (string, error) { +// This keeps the casing of the original name, but Kubernetes requires the name to be lowercase, +// so use strings.ToLower() on the result if using the name in a Kubernetes context. +func (c *Client) GetScalingGroupName(scalingGroupID string) (string, error) { _, _, instanceGroupName, err := splitInstanceGroupID(scalingGroupID) if err != nil { return "", fmt.Errorf("getting scaling group name: %w", err) } - return strings.ToLower(instanceGroupName), nil + return instanceGroupName, nil +} + +// GetScalingGroupName retrieves the name of a scaling group as needed by the cluster-autoscaler. +func (c *Client) GetAutoscalingGroupName(scalingGroupID string) (string, error) { + project, zone, instanceGroupName, err := splitInstanceGroupID(scalingGroupID) + if err != nil { + return "", fmt.Errorf("getting autoscaling scaling group name: %w", err) + } + return ensureURIPrefixed(fmt.Sprintf("projects/%s/zones/%s/instanceGroups/%s", project, zone, instanceGroupName)), nil } // ListScalingGroups retrieves a list of scaling groups for the cluster. diff --git a/operators/constellation-node-operator/internal/gcp/client/scalinggroup_test.go b/operators/constellation-node-operator/internal/gcp/client/scalinggroup_test.go index 627a9359c..810ba1418 100644 --- a/operators/constellation-node-operator/internal/gcp/client/scalinggroup_test.go +++ b/operators/constellation-node-operator/internal/gcp/client/scalinggroup_test.go @@ -311,7 +311,7 @@ func TestGetScalingGroupName(t *testing.T) { require := require.New(t) client := Client{} - gotName, err := client.GetScalingGroupName(context.Background(), tc.scalingGroupID) + gotName, err := client.GetScalingGroupName(tc.scalingGroupID) if tc.wantErr { assert.Error(err) return diff --git a/operators/constellation-node-operator/main.go b/operators/constellation-node-operator/main.go index 18743da04..58b8fdcc1 100644 --- a/operators/constellation-node-operator/main.go +++ b/operators/constellation-node-operator/main.go @@ -191,7 +191,11 @@ type cspAPI interface { // SetScalingGroupImage sets the image to be used by newly created nodes in a scaling group. SetScalingGroupImage(ctx context.Context, scalingGroupID, imageURI string) error // GetScalingGroupName retrieves the name of a scaling group. - GetScalingGroupName(ctx context.Context, scalingGroupID string) (string, error) + GetScalingGroupName(scalingGroupID string) (string, error) + // GetScalingGroupName retrieves the name of a scaling group as needed by the cluster-autoscaler. + GetAutoscalingGroupName(scalingGroupID string) (string, error) // ListScalingGroups retrieves a list of scaling groups for the cluster. ListScalingGroups(ctx context.Context, uid string) (controlPlaneGroupIDs []string, workerGroupIDs []string, err error) + // AutoscalingCloudProvider returns the cloud-provider name as used by k8s cluster-autoscaler. + AutoscalingCloudProvider() string }