Let operator manage autoscaling of node groups

2025-08-04 04:54:15 -04:00 · 2022-09-15 16:48:32 +02:00 · 2022-09-15 16:48:32 +02:00 · e301f575df
commit e301f575df
parent 67d9be38d7
18 changed files with 499 additions and 42 deletions
--- a/operators/constellation-node-operator/api/v1alpha1/autoscalingstrategy_types.go
+++ b/operators/constellation-node-operator/api/v1alpha1/autoscalingstrategy_types.go
@ -18,6 +18,8 @@ type AutoscalingStrategySpec struct {
 	DeploymentName string `json:"deploymentName"`
 	// DeploymentNamespace defines the namespace of the autoscaler deployment.
 	DeploymentNamespace string `json:"deploymentNamespace"`
+	// AutoscalerExtraArgs defines extra arguments to be passed to the autoscaler.
+	AutoscalerExtraArgs map[string]string `json:"autoscalerExtraArgs,omitempty"`
 }

 // AutoscalingStrategyStatus defines the observed state of AutoscalingStrategy.
--- a/operators/constellation-node-operator/api/v1alpha1/scalinggroup_types.go
+++ b/operators/constellation-node-operator/api/v1alpha1/scalinggroup_types.go
@ -13,6 +13,11 @@ import (
 const (
 	// ConditionOutdated is used to signal outdated scaling groups.
 	ConditionOutdated = "Outdated"
+
+	// WorkerRole is used to signal worker scaling groups.
+	WorkerRole NodeRole = "Worker"
+	// ControlPlaneRole is used to signal control plane scaling groups.
+	ControlPlaneRole NodeRole = "ControlPlane"
 )

 // ScalingGroupSpec defines the desired state of ScalingGroup.
@ -21,10 +26,22 @@ type ScalingGroupSpec struct {
 	NodeImage string `json:"nodeImage,omitempty"`
 	// GroupID is the CSP specific, canonical identifier of a scaling group.
 	GroupID string `json:"groupId,omitempty"`
+	// AutoscalerGroupName is name that is expected by the autoscaler.
+	AutoscalerGroupName string `json:"autoscalerGroupName,omitempty"`
 	// Autoscaling specifies wether the scaling group should automatically scale using the cluster-autoscaler.
 	Autoscaling bool `json:"autoscaling,omitempty"`
+	// Min is the minimum number of nodes in the scaling group (used by cluster-autoscaler).
+	Min int32 `json:"min,omitempty"`
+	// Max is the maximum number of autoscaled nodes in the scaling group (used by cluster-autoscaler).
+	Max int32 `json:"max,omitempty"`
+	// Role is the role of the nodes in the scaling group.
+	Role NodeRole `json:"role,omitempty"`
 }

+// NodeRole is the role of a node.
+// +kubebuilder:validation:Enum=Worker;ControlPlane
+type NodeRole string
+
 // ScalingGroupStatus defines the observed state of ScalingGroup.
 type ScalingGroupStatus struct {
 	// ImageReference is the image currently used for newly created nodes in this scaling group.
--- a/operators/constellation-node-operator/api/v1alpha1/zz_generated.deepcopy.go
+++ b/operators/constellation-node-operator/api/v1alpha1/zz_generated.deepcopy.go
@ -16,7 +16,7 @@ func (in *AutoscalingStrategy) DeepCopyInto(out *AutoscalingStrategy) {
 	*out = *in
 	out.TypeMeta = in.TypeMeta
 	in.ObjectMeta.DeepCopyInto(&out.ObjectMeta)
-	out.Spec = in.Spec
+	in.Spec.DeepCopyInto(&out.Spec)
 	out.Status = in.Status
 }

@ -73,6 +73,13 @@ func (in *AutoscalingStrategyList) DeepCopyObject() runtime.Object {
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *AutoscalingStrategySpec) DeepCopyInto(out *AutoscalingStrategySpec) {
 	*out = *in
+	if in.AutoscalerExtraArgs != nil {
+		in, out := &in.AutoscalerExtraArgs, &out.AutoscalerExtraArgs
+		*out = make(map[string]string, len(*in))
+		for key, val := range *in {
+			(*out)[key] = val
+		}
+	}
 }

 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AutoscalingStrategySpec.
--- a/operators/constellation-node-operator/config/crd/bases/update.edgeless.systems_autoscalingstrategies.yaml
+++ b/operators/constellation-node-operator/config/crd/bases/update.edgeless.systems_autoscalingstrategies.yaml
@ -36,6 +36,12 @@ spec:
          spec:
            description: AutoscalingStrategySpec defines the desired state of AutoscalingStrategy.
            properties:
+              autoscalerExtraArgs:
+                additionalProperties:
+                  type: string
+                description: AutoscalerExtraArgs defines extra arguments to be passed
+                  to the autoscaler.
+                type: object
              deploymentName:
                description: DeploymentName defines the name of the autoscaler deployment.
                type: string
--- a/operators/constellation-node-operator/config/crd/bases/update.edgeless.systems_scalinggroups.yaml
+++ b/operators/constellation-node-operator/config/crd/bases/update.edgeless.systems_scalinggroups.yaml
@ -35,6 +35,9 @@ spec:
          spec:
            description: ScalingGroupSpec defines the desired state of ScalingGroup.
            properties:
+              autoscalerGroupName:
+                description: AutoscalerGroupName is name that is expected by the autoscaler.
+                type: string
              autoscaling:
                description: Autoscaling specifies wether the scaling group should
                  automatically scale using the cluster-autoscaler.
@ -43,9 +46,23 @@ spec:
                description: GroupID is the CSP specific, canonical identifier of
                  a scaling group.
                type: string
+              max:
+                description: Max is the maximum number of nodes in the scaling group.
+                format: int32
+                type: integer
+              min:
+                description: Min is the minimum number of nodes in the scaling group.
+                format: int32
+                type: integer
              nodeImage:
                description: NodeImage is the name of the NodeImage resource.
                type: string
+              role:
+                description: Role is the role of the nodes in the scaling group.
+                enum:
+                - Worker
+                - ControlPlane
+                type: string
            type: object
          status:
            description: ScalingGroupStatus defines the observed state of ScalingGroup.
--- a/operators/constellation-node-operator/controllers/autoscalingstrategy_controller.go
+++ b/operators/constellation-node-operator/controllers/autoscalingstrategy_controller.go
@ -8,14 +8,22 @@ package controllers

 import (
 	"context"
+	"fmt"
+	"sort"

 	appsv1 "k8s.io/api/apps/v1"
 	"k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/client-go/util/retry"
 	ctrl "sigs.k8s.io/controller-runtime"
+	"sigs.k8s.io/controller-runtime/pkg/builder"
 	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/event"
+	"sigs.k8s.io/controller-runtime/pkg/handler"
 	"sigs.k8s.io/controller-runtime/pkg/log"
+	"sigs.k8s.io/controller-runtime/pkg/predicate"
+	"sigs.k8s.io/controller-runtime/pkg/reconcile"
+	"sigs.k8s.io/controller-runtime/pkg/source"

 	updatev1alpha1 "github.com/edgelesssys/constellation/operators/constellation-node-operator/api/v1alpha1"
 )
@ -29,6 +37,7 @@ type AutoscalingStrategyReconciler struct {
 //+kubebuilder:rbac:groups=update.edgeless.systems,resources=autoscalingstrategies,verbs=get;list;watch;create;update;patch;delete
 //+kubebuilder:rbac:groups=update.edgeless.systems,resources=autoscalingstrategies/status,verbs=get;update;patch
 //+kubebuilder:rbac:groups=update.edgeless.systems,resources=autoscalingstrategies/finalizers,verbs=update
+//+kubebuilder:rbac:groups=update.edgeless.systems,resources=scalinggroups,verbs=get;list;watch
 //+kubebuilder:rbac:groups=apps,resources=deployments,verbs=get;list;watch;create;update;delete

 // Reconcile enabled or disables the cluster-autoscaler based on the AutoscalingStrategy spec
@ -46,8 +55,32 @@ func (r *AutoscalingStrategyReconciler) Reconcile(ctx context.Context, req ctrl.
 		expectedReplicas = 1
 	}

+	var scalingGroups updatev1alpha1.ScalingGroupList
+	if err := r.List(ctx, &scalingGroups); err != nil {
+		logr.Error(err, "Unable to fetch ScalingGroups")
+		return ctrl.Result{}, err
+	}
+
+	autoscalerArgs := []string{"./cluster-autoscaler"}
+	for key, val := range desiredAutoscalingStrategy.Spec.AutoscalerExtraArgs {
+		autoscalerArgs = append(autoscalerArgs, "--"+key+"="+val)
+	}
+	const nodeGroupFmt = "--nodes=%d:%d:%s"
+	for _, group := range scalingGroups.Items {
+		// Don't autoscale control plane nodes for safety reasons.
+		if group.Spec.Autoscaling && group.Spec.Role != updatev1alpha1.ControlPlaneRole {
+			groupArg := fmt.Sprintf(nodeGroupFmt, group.Spec.Min, group.Spec.Max, group.Spec.AutoscalerGroupName)
+			autoscalerArgs = append(autoscalerArgs, groupArg)
+		}
+	}
+	sort.Strings(autoscalerArgs[1:])
+
 	var autoscalerDeployment appsv1.Deployment
-	if err := r.Get(ctx, client.ObjectKey{Namespace: desiredAutoscalingStrategy.Spec.DeploymentNamespace, Name: desiredAutoscalingStrategy.Spec.DeploymentName}, &autoscalerDeployment); err != nil {
+	deploymentKey := client.ObjectKey{
+		Namespace: desiredAutoscalingStrategy.Spec.DeploymentNamespace,
+		Name:      desiredAutoscalingStrategy.Spec.DeploymentName,
+	}
+	if err := r.Get(ctx, deploymentKey, &autoscalerDeployment); err != nil {
 		logr.Error(err, "Unable to fetch autoscaler Deployment")
 		return ctrl.Result{}, client.IgnoreNotFound(err)
 	}
@ -65,26 +98,116 @@ func (r *AutoscalingStrategyReconciler) Reconcile(ctx context.Context, req ctrl.
 		return ctrl.Result{}, err
 	}

+	var needUpdate bool
 	if autoscalerDeployment.Spec.Replicas == nil || *autoscalerDeployment.Spec.Replicas != expectedReplicas {
-		logr.Info("Updating autoscaling replicas", "expectedReplicas", expectedReplicas)
-		autoscalerDeployment.Spec.Replicas = &expectedReplicas
-		if err := r.Update(ctx, &autoscalerDeployment); err != nil {
-			logr.Error(err, "Unable to update autoscaler Deployment")
-			return ctrl.Result{}, err
+		needUpdate = needUpdate || true
+	}
+	containers := autoscalerDeployment.Spec.Template.Spec.Containers
+	if len(containers) != 0 && containers[0].Command == nil { // uninitialized
+		needUpdate = needUpdate || true
+	}
+	if len(containers) != 0 && containers[0].Command != nil { // args updated
+		if len(containers[0].Command) != len(autoscalerArgs) {
+			needUpdate = needUpdate || true
+		} else {
+			for i, arg := range containers[0].Command {
+				if arg != autoscalerArgs[i] {
+					needUpdate = needUpdate || true
+					break
+				}
+			}
 		}
-		return ctrl.Result{Requeue: true}, nil
 	}

-	return ctrl.Result{}, nil
+	if !needUpdate {
+		return ctrl.Result{}, nil
+	}
+
+	logr.Info("Updating autoscaling replicas and command", "expectedReplicas", expectedReplicas, "autoscalerArgs", autoscalerArgs)
+	err := retry.RetryOnConflict(retry.DefaultRetry, func() error {
+		if err := r.Get(ctx, deploymentKey, &autoscalerDeployment); err != nil {
+			return err
+		}
+		autoscalerDeployment.Spec.Replicas = &expectedReplicas
+		if len(containers) != 0 {
+			logr.Info("Updating autoscaler command", "old", containers[0].Command, "new", autoscalerArgs)
+			autoscalerDeployment.Spec.Template.Spec.Containers[0].Command = autoscalerArgs
+		}
+		return r.Update(ctx, &autoscalerDeployment)
+	})
+	if err != nil {
+		logr.Error(err, "Unable to update autoscaler Deployment")
+		return ctrl.Result{}, err
+	}
+
+	return ctrl.Result{Requeue: true}, nil
 }

 // SetupWithManager sets up the controller with the Manager.
 func (r *AutoscalingStrategyReconciler) SetupWithManager(mgr ctrl.Manager) error {
 	return ctrl.NewControllerManagedBy(mgr).
 		For(&updatev1alpha1.AutoscalingStrategy{}).
+		Watches(
+			&source.Kind{Type: &updatev1alpha1.ScalingGroup{}},
+			handler.EnqueueRequestsFromMapFunc(r.findObjectsForDeployment),
+			builder.WithPredicates(scalingGroupChangePredicate()),
+		).
 		Complete(r)
 }

+func (r *AutoscalingStrategyReconciler) findObjectsForDeployment(_ client.Object) []reconcile.Request {
+	var autoscalingStrats updatev1alpha1.AutoscalingStrategyList
+	err := r.List(context.Background(), &autoscalingStrats)
+	if err != nil {
+		return []reconcile.Request{}
+	}
+
+	requests := make([]reconcile.Request, len(autoscalingStrats.Items))
+	for i, item := range autoscalingStrats.Items {
+		requests[i] = reconcile.Request{
+			NamespacedName: types.NamespacedName{
+				Name: item.GetName(),
+			},
+		}
+	}
+	return requests
+}
+
+// scalingGroupChangePredicate filters events on scaling group resources.
+func scalingGroupChangePredicate() predicate.Predicate {
+	return predicate.Funcs{
+		CreateFunc: func(e event.CreateEvent) bool {
+			return true
+		},
+		UpdateFunc: func(e event.UpdateEvent) bool {
+			oldGroup, ok := e.ObjectOld.(*updatev1alpha1.ScalingGroup)
+			if !ok {
+				return false
+			}
+			newGroup, ok := e.ObjectNew.(*updatev1alpha1.ScalingGroup)
+			if !ok {
+				return false
+			}
+			switch {
+			case oldGroup.Spec.Min != newGroup.Spec.Min:
+				return true
+			case oldGroup.Spec.Max != newGroup.Spec.Max:
+				return true
+			case oldGroup.Spec.Autoscaling != newGroup.Spec.Autoscaling:
+				return true
+			default:
+				return false
+			}
+		},
+		DeleteFunc: func(e event.DeleteEvent) bool {
+			return true
+		},
+		GenericFunc: func(e event.GenericEvent) bool {
+			return false
+		},
+	}
+}
+
 // tryUpdateStatus attempts to update the AutoscalingStrategy status field in a retry loop.
 func (r *AutoscalingStrategyReconciler) tryUpdateStatus(ctx context.Context, name types.NamespacedName, status updatev1alpha1.AutoscalingStrategyStatus) error {
 	return retry.RetryOnConflict(retry.DefaultRetry, func() error {
--- a/operators/constellation-node-operator/controllers/autoscalingstrategy_controller_env_test.go
+++ b/operators/constellation-node-operator/controllers/autoscalingstrategy_controller_env_test.go
@ -28,6 +28,8 @@ var _ = Describe("AutoscalingStrategy controller", func() {
 		ClusterAutoscalerDeploymentName      = "cluster-autoscaler"
 		ClusterAutoscalerDeploymentNamespace = "kube-system"
 		AutoscalingStrategyName              = "test-strategy"
+		ScalingGroupNameWorker               = "worker-group"
+		ScalingGroupNameControlPlane         = "control-plane-group"

 		timeout  = time.Second * 20
 		duration = time.Second * 2
@ -162,4 +164,182 @@ var _ = Describe("AutoscalingStrategy controller", func() {
 			Expect(k8sClient.Delete(ctx, strategy)).Should(Succeed())
 		})
 	})
+
+	Context("When changing autoscaling of node groups", func() {
+		It("Should update the autoscaler deployment command", func() {
+			By("creating a cluster-autoscaler deployment")
+			ctx := context.Background()
+			autoscalerDeployment := &appsv1.Deployment{
+				TypeMeta: metav1.TypeMeta{
+					APIVersion: "apps/v1",
+					Kind:       "Deployment",
+				},
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      ClusterAutoscalerDeploymentName,
+					Namespace: ClusterAutoscalerDeploymentNamespace,
+				},
+				Spec: appsv1.DeploymentSpec{
+					Replicas: &ClusterAutoscalerStartingReplicas,
+					Selector: &metav1.LabelSelector{
+						MatchLabels: map[string]string{
+							"app.kubernetes.io/name": "cluster-autoscaler",
+						},
+					},
+					Template: corev1.PodTemplateSpec{
+						ObjectMeta: metav1.ObjectMeta{
+							Labels: map[string]string{
+								"app.kubernetes.io/name": "cluster-autoscaler",
+							},
+						},
+						Spec: corev1.PodSpec{
+							Containers: []corev1.Container{
+								{Image: "cluster-autoscaler", Name: "cluster-autoscaler"},
+							},
+						},
+					},
+				},
+			}
+			Expect(k8sClient.Create(ctx, autoscalerDeployment)).Should(Succeed())
+			createdDeployment := &appsv1.Deployment{}
+			Eventually(func() error {
+				return k8sClient.Get(ctx, DeploymentLookupKey, createdDeployment)
+			}, timeout, interval).Should(Succeed())
+
+			Expect(createdDeployment.Spec.Template.Spec.Containers).NotTo(BeEmpty())
+			Expect(createdDeployment.Spec.Template.Spec.Containers[0].Command).To(BeEmpty())
+
+			By("creating an autoscaling strategy")
+			strategy := &updatev1alpha1.AutoscalingStrategy{
+				TypeMeta: metav1.TypeMeta{
+					APIVersion: "update.edgeless.systems/v1alpha1",
+					Kind:       "AutoscalingStrategy",
+				},
+				ObjectMeta: metav1.ObjectMeta{
+					Name: AutoscalingStrategyName,
+				},
+				Spec: updatev1alpha1.AutoscalingStrategySpec{
+					DeploymentName:      ClusterAutoscalerDeploymentName,
+					DeploymentNamespace: ClusterAutoscalerDeploymentNamespace,
+					AutoscalerExtraArgs: map[string]string{
+						"foo": "bar",
+						"baz": "qux",
+					},
+				},
+			}
+			Expect(k8sClient.Create(ctx, strategy)).Should(Succeed())
+			strategyLookupKey := types.NamespacedName{Name: AutoscalingStrategyName}
+			createdStrategy := &updatev1alpha1.AutoscalingStrategy{}
+			Eventually(func() error {
+				return k8sClient.Get(ctx, strategyLookupKey, createdStrategy)
+			}, timeout, interval).Should(Succeed())
+
+			By("checking the autoscaling deployment eventually has the correct command")
+			Eventually(func() []string {
+				err := k8sClient.Get(ctx, DeploymentLookupKey, createdDeployment)
+				if err != nil {
+					return nil
+				}
+				return createdDeployment.Spec.Template.Spec.Containers[0].Command
+			}, timeout, interval).Should(Equal([]string{
+				"./cluster-autoscaler",
+				"--baz=qux",
+				"--foo=bar",
+			}))
+
+			By("creating a new worker scaling group")
+			scalingGroup := &updatev1alpha1.ScalingGroup{
+				TypeMeta: metav1.TypeMeta{
+					APIVersion: "update.edgeless.systems/v1alpha1",
+					Kind:       "ScalingGroup",
+				},
+				ObjectMeta: metav1.ObjectMeta{
+					Name: ScalingGroupNameWorker,
+				},
+				Spec: updatev1alpha1.ScalingGroupSpec{
+					GroupID:             ScalingGroupNameWorker,
+					AutoscalerGroupName: ScalingGroupNameWorker,
+					Autoscaling:         true,
+					Min:                 1,
+					Max:                 10,
+					Role:                updatev1alpha1.WorkerRole,
+				},
+			}
+			Expect(k8sClient.Create(ctx, scalingGroup)).Should(Succeed())
+			scalingGroupLookupKey := types.NamespacedName{Name: ScalingGroupNameWorker}
+			Eventually(func() error {
+				return k8sClient.Get(ctx, scalingGroupLookupKey, scalingGroup)
+			}, timeout, interval).Should(Succeed())
+			By("checking the controller eventually updates the autoscaler deployment command")
+			Eventually(func() []string {
+				err := k8sClient.Get(ctx, DeploymentLookupKey, createdDeployment)
+				if err != nil {
+					return nil
+				}
+				return createdDeployment.Spec.Template.Spec.Containers[0].Command
+			}, timeout, interval).Should(Equal([]string{
+				"./cluster-autoscaler",
+				"--baz=qux",
+				"--foo=bar",
+				"--nodes=1:10:worker-group",
+			}))
+
+			By("Disabling autoscaling for the worker scaling group")
+			Eventually(func() error {
+				scalingGroup.Spec.Autoscaling = false
+				return k8sClient.Update(ctx, scalingGroup)
+			}, timeout, interval).Should(Succeed())
+			By("checking the controller eventually updates the autoscaler deployment command")
+			Eventually(func() []string {
+				err := k8sClient.Get(ctx, DeploymentLookupKey, createdDeployment)
+				if err != nil {
+					return nil
+				}
+				return createdDeployment.Spec.Template.Spec.Containers[0].Command
+			}, timeout, interval).Should(Equal([]string{
+				"./cluster-autoscaler",
+				"--baz=qux",
+				"--foo=bar",
+			}))
+
+			By("creating a new control plane scaling group")
+			scalingGroup = &updatev1alpha1.ScalingGroup{
+				TypeMeta: metav1.TypeMeta{
+					APIVersion: "update.edgeless.systems/v1alpha1",
+					Kind:       "ScalingGroup",
+				},
+				ObjectMeta: metav1.ObjectMeta{
+					Name: ScalingGroupNameControlPlane,
+				},
+				Spec: updatev1alpha1.ScalingGroupSpec{
+					GroupID:             ScalingGroupNameControlPlane,
+					AutoscalerGroupName: ScalingGroupNameControlPlane,
+					Autoscaling:         true,
+					Min:                 1,
+					Max:                 10,
+					Role:                updatev1alpha1.ControlPlaneRole,
+				},
+			}
+			Expect(k8sClient.Create(ctx, scalingGroup)).Should(Succeed())
+			scalingGroupLookupKey = types.NamespacedName{Name: ScalingGroupNameControlPlane}
+			Eventually(func() error {
+				return k8sClient.Get(ctx, scalingGroupLookupKey, scalingGroup)
+			}, timeout, interval).Should(Succeed())
+			By("checking the controller doesn't update the control plane node group autoscaling")
+			Consistently(func() []string {
+				err := k8sClient.Get(ctx, DeploymentLookupKey, createdDeployment)
+				if err != nil {
+					return nil
+				}
+				return createdDeployment.Spec.Template.Spec.Containers[0].Command
+			}, timeout, interval).Should(Equal([]string{
+				"./cluster-autoscaler",
+				"--baz=qux",
+				"--foo=bar",
+			}))
+
+			By("cleaning up all resources")
+			Expect(k8sClient.Delete(ctx, autoscalerDeployment)).Should(Succeed())
+			Expect(k8sClient.Delete(ctx, strategy)).Should(Succeed())
+		})
+	})
 })
--- a/operators/constellation-node-operator/controllers/pendingnode_controller.go
+++ b/operators/constellation-node-operator/controllers/pendingnode_controller.go
@ -193,7 +193,7 @@ func nodeStateChangePredicate() predicate.Predicate {
 // findObjectsForNode requests reconciliation for PendingNode whenever the corresponding Node state changes.
 func (r *PendingNodeReconciler) findObjectsForNode(rawNode client.Object) []reconcile.Request {
 	var pendingNodesList updatev1alpha1.PendingNodeList
-	err := r.List(context.TODO(), &pendingNodesList, client.MatchingFields{nodeNameKey: rawNode.GetName()})
+	err := r.List(context.Background(), &pendingNodesList, client.MatchingFields{nodeNameKey: rawNode.GetName()})
 	if err != nil {
 		return []reconcile.Request{}
 	}
--- a/operators/constellation-node-operator/internal/azure/client/autoscaler.go
+++ b/operators/constellation-node-operator/internal/azure/client/autoscaler.go
@ -0,0 +1,12 @@
+/*
+Copyright (c) Edgeless Systems GmbH
+
+SPDX-License-Identifier: AGPL-3.0-only
+*/
+
+package client
+
+// AutoscalingCloudProvider returns the cloud-provider name as used by k8s cluster-autoscaler.
+func (c *Client) AutoscalingCloudProvider() string {
+	return "azure"
+}
--- a/operators/constellation-node-operator/internal/azure/client/scalinggroup.go
+++ b/operators/constellation-node-operator/internal/azure/client/scalinggroup.go
@ -63,13 +63,20 @@ func (c *Client) SetScalingGroupImage(ctx context.Context, scalingGroupID, image
 	return nil
 }

-// GetScalingGroupName retrieves the name of a scaling group.
-func (c *Client) GetScalingGroupName(ctx context.Context, scalingGroupID string) (string, error) {
+// GetScalingGroupName retrieves the name of a scaling group, as expected by Kubernetes.
+// This keeps the casing of the original name, but Kubernetes requires the name to be lowercase,
+// so use strings.ToLower() on the result if using the name in a Kubernetes context.
+func (c *Client) GetScalingGroupName(scalingGroupID string) (string, error) {
 	_, _, scaleSet, err := splitVMSSID(scalingGroupID)
 	if err != nil {
 		return "", fmt.Errorf("getting scaling group name: %w", err)
 	}
-	return strings.ToLower(scaleSet), nil
+	return scaleSet, nil
+}
+
+// GetScalingGroupName retrieves the name of a scaling group as needed by the cluster-autoscaler.
+func (c *Client) GetAutoscalingGroupName(scalingGroupID string) (string, error) {
+	return c.GetScalingGroupName(scalingGroupID)
 }

 // ListScalingGroups retrieves a list of scaling groups for the cluster.
--- a/operators/constellation-node-operator/internal/azure/client/scalinggroup_test.go
+++ b/operators/constellation-node-operator/internal/azure/client/scalinggroup_test.go
@ -155,9 +155,9 @@ func TestGetScalingGroupName(t *testing.T) {
 			scalingGroupID: "/subscriptions/subscription-id/resourceGroups/resource-group/providers/Microsoft.Compute/virtualMachineScaleSets/scale-set-name",
 			wantName:       "scale-set-name",
 		},
-		"uppercase name is lowercased": {
+		"uppercase name isn't lowercased": {
 			scalingGroupID: "/subscriptions/subscription-id/resourceGroups/resource-group/providers/Microsoft.Compute/virtualMachineScaleSets/SCALE-SET-NAME",
-			wantName:       "scale-set-name",
+			wantName:       "SCALE-SET-NAME",
 		},
 		"splitting scalingGroupID fails": {
 			scalingGroupID: "invalid",
@ -171,7 +171,7 @@ func TestGetScalingGroupName(t *testing.T) {
 			require := require.New(t)

 			client := Client{}
-			gotName, err := client.GetScalingGroupName(context.Background(), tc.scalingGroupID)
+			gotName, err := client.GetScalingGroupName(tc.scalingGroupID)
 			if tc.wantErr {
 				assert.Error(err)
 				return
--- a/operators/constellation-node-operator/internal/deploy/deploy.go
+++ b/operators/constellation-node-operator/internal/deploy/deploy.go
@ -11,6 +11,7 @@ import (
 	"context"
 	"errors"
 	"fmt"
+	"strings"

 	updatev1alpha1 "github.com/edgelesssys/constellation/operators/constellation-node-operator/api/v1alpha1"
 	"github.com/edgelesssys/constellation/operators/constellation-node-operator/internal/constants"
@ -32,7 +33,7 @@ func InitialResources(ctx context.Context, k8sClient client.Writer, scalingGroup
 		return errors.New("determining initial node image: no worker scaling group found")
 	}

-	if err := createAutoscalingStrategy(ctx, k8sClient); err != nil {
+	if err := createAutoscalingStrategy(ctx, k8sClient, scalingGroupGetter.AutoscalingCloudProvider()); err != nil {
 		return fmt.Errorf("creating initial autoscaling strategy: %w", err)
 	}
 	imageReference, err := scalingGroupGetter.GetScalingGroupImage(ctx, controlPlaneGroupIDs[0])
@ -43,20 +44,28 @@ func InitialResources(ctx context.Context, k8sClient client.Writer, scalingGroup
 		return fmt.Errorf("creating initial node image %q: %w", imageReference, err)
 	}
 	for _, groupID := range controlPlaneGroupIDs {
-		groupName, err := scalingGroupGetter.GetScalingGroupName(ctx, groupID)
+		groupName, err := scalingGroupGetter.GetScalingGroupName(groupID)
 		if err != nil {
 			return fmt.Errorf("determining scaling group name of %q: %w", groupID, err)
 		}
-		if err := createScalingGroup(ctx, k8sClient, groupID, groupName, false); err != nil {
+		autoscalingGroupName, err := scalingGroupGetter.GetAutoscalingGroupName(groupID)
+		if err != nil {
+			return fmt.Errorf("determining autoscaling group name of %q: %w", groupID, err)
+		}
+		if err := createScalingGroup(ctx, k8sClient, groupID, groupName, autoscalingGroupName, updatev1alpha1.ControlPlaneRole); err != nil {
 			return fmt.Errorf("creating initial control plane scaling group: %w", err)
 		}
 	}
 	for _, groupID := range workerGroupIDs {
-		groupName, err := scalingGroupGetter.GetScalingGroupName(ctx, groupID)
+		groupName, err := scalingGroupGetter.GetScalingGroupName(groupID)
 		if err != nil {
 			return fmt.Errorf("determining scaling group name of %q: %w", groupID, err)
 		}
-		if err := createScalingGroup(ctx, k8sClient, groupID, groupName, true); err != nil {
+		autoscalingGroupName, err := scalingGroupGetter.GetAutoscalingGroupName(groupID)
+		if err != nil {
+			return fmt.Errorf("determining autoscaling group name of %q: %w", groupID, err)
+		}
+		if err := createScalingGroup(ctx, k8sClient, groupID, groupName, autoscalingGroupName, updatev1alpha1.WorkerRole); err != nil {
 			return fmt.Errorf("creating initial worker scaling group: %w", err)
 		}
 	}
@ -64,7 +73,7 @@ func InitialResources(ctx context.Context, k8sClient client.Writer, scalingGroup
 }

 // createAutoscalingStrategy creates the autoscaling strategy resource if it does not exist yet.
-func createAutoscalingStrategy(ctx context.Context, k8sClient client.Writer) error {
+func createAutoscalingStrategy(ctx context.Context, k8sClient client.Writer, provider string) error {
 	err := k8sClient.Create(ctx, &updatev1alpha1.AutoscalingStrategy{
 		TypeMeta: metav1.TypeMeta{APIVersion: "update.edgeless.systems/v1alpha1", Kind: "AutoscalingStrategy"},
 		ObjectMeta: metav1.ObjectMeta{
@ -74,6 +83,13 @@ func createAutoscalingStrategy(ctx context.Context, k8sClient client.Writer) err
 			Enabled:             true,
 			DeploymentName:      "constellation-cluster-autoscaler",
 			DeploymentNamespace: "kube-system",
+			AutoscalerExtraArgs: map[string]string{
+				"cloud-provider":  provider,
+				"logtostderr":     "true",
+				"stderrthreshold": "info",
+				"v":               "2",
+				"namespace":       "kube-system",
+			},
 		},
 	})
 	if k8sErrors.IsAlreadyExists(err) {
@ -100,16 +116,19 @@ func createNodeImage(ctx context.Context, k8sClient client.Writer, imageReferenc
 }

 // createScalingGroup creates an initial scaling group resource if it does not exist yet.
-func createScalingGroup(ctx context.Context, k8sClient client.Writer, groupID, groupName string, autoscaling bool) error {
+func createScalingGroup(ctx context.Context, k8sClient client.Writer, groupID, groupName, autoscalingGroupName string, role updatev1alpha1.NodeRole) error {
 	err := k8sClient.Create(ctx, &updatev1alpha1.ScalingGroup{
 		TypeMeta: metav1.TypeMeta{APIVersion: "update.edgeless.systems/v1alpha1", Kind: "ScalingGroup"},
 		ObjectMeta: metav1.ObjectMeta{
-			Name: groupName,
+			Name: strings.ToLower(groupName),
 		},
 		Spec: updatev1alpha1.ScalingGroupSpec{
-			NodeImage:   constants.NodeImageResourceName,
-			GroupID:     groupID,
-			Autoscaling: autoscaling,
+			NodeImage:           constants.NodeImageResourceName,
+			GroupID:             groupID,
+			AutoscalerGroupName: autoscalingGroupName,
+			Min:                 1,
+			Max:                 10,
+			Role:                role,
 		},
 	})
 	if k8sErrors.IsAlreadyExists(err) {
@ -122,7 +141,11 @@ type scalingGroupGetter interface {
 	// GetScalingGroupImage retrieves the image currently used by a scaling group.
 	GetScalingGroupImage(ctx context.Context, scalingGroupID string) (string, error)
 	// GetScalingGroupName retrieves the name of a scaling group.
-	GetScalingGroupName(ctx context.Context, scalingGroupID string) (string, error)
+	GetScalingGroupName(scalingGroupID string) (string, error)
+	// GetScalingGroupName retrieves the name of a scaling group as needed by the cluster-autoscaler.
+	GetAutoscalingGroupName(scalingGroupID string) (string, error)
 	// ListScalingGroups retrieves a list of scaling groups for the cluster.
 	ListScalingGroups(ctx context.Context, uid string) (controlPlaneGroupIDs []string, workerGroupIDs []string, err error)
+	// AutoscalingCloudProvider returns the cloud-provider name as used by k8s cluster-autoscaler.
+	AutoscalingCloudProvider() string
 }
--- a/operators/constellation-node-operator/internal/deploy/deploy_test.go
+++ b/operators/constellation-node-operator/internal/deploy/deploy_test.go
@ -114,6 +114,13 @@ func TestCreateAutoscalingStrategy(t *testing.T) {
 					Enabled:             true,
 					DeploymentName:      "constellation-cluster-autoscaler",
 					DeploymentNamespace: "kube-system",
+					AutoscalerExtraArgs: map[string]string{
+						"cloud-provider":  "stub",
+						"logtostderr":     "true",
+						"stderrthreshold": "info",
+						"v":               "2",
+						"namespace":       "kube-system",
+					},
 				},
 			},
 		},
@ -132,6 +139,13 @@ func TestCreateAutoscalingStrategy(t *testing.T) {
 					Enabled:             true,
 					DeploymentName:      "constellation-cluster-autoscaler",
 					DeploymentNamespace: "kube-system",
+					AutoscalerExtraArgs: map[string]string{
+						"cloud-provider":  "stub",
+						"logtostderr":     "true",
+						"stderrthreshold": "info",
+						"v":               "2",
+						"namespace":       "kube-system",
+					},
 				},
 			},
 		},
@ -143,7 +157,7 @@ func TestCreateAutoscalingStrategy(t *testing.T) {
 			require := require.New(t)

 			k8sClient := &stubK8sClient{createErr: tc.createErr}
-			err := createAutoscalingStrategy(context.Background(), k8sClient)
+			err := createAutoscalingStrategy(context.Background(), k8sClient, "stub")
 			if tc.wantErr {
 				assert.Error(err)
 				return
@ -221,9 +235,12 @@ func TestCreateScalingGroup(t *testing.T) {
 					Name: "group-name",
 				},
 				Spec: updatev1alpha1.ScalingGroupSpec{
-					NodeImage:   constants.NodeImageResourceName,
-					GroupID:     "group-id",
-					Autoscaling: true,
+					NodeImage:           constants.NodeImageResourceName,
+					GroupID:             "group-id",
+					AutoscalerGroupName: "group-Name",
+					Min:                 1,
+					Max:                 10,
+					Role:                updatev1alpha1.WorkerRole,
 				},
 			},
 		},
@ -239,9 +256,12 @@ func TestCreateScalingGroup(t *testing.T) {
 					Name: "group-name",
 				},
 				Spec: updatev1alpha1.ScalingGroupSpec{
-					NodeImage:   constants.NodeImageResourceName,
-					GroupID:     "group-id",
-					Autoscaling: true,
+					NodeImage:           constants.NodeImageResourceName,
+					GroupID:             "group-id",
+					AutoscalerGroupName: "group-Name",
+					Min:                 1,
+					Max:                 10,
+					Role:                updatev1alpha1.WorkerRole,
 				},
 			},
 		},
@ -253,7 +273,7 @@ func TestCreateScalingGroup(t *testing.T) {
 			require := require.New(t)

 			k8sClient := &stubK8sClient{createErr: tc.createErr}
-			err := createScalingGroup(context.Background(), k8sClient, "group-id", "group-name", true)
+			err := createScalingGroup(context.Background(), k8sClient, "group-id", "group-Name", "group-Name", updatev1alpha1.WorkerRole)
 			if tc.wantErr {
 				assert.Error(err)
 				return
@ -300,7 +320,11 @@ func (g *stubScalingGroupGetter) GetScalingGroupImage(ctx context.Context, scali
 	return g.store[scalingGroupID].image, g.imageErr
 }

-func (g *stubScalingGroupGetter) GetScalingGroupName(ctx context.Context, scalingGroupID string) (string, error) {
+func (g *stubScalingGroupGetter) GetScalingGroupName(scalingGroupID string) (string, error) {
+	return g.store[scalingGroupID].name, g.nameErr
+}
+
+func (g *stubScalingGroupGetter) GetAutoscalingGroupName(scalingGroupID string) (string, error) {
 	return g.store[scalingGroupID].name, g.nameErr
 }

@ -315,6 +339,10 @@ func (g *stubScalingGroupGetter) ListScalingGroups(ctx context.Context, uid stri
 	return controlPlaneGroupIDs, workerGroupIDs, g.listErr
 }

+func (g *stubScalingGroupGetter) AutoscalingCloudProvider() string {
+	return "stub"
+}
+
 type scalingGroupStoreItem struct {
 	groupID        string
 	name           string
--- a/operators/constellation-node-operator/internal/gcp/client/autocaler.go
+++ b/operators/constellation-node-operator/internal/gcp/client/autocaler.go
@ -0,0 +1,12 @@
+/*
+Copyright (c) Edgeless Systems GmbH
+
+SPDX-License-Identifier: AGPL-3.0-only
+*/
+
+package client
+
+// AutoscalingCloudProvider returns the cloud-provider name as used by k8s cluster-autoscaler.
+func (c *Client) AutoscalingCloudProvider() string {
+	return "gce"
+}
--- a/operators/constellation-node-operator/internal/gcp/client/disks.go
+++ b/operators/constellation-node-operator/internal/gcp/client/disks.go
@ -40,3 +40,12 @@ func uriNormalize(imageURI string) string {
 	}
 	return matches[1]
 }
+
+// ensureURIPrefixed ensures that a compute API URI is prefixed with the optional URI prefix.
+func ensureURIPrefixed(uri string) string {
+	matches := computeAPIBase.FindStringSubmatch(uri)
+	if len(matches) == 2 {
+		return uri
+	}
+	return "https://www.googleapis.com/compute/v1/" + uri
+}
--- a/operators/constellation-node-operator/internal/gcp/client/scalinggroup.go
+++ b/operators/constellation-node-operator/internal/gcp/client/scalinggroup.go
@ -10,7 +10,6 @@ import (
 	"context"
 	"errors"
 	"fmt"
-	"strings"

 	"google.golang.org/api/iterator"
 	computepb "google.golang.org/genproto/googleapis/cloud/compute/v1"
@ -87,12 +86,23 @@ func (c *Client) SetScalingGroupImage(ctx context.Context, scalingGroupID, image
 }

 // GetScalingGroupName retrieves the name of a scaling group.
-func (c *Client) GetScalingGroupName(ctx context.Context, scalingGroupID string) (string, error) {
+// This keeps the casing of the original name, but Kubernetes requires the name to be lowercase,
+// so use strings.ToLower() on the result if using the name in a Kubernetes context.
+func (c *Client) GetScalingGroupName(scalingGroupID string) (string, error) {
 	_, _, instanceGroupName, err := splitInstanceGroupID(scalingGroupID)
 	if err != nil {
 		return "", fmt.Errorf("getting scaling group name: %w", err)
 	}
-	return strings.ToLower(instanceGroupName), nil
+	return instanceGroupName, nil
+}
+
+// GetScalingGroupName retrieves the name of a scaling group as needed by the cluster-autoscaler.
+func (c *Client) GetAutoscalingGroupName(scalingGroupID string) (string, error) {
+	project, zone, instanceGroupName, err := splitInstanceGroupID(scalingGroupID)
+	if err != nil {
+		return "", fmt.Errorf("getting autoscaling scaling group name: %w", err)
+	}
+	return ensureURIPrefixed(fmt.Sprintf("projects/%s/zones/%s/instanceGroups/%s", project, zone, instanceGroupName)), nil
 }

 // ListScalingGroups retrieves a list of scaling groups for the cluster.
--- a/operators/constellation-node-operator/internal/gcp/client/scalinggroup_test.go
+++ b/operators/constellation-node-operator/internal/gcp/client/scalinggroup_test.go
@ -311,7 +311,7 @@ func TestGetScalingGroupName(t *testing.T) {
 			require := require.New(t)

 			client := Client{}
-			gotName, err := client.GetScalingGroupName(context.Background(), tc.scalingGroupID)
+			gotName, err := client.GetScalingGroupName(tc.scalingGroupID)
 			if tc.wantErr {
 				assert.Error(err)
 				return
--- a/operators/constellation-node-operator/main.go
+++ b/operators/constellation-node-operator/main.go
@ -191,7 +191,11 @@ type cspAPI interface {
 	// SetScalingGroupImage sets the image to be used by newly created nodes in a scaling group.
 	SetScalingGroupImage(ctx context.Context, scalingGroupID, imageURI string) error
 	// GetScalingGroupName retrieves the name of a scaling group.
-	GetScalingGroupName(ctx context.Context, scalingGroupID string) (string, error)
+	GetScalingGroupName(scalingGroupID string) (string, error)
+	// GetScalingGroupName retrieves the name of a scaling group as needed by the cluster-autoscaler.
+	GetAutoscalingGroupName(scalingGroupID string) (string, error)
 	// ListScalingGroups retrieves a list of scaling groups for the cluster.
 	ListScalingGroups(ctx context.Context, uid string) (controlPlaneGroupIDs []string, workerGroupIDs []string, err error)
+	// AutoscalingCloudProvider returns the cloud-provider name as used by k8s cluster-autoscaler.
+	AutoscalingCloudProvider() string
 }