Mirror of https://github.com/edgelesssys/constellation.git (synced 2025-05-02 14:26:23 -04:00)
Let operator manage autoscaling of node groups
parent 67d9be38d7, commit e301f575df
18 changed files with 499 additions and 42 deletions
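
For context: the operator drives the cluster-autoscaler purely through its command line, passing each autoscaled node group as --nodes=<min>:<max>:<group name> and sorting the arguments so the resulting command is deterministic. The standalone sketch below illustrates that construction; the helper name buildAutoscalerArgs, the nodeGroup type, and the sample values are illustrative only and not part of this commit.

package main

import (
	"fmt"
	"sort"
)

// nodeGroup describes one autoscaled scaling group (illustrative only).
type nodeGroup struct {
	name     string
	min, max int
}

// buildAutoscalerArgs mirrors the reconciler's construction of the
// cluster-autoscaler command: extra args plus one --nodes=<min>:<max>:<name>
// flag per autoscaled group, sorted for a deterministic command line.
func buildAutoscalerArgs(extraArgs map[string]string, groups []nodeGroup) []string {
	args := []string{"./cluster-autoscaler"}
	for key, val := range extraArgs {
		args = append(args, "--"+key+"="+val)
	}
	for _, g := range groups {
		args = append(args, fmt.Sprintf("--nodes=%d:%d:%s", g.min, g.max, g.name))
	}
	sort.Strings(args[1:]) // keep the binary name first, sort everything else
	return args
}

func main() {
	fmt.Println(buildAutoscalerArgs(
		map[string]string{"foo": "bar", "baz": "qux"},
		[]nodeGroup{{name: "worker-group", min: 1, max: 10}},
	))
	// [./cluster-autoscaler --baz=qux --foo=bar --nodes=1:10:worker-group]
}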
@@ -8,14 +8,22 @@ package controllers
 import (
     "context"
+    "fmt"
+    "sort"

     appsv1 "k8s.io/api/apps/v1"
     "k8s.io/apimachinery/pkg/runtime"
     "k8s.io/apimachinery/pkg/types"
     "k8s.io/client-go/util/retry"
     ctrl "sigs.k8s.io/controller-runtime"
+    "sigs.k8s.io/controller-runtime/pkg/builder"
     "sigs.k8s.io/controller-runtime/pkg/client"
+    "sigs.k8s.io/controller-runtime/pkg/event"
+    "sigs.k8s.io/controller-runtime/pkg/handler"
     "sigs.k8s.io/controller-runtime/pkg/log"
+    "sigs.k8s.io/controller-runtime/pkg/predicate"
+    "sigs.k8s.io/controller-runtime/pkg/reconcile"
+    "sigs.k8s.io/controller-runtime/pkg/source"

     updatev1alpha1 "github.com/edgelesssys/constellation/operators/constellation-node-operator/api/v1alpha1"
 )
@@ -29,6 +37,7 @@ type AutoscalingStrategyReconciler struct {
 //+kubebuilder:rbac:groups=update.edgeless.systems,resources=autoscalingstrategies,verbs=get;list;watch;create;update;patch;delete
 //+kubebuilder:rbac:groups=update.edgeless.systems,resources=autoscalingstrategies/status,verbs=get;update;patch
 //+kubebuilder:rbac:groups=update.edgeless.systems,resources=autoscalingstrategies/finalizers,verbs=update
+//+kubebuilder:rbac:groups=update.edgeless.systems,resources=scalinggroups,verbs=get;list;watch
 //+kubebuilder:rbac:groups=apps,resources=deployments,verbs=get;list;watch;create;update;delete

 // Reconcile enables or disables the cluster-autoscaler based on the AutoscalingStrategy spec
@@ -46,8 +55,32 @@ func (r *AutoscalingStrategyReconciler) Reconcile(ctx context.Context, req ctrl.
         expectedReplicas = 1
     }

+    var scalingGroups updatev1alpha1.ScalingGroupList
+    if err := r.List(ctx, &scalingGroups); err != nil {
+        logr.Error(err, "Unable to fetch ScalingGroups")
+        return ctrl.Result{}, err
+    }
+
+    autoscalerArgs := []string{"./cluster-autoscaler"}
+    for key, val := range desiredAutoscalingStrategy.Spec.AutoscalerExtraArgs {
+        autoscalerArgs = append(autoscalerArgs, "--"+key+"="+val)
+    }
+    const nodeGroupFmt = "--nodes=%d:%d:%s"
+    for _, group := range scalingGroups.Items {
+        // Don't autoscale control plane nodes for safety reasons.
+        if group.Spec.Autoscaling && group.Spec.Role != updatev1alpha1.ControlPlaneRole {
+            groupArg := fmt.Sprintf(nodeGroupFmt, group.Spec.Min, group.Spec.Max, group.Spec.AutoscalerGroupName)
+            autoscalerArgs = append(autoscalerArgs, groupArg)
+        }
+    }
+    sort.Strings(autoscalerArgs[1:])

     var autoscalerDeployment appsv1.Deployment
-    if err := r.Get(ctx, client.ObjectKey{Namespace: desiredAutoscalingStrategy.Spec.DeploymentNamespace, Name: desiredAutoscalingStrategy.Spec.DeploymentName}, &autoscalerDeployment); err != nil {
+    deploymentKey := client.ObjectKey{
+        Namespace: desiredAutoscalingStrategy.Spec.DeploymentNamespace,
+        Name:      desiredAutoscalingStrategy.Spec.DeploymentName,
+    }
+    if err := r.Get(ctx, deploymentKey, &autoscalerDeployment); err != nil {
         logr.Error(err, "Unable to fetch autoscaler Deployment")
         return ctrl.Result{}, client.IgnoreNotFound(err)
     }
@@ -65,26 +98,116 @@ func (r *AutoscalingStrategyReconciler) Reconcile(ctx context.Context, req ctrl.
         return ctrl.Result{}, err
     }

+    var needUpdate bool
     if autoscalerDeployment.Spec.Replicas == nil || *autoscalerDeployment.Spec.Replicas != expectedReplicas {
-        logr.Info("Updating autoscaling replicas", "expectedReplicas", expectedReplicas)
-        autoscalerDeployment.Spec.Replicas = &expectedReplicas
-        if err := r.Update(ctx, &autoscalerDeployment); err != nil {
-            logr.Error(err, "Unable to update autoscaler Deployment")
-            return ctrl.Result{}, err
+        needUpdate = needUpdate || true
     }
+    containers := autoscalerDeployment.Spec.Template.Spec.Containers
+    if len(containers) != 0 && containers[0].Command == nil { // uninitialized
+        needUpdate = needUpdate || true
+    }
+    if len(containers) != 0 && containers[0].Command != nil { // args updated
+        if len(containers[0].Command) != len(autoscalerArgs) {
+            needUpdate = needUpdate || true
+        } else {
+            for i, arg := range containers[0].Command {
+                if arg != autoscalerArgs[i] {
+                    needUpdate = needUpdate || true
+                    break
+                }
+            }
+        }
-        return ctrl.Result{Requeue: true}, nil
     }

-    return ctrl.Result{}, nil
+    if !needUpdate {
+        return ctrl.Result{}, nil
+    }
+
+    logr.Info("Updating autoscaling replicas and command", "expectedReplicas", expectedReplicas, "autoscalerArgs", autoscalerArgs)
+    err := retry.RetryOnConflict(retry.DefaultRetry, func() error {
+        if err := r.Get(ctx, deploymentKey, &autoscalerDeployment); err != nil {
+            return err
+        }
+        autoscalerDeployment.Spec.Replicas = &expectedReplicas
+        if len(containers) != 0 {
+            logr.Info("Updating autoscaler command", "old", containers[0].Command, "new", autoscalerArgs)
+            autoscalerDeployment.Spec.Template.Spec.Containers[0].Command = autoscalerArgs
+        }
+        return r.Update(ctx, &autoscalerDeployment)
+    })
+    if err != nil {
+        logr.Error(err, "Unable to update autoscaler Deployment")
+        return ctrl.Result{}, err
+    }
+
+    return ctrl.Result{Requeue: true}, nil
 }

 // SetupWithManager sets up the controller with the Manager.
 func (r *AutoscalingStrategyReconciler) SetupWithManager(mgr ctrl.Manager) error {
     return ctrl.NewControllerManagedBy(mgr).
         For(&updatev1alpha1.AutoscalingStrategy{}).
+        Watches(
+            &source.Kind{Type: &updatev1alpha1.ScalingGroup{}},
+            handler.EnqueueRequestsFromMapFunc(r.findObjectsForDeployment),
+            builder.WithPredicates(scalingGroupChangePredicate()),
+        ).
         Complete(r)
 }

+func (r *AutoscalingStrategyReconciler) findObjectsForDeployment(_ client.Object) []reconcile.Request {
+    var autoscalingStrats updatev1alpha1.AutoscalingStrategyList
+    err := r.List(context.Background(), &autoscalingStrats)
+    if err != nil {
+        return []reconcile.Request{}
+    }
+
+    requests := make([]reconcile.Request, len(autoscalingStrats.Items))
+    for i, item := range autoscalingStrats.Items {
+        requests[i] = reconcile.Request{
+            NamespacedName: types.NamespacedName{
+                Name: item.GetName(),
+            },
+        }
+    }
+    return requests
+}
+
+// scalingGroupChangePredicate filters events on scaling group resources.
+func scalingGroupChangePredicate() predicate.Predicate {
+    return predicate.Funcs{
+        CreateFunc: func(e event.CreateEvent) bool {
+            return true
+        },
+        UpdateFunc: func(e event.UpdateEvent) bool {
+            oldGroup, ok := e.ObjectOld.(*updatev1alpha1.ScalingGroup)
+            if !ok {
+                return false
+            }
+            newGroup, ok := e.ObjectNew.(*updatev1alpha1.ScalingGroup)
+            if !ok {
+                return false
+            }
+            switch {
+            case oldGroup.Spec.Min != newGroup.Spec.Min:
+                return true
+            case oldGroup.Spec.Max != newGroup.Spec.Max:
+                return true
+            case oldGroup.Spec.Autoscaling != newGroup.Spec.Autoscaling:
+                return true
+            default:
+                return false
+            }
+        },
+        DeleteFunc: func(e event.DeleteEvent) bool {
+            return true
+        },
+        GenericFunc: func(e event.GenericEvent) bool {
+            return false
+        },
+    }
+}
+
 // tryUpdateStatus attempts to update the AutoscalingStrategy status field in a retry loop.
 func (r *AutoscalingStrategyReconciler) tryUpdateStatus(ctx context.Context, name types.NamespacedName, status updatev1alpha1.AutoscalingStrategyStatus) error {
     return retry.RetryOnConflict(retry.DefaultRetry, func() error {
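
The update in Reconcile wraps a get-modify-update sequence in client-go's conflict-retry helper, so a stale-revision conflict from a concurrent writer simply triggers another attempt. Below is a minimal, self-contained sketch of that pattern; the package name and the helper setAutoscalerCommand are hypothetical, the reconciler inlines this logic directly in Reconcile.

package example

import (
	"context"

	appsv1 "k8s.io/api/apps/v1"
	"k8s.io/client-go/util/retry"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// setAutoscalerCommand re-reads the Deployment on every attempt, applies the
// desired replicas and command, and retries on optimistic-concurrency conflicts.
func setAutoscalerCommand(ctx context.Context, c client.Client, key client.ObjectKey, command []string, replicas int32) error {
	return retry.RetryOnConflict(retry.DefaultRetry, func() error {
		var deploy appsv1.Deployment
		if err := c.Get(ctx, key, &deploy); err != nil {
			return err // fetch the latest revision before each attempt
		}
		deploy.Spec.Replicas = &replicas
		if len(deploy.Spec.Template.Spec.Containers) != 0 {
			deploy.Spec.Template.Spec.Containers[0].Command = command
		}
		return c.Update(ctx, &deploy) // a conflict error triggers another retry
	})
}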
@@ -28,6 +28,8 @@ var _ = Describe("AutoscalingStrategy controller", func() {
     ClusterAutoscalerDeploymentName = "cluster-autoscaler"
     ClusterAutoscalerDeploymentNamespace = "kube-system"
     AutoscalingStrategyName = "test-strategy"
+    ScalingGroupNameWorker = "worker-group"
+    ScalingGroupNameControlPlane = "control-plane-group"

     timeout = time.Second * 20
     duration = time.Second * 2
@@ -162,4 +164,182 @@ var _ = Describe("AutoscalingStrategy controller", func() {
             Expect(k8sClient.Delete(ctx, strategy)).Should(Succeed())
         })
     })

+    Context("When changing autoscaling of node groups", func() {
+        It("Should update the autoscaler deployment command", func() {
+            By("creating a cluster-autoscaler deployment")
+            ctx := context.Background()
+            autoscalerDeployment := &appsv1.Deployment{
+                TypeMeta: metav1.TypeMeta{
+                    APIVersion: "apps/v1",
+                    Kind:       "Deployment",
+                },
+                ObjectMeta: metav1.ObjectMeta{
+                    Name:      ClusterAutoscalerDeploymentName,
+                    Namespace: ClusterAutoscalerDeploymentNamespace,
+                },
+                Spec: appsv1.DeploymentSpec{
+                    Replicas: &ClusterAutoscalerStartingReplicas,
+                    Selector: &metav1.LabelSelector{
+                        MatchLabels: map[string]string{
+                            "app.kubernetes.io/name": "cluster-autoscaler",
+                        },
+                    },
+                    Template: corev1.PodTemplateSpec{
+                        ObjectMeta: metav1.ObjectMeta{
+                            Labels: map[string]string{
+                                "app.kubernetes.io/name": "cluster-autoscaler",
+                            },
+                        },
+                        Spec: corev1.PodSpec{
+                            Containers: []corev1.Container{
+                                {Image: "cluster-autoscaler", Name: "cluster-autoscaler"},
+                            },
+                        },
+                    },
+                },
+            }
+            Expect(k8sClient.Create(ctx, autoscalerDeployment)).Should(Succeed())
+            createdDeployment := &appsv1.Deployment{}
+            Eventually(func() error {
+                return k8sClient.Get(ctx, DeploymentLookupKey, createdDeployment)
+            }, timeout, interval).Should(Succeed())
+
+            Expect(createdDeployment.Spec.Template.Spec.Containers).NotTo(BeEmpty())
+            Expect(createdDeployment.Spec.Template.Spec.Containers[0].Command).To(BeEmpty())
+
+            By("creating an autoscaling strategy")
+            strategy := &updatev1alpha1.AutoscalingStrategy{
+                TypeMeta: metav1.TypeMeta{
+                    APIVersion: "update.edgeless.systems/v1alpha1",
+                    Kind:       "AutoscalingStrategy",
+                },
+                ObjectMeta: metav1.ObjectMeta{
+                    Name: AutoscalingStrategyName,
+                },
+                Spec: updatev1alpha1.AutoscalingStrategySpec{
+                    DeploymentName:      ClusterAutoscalerDeploymentName,
+                    DeploymentNamespace: ClusterAutoscalerDeploymentNamespace,
+                    AutoscalerExtraArgs: map[string]string{
+                        "foo": "bar",
+                        "baz": "qux",
+                    },
+                },
+            }
+            Expect(k8sClient.Create(ctx, strategy)).Should(Succeed())
+            strategyLookupKey := types.NamespacedName{Name: AutoscalingStrategyName}
+            createdStrategy := &updatev1alpha1.AutoscalingStrategy{}
+            Eventually(func() error {
+                return k8sClient.Get(ctx, strategyLookupKey, createdStrategy)
+            }, timeout, interval).Should(Succeed())
+
+            By("checking the autoscaling deployment eventually has the correct command")
+            Eventually(func() []string {
+                err := k8sClient.Get(ctx, DeploymentLookupKey, createdDeployment)
+                if err != nil {
+                    return nil
+                }
+                return createdDeployment.Spec.Template.Spec.Containers[0].Command
+            }, timeout, interval).Should(Equal([]string{
+                "./cluster-autoscaler",
+                "--baz=qux",
+                "--foo=bar",
+            }))
+
+            By("creating a new worker scaling group")
+            scalingGroup := &updatev1alpha1.ScalingGroup{
+                TypeMeta: metav1.TypeMeta{
+                    APIVersion: "update.edgeless.systems/v1alpha1",
+                    Kind:       "ScalingGroup",
+                },
+                ObjectMeta: metav1.ObjectMeta{
+                    Name: ScalingGroupNameWorker,
+                },
+                Spec: updatev1alpha1.ScalingGroupSpec{
+                    GroupID:             ScalingGroupNameWorker,
+                    AutoscalerGroupName: ScalingGroupNameWorker,
+                    Autoscaling:         true,
+                    Min:                 1,
+                    Max:                 10,
+                    Role:                updatev1alpha1.WorkerRole,
+                },
+            }
+            Expect(k8sClient.Create(ctx, scalingGroup)).Should(Succeed())
+            scalingGroupLookupKey := types.NamespacedName{Name: ScalingGroupNameWorker}
+            Eventually(func() error {
+                return k8sClient.Get(ctx, scalingGroupLookupKey, scalingGroup)
+            }, timeout, interval).Should(Succeed())
+            By("checking the controller eventually updates the autoscaler deployment command")
+            Eventually(func() []string {
+                err := k8sClient.Get(ctx, DeploymentLookupKey, createdDeployment)
+                if err != nil {
+                    return nil
+                }
+                return createdDeployment.Spec.Template.Spec.Containers[0].Command
+            }, timeout, interval).Should(Equal([]string{
+                "./cluster-autoscaler",
+                "--baz=qux",
+                "--foo=bar",
+                "--nodes=1:10:worker-group",
+            }))
+
+            By("Disabling autoscaling for the worker scaling group")
+            Eventually(func() error {
+                scalingGroup.Spec.Autoscaling = false
+                return k8sClient.Update(ctx, scalingGroup)
+            }, timeout, interval).Should(Succeed())
+            By("checking the controller eventually updates the autoscaler deployment command")
+            Eventually(func() []string {
+                err := k8sClient.Get(ctx, DeploymentLookupKey, createdDeployment)
+                if err != nil {
+                    return nil
+                }
+                return createdDeployment.Spec.Template.Spec.Containers[0].Command
+            }, timeout, interval).Should(Equal([]string{
+                "./cluster-autoscaler",
+                "--baz=qux",
+                "--foo=bar",
+            }))
+
+            By("creating a new control plane scaling group")
+            scalingGroup = &updatev1alpha1.ScalingGroup{
+                TypeMeta: metav1.TypeMeta{
+                    APIVersion: "update.edgeless.systems/v1alpha1",
+                    Kind:       "ScalingGroup",
+                },
+                ObjectMeta: metav1.ObjectMeta{
+                    Name: ScalingGroupNameControlPlane,
+                },
+                Spec: updatev1alpha1.ScalingGroupSpec{
+                    GroupID:             ScalingGroupNameControlPlane,
+                    AutoscalerGroupName: ScalingGroupNameControlPlane,
+                    Autoscaling:         true,
+                    Min:                 1,
+                    Max:                 10,
+                    Role:                updatev1alpha1.ControlPlaneRole,
+                },
+            }
+            Expect(k8sClient.Create(ctx, scalingGroup)).Should(Succeed())
+            scalingGroupLookupKey = types.NamespacedName{Name: ScalingGroupNameControlPlane}
+            Eventually(func() error {
+                return k8sClient.Get(ctx, scalingGroupLookupKey, scalingGroup)
+            }, timeout, interval).Should(Succeed())
+            By("checking the controller doesn't update the control plane node group autoscaling")
+            Consistently(func() []string {
+                err := k8sClient.Get(ctx, DeploymentLookupKey, createdDeployment)
+                if err != nil {
+                    return nil
+                }
+                return createdDeployment.Spec.Template.Spec.Containers[0].Command
+            }, timeout, interval).Should(Equal([]string{
+                "./cluster-autoscaler",
+                "--baz=qux",
+                "--foo=bar",
+            }))
+
+            By("cleaning up all resources")
+            Expect(k8sClient.Delete(ctx, autoscalerDeployment)).Should(Succeed())
+            Expect(k8sClient.Delete(ctx, strategy)).Should(Succeed())
+        })
+    })
 })
@@ -193,7 +193,7 @@ func nodeStateChangePredicate() predicate.Predicate {
 // findObjectsForNode requests reconciliation for PendingNode whenever the corresponding Node state changes.
 func (r *PendingNodeReconciler) findObjectsForNode(rawNode client.Object) []reconcile.Request {
     var pendingNodesList updatev1alpha1.PendingNodeList
-    err := r.List(context.TODO(), &pendingNodesList, client.MatchingFields{nodeNameKey: rawNode.GetName()})
+    err := r.List(context.Background(), &pendingNodesList, client.MatchingFields{nodeNameKey: rawNode.GetName()})
     if err != nil {
         return []reconcile.Request{}
     }