node operator: nodeimage controller: ensure heirs are promoted to updated once in same reconcile loop as node deletion

Prevents conditions where Reconcile is not called after deleting a node, leading to an out of date status on the nodeimage.
This commit is contained in:
Malte Poll 2022-09-06 17:36:08 +02:00 committed by Paul Meyer
parent 8b4918cc53
commit 5f98e699e4

View file

@ -163,6 +163,8 @@ func (r *NodeImageReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
return ctrl.Result{}, nil return ctrl.Result{}, nil
} }
// should requeue is set if a node is deleted
var shouldRequeue bool
// find pairs of mint nodes and outdated nodes in the same scaling group to become donor & heir // find pairs of mint nodes and outdated nodes in the same scaling group to become donor & heir
replacementPairs := r.pairDonorsAndHeirs(ctx, &desiredNodeImage, groups.Outdated, groups.Mint) replacementPairs := r.pairDonorsAndHeirs(ctx, &desiredNodeImage, groups.Outdated, groups.Mint)
// extend replacement pairs to include existing pairs of donors and heirs // extend replacement pairs to include existing pairs of donors and heirs
@ -170,29 +172,41 @@ func (r *NodeImageReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
// replace donor nodes by heirs // replace donor nodes by heirs
for _, pair := range replacementPairs { for _, pair := range replacementPairs {
logr.Info("Replacing node", "donorNode", pair.donor.Name, "heirNode", pair.heir.Name) logr.Info("Replacing node", "donorNode", pair.donor.Name, "heirNode", pair.heir.Name)
if err := r.replaceNode(ctx, &desiredNodeImage, pair); err != nil { done, err := r.replaceNode(ctx, &desiredNodeImage, pair)
if err != nil {
logr.Error(err, "Replacing node") logr.Error(err, "Replacing node")
return ctrl.Result{}, err return ctrl.Result{}, err
} }
if done {
shouldRequeue = true
// remove donor annotation from heir
if err := r.patchUnsetNodeAnnotations(ctx, pair.heir.Name, []string{donorAnnotation}); err != nil {
logr.Error(err, "Unable to remove donor annotation from heir", "heirNode", pair.heir.Name)
}
}
} }
// only create new nodes if the autoscaler is disabled. // only create new nodes if the autoscaler is disabled.
// otherwise, new nodes will also be created by the autoscaler // otherwise, new nodes will also be created by the autoscaler
if autoscalingEnabled { if autoscalingEnabled {
return ctrl.Result{}, nil return ctrl.Result{Requeue: shouldRequeue}, nil
} }
if err := r.createNewNodes(ctx, desiredNodeImage, groups.Outdated, pendingNodeList.Items, scalingGroupByID, newNodesBudget); err != nil { if err := r.createNewNodes(ctx, desiredNodeImage, groups.Outdated, pendingNodeList.Items, scalingGroupByID, newNodesBudget); err != nil {
return ctrl.Result{}, err return ctrl.Result{Requeue: shouldRequeue}, nil
} }
// cleanup obsolete nodes // cleanup obsolete nodes
for _, node := range groups.Obsolete { for _, node := range groups.Obsolete {
if _, err := r.deleteNode(ctx, &desiredNodeImage, node); err != nil { done, err := r.deleteNode(ctx, &desiredNodeImage, node)
if err != nil {
logr.Error(err, "Unable to remove obsolete node") logr.Error(err, "Unable to remove obsolete node")
} }
if done {
shouldRequeue = true
}
} }
return ctrl.Result{}, nil return ctrl.Result{Requeue: shouldRequeue}, nil
} }
// SetupWithManager sets up the controller with the Manager. // SetupWithManager sets up the controller with the Manager.
@ -403,20 +417,19 @@ func (r *NodeImageReconciler) ensureAutoscaling(ctx context.Context, autoscaling
// Labels are copied from the donor node to the heir node. // Labels are copied from the donor node to the heir node.
// Readiness of the heir node is awaited. // Readiness of the heir node is awaited.
// Deletion of the donor node is scheduled. // Deletion of the donor node is scheduled.
func (r *NodeImageReconciler) replaceNode(ctx context.Context, controller metav1.Object, pair replacementPair) error { func (r *NodeImageReconciler) replaceNode(ctx context.Context, controller metav1.Object, pair replacementPair) (bool, error) {
logr := log.FromContext(ctx) logr := log.FromContext(ctx)
if !reflect.DeepEqual(nodeutil.FilterLabels(pair.donor.Labels), nodeutil.FilterLabels(pair.heir.Labels)) { if !reflect.DeepEqual(nodeutil.FilterLabels(pair.donor.Labels), nodeutil.FilterLabels(pair.heir.Labels)) {
if err := r.copyNodeLabels(ctx, pair.donor.Name, pair.heir.Name); err != nil { if err := r.copyNodeLabels(ctx, pair.donor.Name, pair.heir.Name); err != nil {
logr.Error(err, "Copy node labels") logr.Error(err, "Copy node labels")
return err return false, err
} }
} }
heirReady := nodeutil.Ready(&pair.heir) heirReady := nodeutil.Ready(&pair.heir)
if !heirReady { if !heirReady {
return nil return false, nil
} }
_, err := r.deleteNode(ctx, controller, pair.donor) return r.deleteNode(ctx, controller, pair.donor)
return err
} }
// deleteNode safely removes a node from the cluster and issues termination of the node by the CSP. // deleteNode safely removes a node from the cluster and issues termination of the node by the CSP.