Paul Meyer 81acdecd22 bazle: manage 3rdparty/node-maintainance-operator
Signed-off-by: Paul Meyer <49727155+katexochen@users.noreply.github.com>
2023-03-29 12:51:40 -04:00

254 lines
8.4 KiB
Go

/*
Copyright 2021.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package v1beta1
import (
"context"
"fmt"
"os"
"path/filepath"
v1 "k8s.io/api/core/v1"
policyv1 "k8s.io/api/policy/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
logf "sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/webhook"
)
// Error messages produced by the NodeMaintenance validation in this file.
// Entries containing %s are format strings that take the node name.
const (
// ErrorNodeNotExists is the message format used when no node with the requested name was found.
ErrorNodeNotExists = "invalid nodeName, no node with name %s found"
// ErrorNodeMaintenanceExists is the message format used when another NodeMaintenance already targets the node.
ErrorNodeMaintenanceExists = "invalid nodeName, a NodeMaintenance for node %s already exists"
// ErrorNodeNameUpdateForbidden is the message used when an update changes spec.NodeName.
ErrorNodeNameUpdateForbidden = "updating spec.NodeName isn't allowed"
// ErrorControlPlaneQuorumViolation is the message used when the etcd quorum guard PDB allows no disruptions.
ErrorControlPlaneQuorumViolation = "can not put master/control-plane node into maintenance at this moment, it would violate the master/control-plane node quorum"
)
// Names, namespace and node labels used by the control-plane quorum validation.
const (
EtcdQuorumPDBNewName = "etcd-guard-pdb" // The new name of the PDB - From OCP 4.11
EtcdQuorumPDBOldName = "etcd-quorum-guard" // The old name of the PDB - Up to OCP 4.10
// EtcdQuorumPDBNamespace is the namespace in which the PDB is looked up under either name.
EtcdQuorumPDBNamespace = "openshift-etcd"
// LabelNameRoleMaster and LabelNameRoleControlPlane are node label keys; the presence of
// either one makes isControlPlaneNode report true.
LabelNameRoleMaster = "node-role.kubernetes.io/master"
LabelNameRoleControlPlane = "node-role.kubernetes.io/control-plane"
)
// Directory and file names of the webhook serving certificate and key that
// SetupWebhookWithManager looks for (expected to be injected by OLM).
const (
// WebhookCertDir is the directory that is checked for the certificate and key files.
WebhookCertDir = "/apiserver.local.config/certificates"
// WebhookCertName is the file name of the certificate inside WebhookCertDir.
WebhookCertName = "apiserver.crt"
// WebhookKeyName is the file name of the private key inside WebhookCertDir.
WebhookKeyName = "apiserver.key"
)
// nodemaintenancelog is the logger used throughout this package; it is derived from the
// controller-runtime logger under the name "nodemaintenance-resource".
var nodemaintenancelog = logf.Log.WithName("nodemaintenance-resource")
// EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN!
// NodeMaintenanceValidator validates NodeMaintenance resources. It is a separate type
// because validation needs a client to read objects from the cluster.
// +k8s:deepcopy-gen=false
type NodeMaintenanceValidator struct {
// client is used to fetch Nodes, list NodeMaintenances and read the etcd quorum guard PDB.
client client.Client
}
// validator is the package-level validator that the webhook methods on NodeMaintenance
// delegate to. It is nil until SetupWebhookWithManager assigns it.
var validator *NodeMaintenanceValidator
// SetupWebhookWithManager registers the webhook for NodeMaintenance with mgr.
//
// It first stores a validator backed by the manager's client in the package-level
// validator variable. If both the certificate and the key file can be stat'ed under
// WebhookCertDir, the manager's webhook server is pointed at them; otherwise a log
// message is written and the server settings are left untouched.
func (r *NodeMaintenance) SetupWebhookWithManager(mgr ctrl.Manager) error {
	validator = &NodeMaintenanceValidator{client: mgr.GetClient()}

	// A file counts as present only if os.Stat succeeds; any error means "not injected".
	present := func(name string) bool {
		_, statErr := os.Stat(filepath.Join(WebhookCertDir, name))
		return statErr == nil
	}

	// Short-circuit evaluation checks the certificate first and skips the key if it is missing.
	if olmCerts := present(WebhookCertName) && present(WebhookKeyName); !olmCerts {
		nodemaintenancelog.Info("OLM injected certs for webhooks not found")
	} else {
		srv := mgr.GetWebhookServer()
		srv.CertDir = WebhookCertDir
		srv.CertName = WebhookCertName
		srv.KeyName = WebhookKeyName
	}

	builder := ctrl.NewWebhookManagedBy(mgr).For(r)
	return builder.Complete()
}
// TODO(user): change verbs to "verbs=create;update;delete" if you want to enable deletion validation.
//+kubebuilder:webhook:path=/validate-nodemaintenance-medik8s-io-v1beta1-nodemaintenance,mutating=false,failurePolicy=fail,sideEffects=None,groups=nodemaintenance.medik8s.io,resources=nodemaintenances,verbs=create;update,versions=v1beta1,name=vnodemaintenance.kb.io,admissionReviewVersions=v1
var _ webhook.Validator = &NodeMaintenance{} // compile-time check that *NodeMaintenance implements webhook.Validator
// ValidateCreate implements webhook.Validator so a webhook will be registered for the type.
// It logs the request and hands the receiver to the package-level validator; if that
// validator has not been set up yet, an error is returned instead.
func (r *NodeMaintenance) ValidateCreate() error {
	nodemaintenancelog.Info("validate create", "name", r.Name)
	if validator != nil {
		return validator.ValidateCreate(r)
	}
	return fmt.Errorf("nodemaintenance validator isn't initialized yet")
}
// ValidateUpdate implements webhook.Validator so a webhook will be registered for the type.
//
// old is the object as it was before the update and must hold a non-nil *NodeMaintenance.
// Any other value (a different dynamic type, a nil interface or a nil pointer) yields an
// error rather than a panic. An error is also returned when the package-level validator
// has not been initialized yet. Otherwise the result of the validator's ValidateUpdate
// is returned.
func (r *NodeMaintenance) ValidateUpdate(old runtime.Object) error {
	nodemaintenancelog.Info("validate update", "name", r.Name)
	if validator == nil {
		return fmt.Errorf("nodemaintenance validator isn't initialized yet")
	}
	// Use the checked form of the type assertion: the unchecked form panics on a mismatch,
	// and a nil pointer would panic later when its Spec is read.
	oldNM, ok := old.(*NodeMaintenance)
	if !ok || oldNM == nil {
		return fmt.Errorf("old object must be a non-nil *NodeMaintenance, got %T", old)
	}
	return validator.ValidateUpdate(r, oldNM)
}
// ValidateDelete implements webhook.Validator so a webhook will be registered for the type.
// No deletion-specific checks are performed: once the package-level validator is set up,
// every delete is accepted; before that, an error is returned.
func (r *NodeMaintenance) ValidateDelete() error {
	nodemaintenancelog.Info("validate delete", "name", r.Name)
	if validator != nil {
		return nil
	}
	return fmt.Errorf("nodemaintenance validator isn't initialized yet")
}
// ValidateCreate runs the creation checks for nm against its spec.NodeName, in order:
//  1. a node with that name exists,
//  2. no NodeMaintenance for that node exists yet,
//  3. for control-plane nodes, the maintenance does not violate quorum.
//
// The first failing check is logged and its error returned; later checks are skipped.
func (v *NodeMaintenanceValidator) ValidateCreate(nm *NodeMaintenance) error {
	nodeName := nm.Spec.NodeName
	checks := []func(string) error{
		v.validateNodeExists,
		v.validateNoNodeMaintenanceExists,
		v.validateControlPlaneQuorum,
	}
	for _, check := range checks {
		if err := check(nodeName); err != nil {
			nodemaintenancelog.Info("validation failed", "error", err)
			return err
		}
	}
	return nil
}
// ValidateUpdate rejects an update that changes spec.NodeName between old and new.
// Every other change is accepted.
func (v *NodeMaintenanceValidator) ValidateUpdate(new, old *NodeMaintenance) error {
	if new.Spec.NodeName == old.Spec.NodeName {
		return nil
	}
	nodemaintenancelog.Info("validation failed", "error", ErrorNodeNameUpdateForbidden)
	return fmt.Errorf(ErrorNodeNameUpdateForbidden)
}
// validateNodeExists returns an error unless a node named nodeName can be fetched.
// A lookup failure and a missing node produce different messages; the lookup error is
// embedded as text (%v), not wrapped.
func (v *NodeMaintenanceValidator) validateNodeExists(nodeName string) error {
	node, err := getNode(nodeName, v.client)
	switch {
	case err != nil:
		return fmt.Errorf("could not get node for validating spec.NodeName, please try again: %v", err)
	case node == nil:
		return fmt.Errorf(ErrorNodeNotExists, nodeName)
	default:
		return nil
	}
}
// validateNoNodeMaintenanceExists lists all NodeMaintenances and returns an error if any
// of them already targets nodeName, or if the list call itself fails.
func (v *NodeMaintenanceValidator) validateNoNodeMaintenanceExists(nodeName string) error {
	existing := NodeMaintenanceList{}
	err := v.client.List(context.TODO(), &existing, &client.ListOptions{})
	if err != nil {
		return fmt.Errorf("could not list NodeMaintenances for validating spec.NodeName, please try again: %v", err)
	}
	for i := range existing.Items {
		if existing.Items[i].Spec.NodeName != nodeName {
			continue
		}
		return fmt.Errorf(ErrorNodeMaintenanceExists, nodeName)
	}
	return nil
}
// validateControlPlaneQuorum decides whether putting nodeName into maintenance is
// acceptable with respect to the control-plane quorum.
//
// Nodes without a control-plane label pass immediately. For control-plane nodes the
// etcd quorum guard PodDisruptionBudget is looked up in EtcdQuorumPDBNamespace, trying
// the new name first and the old name second. If neither exists the check is skipped
// with a log message; if the PDB reports zero allowed disruptions an error is returned.
func (v *NodeMaintenanceValidator) validateControlPlaneQuorum(nodeName string) error {
	node, nodeErr := getNode(nodeName, v.client)
	switch {
	case nodeErr != nil:
		return fmt.Errorf("could not get node for master/control-plane quorum validation, please try again: %v", nodeErr)
	case node == nil:
		// this should have been caught already, but just in case
		return fmt.Errorf(ErrorNodeNotExists, nodeName)
	case !isControlPlaneNode(node):
		// only control-plane nodes are subject to the quorum check
		return nil
	}

	// pdbMissing marks "no PDB found so far".
	const pdbMissing = int32(-1)
	allowed := pdbMissing
	for _, name := range []string{EtcdQuorumPDBNewName, EtcdQuorumPDBOldName} {
		var budget policyv1.PodDisruptionBudget
		lookup := types.NamespacedName{Namespace: EtcdQuorumPDBNamespace, Name: name}
		getErr := v.client.Get(context.TODO(), lookup, &budget)
		if apierrors.IsNotFound(getErr) {
			// not present under this name, fall through to the next candidate
			continue
		}
		if getErr != nil {
			return fmt.Errorf("could not get the etcd quorum guard PDB for master/control-plane quorum validation, please try again: %v", getErr)
		}
		allowed = budget.Status.DisruptionsAllowed
		break
	}

	switch allowed {
	case pdbMissing:
		// TODO do we need a fallback for k8s clusters?
		nodemaintenancelog.Info("etcd quorum guard PDB hasn't been found. Skipping master/control-plane quorum validation.")
		return nil
	case 0:
		return fmt.Errorf(ErrorControlPlaneQuorumViolation)
	default:
		return nil
	}
}
// getNode fetches the node named nodeName using the given client.
// A nil node together with a nil error means the node was not found; any other
// lookup failure is returned as an error.
func getNode(nodeName string, client client.Client) (*v1.Node, error) {
	found := &v1.Node{}
	err := client.Get(context.TODO(), types.NamespacedName{Name: nodeName}, found)
	switch {
	case err == nil:
		return found, nil
	case apierrors.IsNotFound(err):
		return nil, nil
	default:
		return nil, fmt.Errorf("could not get node: %v", err)
	}
}
// isControlPlaneNode reports whether node carries the master or the control-plane
// role label. Only the presence of the label key matters; its value is ignored.
func isControlPlaneNode(node *v1.Node) bool {
	for _, roleLabel := range [...]string{LabelNameRoleMaster, LabelNameRoleControlPlane} {
		if _, present := node.Labels[roleLabel]; present {
			return true
		}
	}
	return false
}