operators: infrastructure autodiscovery (#1958)

* helm: configure GCP cloud controller manager to search in all zones of a region

See also: d716fdd452/providers/gce/gce.go (L376-L380)

* operators: add nodeGroupName to ScalingGroup CRD

NodeGroupName is the human-friendly name of the node group that will be exposed to customers via the Constellation config in the future.

* operators: support simple executor / scheduler to reconcile on non-k8s resources

* operators: add new return type for ListScalingGroups to support arbitrary node groups

* operators: ListScalingGroups should return additionally created node groups on AWS

* operators: ListScalingGroups should return additionally created node groups on Azure

* operators: ListScalingGroups should return additionally created node groups on GCP

* operators: ListScalingGroups should return additionally created node groups on unsupported CSPs

* operators: implement external scaling group reconciler

This controller scans the cloud provider infrastructure and changes k8s resources accordingly.
It creates ScaleSet resources when new node groups are created and deletes them if the node groups are removed.

* operators: no longer create scale sets when the operator starts

In the future, scale sets will be created dynamically.

* operators: watch for node join/leave events using a controller

* operators: deploy new controllers

* docs: update auto scaling documentation with support for node groups
This commit is contained in:
Malte Poll 2023-07-05 07:27:34 +02:00 committed by Adrian Stobbe
parent 10a540c290
commit 388ff011a3
36 changed files with 1836 additions and 232 deletions

View file

@ -23,6 +23,7 @@ go_library(
visibility = ["//operators/constellation-node-operator:__subpackages__"],
deps = [
"//operators/constellation-node-operator/api/v1alpha1",
"//operators/constellation-node-operator/internal/cloud/api",
"@com_github_googleapis_gax_go_v2//:gax-go",
"@com_github_spf13_afero//:afero",
"@com_google_cloud_go_compute//apiv1",
@ -51,6 +52,7 @@ go_test(
embed = [":client"],
deps = [
"//operators/constellation-node-operator/api/v1alpha1",
"//operators/constellation-node-operator/internal/cloud/api",
"@com_github_googleapis_gax_go_v2//:gax-go",
"@com_github_spf13_afero//:afero",
"@com_github_stretchr_testify//assert",

View file

@ -13,6 +13,8 @@ import (
"strings"
"cloud.google.com/go/compute/apiv1/computepb"
updatev1alpha1 "github.com/edgelesssys/constellation/v2/operators/constellation-node-operator/v2/api/v1alpha1"
cspapi "github.com/edgelesssys/constellation/v2/operators/constellation-node-operator/v2/internal/cloud/api"
"google.golang.org/api/iterator"
)
@ -106,7 +108,8 @@ func (c *Client) GetAutoscalingGroupName(scalingGroupID string) (string, error)
}
// ListScalingGroups retrieves a list of scaling groups for the cluster.
func (c *Client) ListScalingGroups(ctx context.Context, uid string) (controlPlaneGroupIDs []string, workerGroupIDs []string, err error) {
func (c *Client) ListScalingGroups(ctx context.Context, uid string) ([]cspapi.ScalingGroup, error) {
results := []cspapi.ScalingGroup{}
iter := c.instanceGroupManagersAPI.AggregatedList(ctx, &computepb.AggregatedListInstanceGroupManagersRequest{
Project: c.projectID,
})
@ -115,7 +118,7 @@ func (c *Client) ListScalingGroups(ctx context.Context, uid string) (controlPlan
break
}
if err != nil {
return nil, nil, fmt.Errorf("listing instance group managers: %w", err)
return nil, fmt.Errorf("listing instance group managers: %w", err)
}
if instanceGroupManagerScopedListPair.Value == nil {
continue
@ -134,7 +137,7 @@ func (c *Client) ListScalingGroups(ctx context.Context, uid string) (controlPlan
InstanceTemplate: templateURI[len(templateURI)-1],
})
if err != nil {
return nil, nil, fmt.Errorf("getting instance template: %w", err)
return nil, fmt.Errorf("getting instance template: %w", err)
}
if template.Properties == nil || template.Properties.Labels == nil {
continue
@ -145,18 +148,43 @@ func (c *Client) ListScalingGroups(ctx context.Context, uid string) (controlPlan
groupID, err := c.canonicalInstanceGroupID(ctx, *grpManager.SelfLink)
if err != nil {
return nil, nil, fmt.Errorf("normalizing instance group ID: %w", err)
return nil, fmt.Errorf("normalizing instance group ID: %w", err)
}
switch strings.ToLower(template.Properties.Labels["constellation-role"]) {
case "control-plane", "controlplane":
controlPlaneGroupIDs = append(controlPlaneGroupIDs, groupID)
case "worker":
workerGroupIDs = append(workerGroupIDs, groupID)
role := updatev1alpha1.NodeRoleFromString(template.Properties.Labels["constellation-role"])
name, err := c.GetScalingGroupName(groupID)
if err != nil {
return nil, fmt.Errorf("getting scaling group name: %w", err)
}
nodeGroupName := template.Properties.Labels["constellation-node-group"]
// fallback for legacy clusters
// TODO(malt3): remove this fallback once we can assume all clusters have the correct labels
if nodeGroupName == "" {
switch role {
case updatev1alpha1.ControlPlaneRole:
nodeGroupName = "control_plane_default"
case updatev1alpha1.WorkerRole:
nodeGroupName = "worker_default"
}
}
autoscalerGroupName, err := c.GetAutoscalingGroupName(groupID)
if err != nil {
return nil, fmt.Errorf("getting autoscaling group name: %w", err)
}
results = append(results, cspapi.ScalingGroup{
Name: name,
NodeGroupName: nodeGroupName,
GroupID: groupID,
AutoscalingGroupName: autoscalerGroupName,
Role: role,
})
}
}
return controlPlaneGroupIDs, workerGroupIDs, nil
return results, nil
}
func (c *Client) getScalingGroupTemplate(ctx context.Context, scalingGroupID string) (*computepb.InstanceTemplate, error) {

View file

@ -12,6 +12,7 @@ import (
"testing"
"cloud.google.com/go/compute/apiv1/computepb"
cspapi "github.com/edgelesssys/constellation/v2/operators/constellation-node-operator/v2/internal/cloud/api"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"google.golang.org/protobuf/proto"
@ -330,8 +331,7 @@ func TestListScalingGroups(t *testing.T) {
templateLabels map[string]string
listInstanceGroupManagersErr error
templateGetErr error
wantControlPlanes []string
wantWorkers []string
wantGroups []cspapi.ScalingGroup
wantErr bool
}{
"list instance group managers fails": {
@ -353,8 +353,14 @@ func TestListScalingGroups(t *testing.T) {
"constellation-uid": "uid",
"constellation-role": "control-plane",
},
wantControlPlanes: []string{
"projects/project/zones/zone/instanceGroupManagers/test-control-plane-uid",
wantGroups: []cspapi.ScalingGroup{
{
Name: "test-control-plane-uid",
NodeGroupName: "control_plane_default",
GroupID: "projects/project/zones/zone/instanceGroupManagers/test-control-plane-uid",
AutoscalingGroupName: "https://www.googleapis.com/compute/v1/projects/project/zones/zone/instanceGroups/test-control-plane-uid",
Role: "ControlPlane",
},
},
},
"list instance group managers for worker": {
@ -365,8 +371,33 @@ func TestListScalingGroups(t *testing.T) {
"constellation-uid": "uid",
"constellation-role": "worker",
},
wantWorkers: []string{
"projects/project/zones/zone/instanceGroupManagers/test-worker-uid",
wantGroups: []cspapi.ScalingGroup{
{
Name: "test-worker-uid",
NodeGroupName: "worker_default",
GroupID: "projects/project/zones/zone/instanceGroupManagers/test-worker-uid",
AutoscalingGroupName: "https://www.googleapis.com/compute/v1/projects/project/zones/zone/instanceGroups/test-worker-uid",
Role: "Worker",
},
},
},
"list instance group managers with custom group name": {
name: proto.String("test-worker-uid"),
groupID: proto.String("projects/project/zones/zone/instanceGroupManagers/test-worker-uid"),
templateRef: proto.String("projects/project/global/instanceTemplates/test-control-plane-uid"),
templateLabels: map[string]string{
"constellation-uid": "uid",
"constellation-role": "worker",
"constellation-node-group": "custom-group-name",
},
wantGroups: []cspapi.ScalingGroup{
{
Name: "test-worker-uid",
NodeGroupName: "custom-group-name",
GroupID: "projects/project/zones/zone/instanceGroupManagers/test-worker-uid",
AutoscalingGroupName: "https://www.googleapis.com/compute/v1/projects/project/zones/zone/instanceGroups/test-worker-uid",
Role: "Worker",
},
},
},
"listing instance group managers is not dependant on resource name": {
@ -377,8 +408,14 @@ func TestListScalingGroups(t *testing.T) {
"constellation-uid": "uid",
"constellation-role": "control-plane",
},
wantControlPlanes: []string{
"projects/project/zones/zone/instanceGroupManagers/some-instance-group-manager",
wantGroups: []cspapi.ScalingGroup{
{
Name: "some-instance-group-manager",
NodeGroupName: "control_plane_default",
GroupID: "projects/project/zones/zone/instanceGroupManagers/some-instance-group-manager",
AutoscalingGroupName: "https://www.googleapis.com/compute/v1/projects/project/zones/zone/instanceGroups/some-instance-group-manager",
Role: "ControlPlane",
},
},
},
"unrelated instance group manager": {
@ -415,14 +452,13 @@ func TestListScalingGroups(t *testing.T) {
getErr: tc.templateGetErr,
},
}
gotControlPlanes, gotWorkers, err := client.ListScalingGroups(context.Background(), "uid")
gotGroups, err := client.ListScalingGroups(context.Background(), "uid")
if tc.wantErr {
assert.Error(err)
return
}
require.NoError(err)
assert.ElementsMatch(tc.wantControlPlanes, gotControlPlanes)
assert.ElementsMatch(tc.wantWorkers, gotWorkers)
assert.ElementsMatch(tc.wantGroups, gotGroups)
})
}
}