From 2776e40df7dba2b2fa056d0bff74c6458758c42a Mon Sep 17 00:00:00 2001 From: 3u13r Date: Mon, 25 Sep 2023 10:23:35 +0200 Subject: [PATCH] join: join over lb if available (#2348) * join: join over lb if available --- bootstrapper/cmd/bootstrapper/run.go | 1 - .../internal/joinclient/joinclient.go | 33 ++++++++---- .../internal/joinclient/joinclient_test.go | 8 +++ cli/internal/terraform/terraform/aws/main.tf | 17 +++++++ .../terraform/terraform/azure/main.tf | 12 ++++- cli/internal/terraform/terraform/gcp/main.tf | 14 ++++++ disk-mapper/internal/rejoinclient/BUILD.bazel | 1 + .../internal/rejoinclient/rejoinclient.go | 39 ++++++++++++--- .../rejoinclient/rejoinclient_test.go | 50 +++++++++++++------ disk-mapper/internal/setup/interface.go | 2 + internal/cloud/metadata/BUILD.bazel | 5 +- internal/cloud/metadata/metadata.go | 22 -------- 12 files changed, 142 insertions(+), 62 deletions(-) diff --git a/bootstrapper/cmd/bootstrapper/run.go b/bootstrapper/cmd/bootstrapper/run.go index d45ea9828..f3639bd59 100644 --- a/bootstrapper/cmd/bootstrapper/run.go +++ b/bootstrapper/cmd/bootstrapper/run.go @@ -99,5 +99,4 @@ type clusterInitJoiner interface { type metadataAPI interface { joinclient.MetadataAPI initserver.MetadataAPI - GetLoadBalancerEndpoint(ctx context.Context) (host, port string, err error) } diff --git a/bootstrapper/internal/joinclient/joinclient.go b/bootstrapper/internal/joinclient/joinclient.go index ef4295710..8bf902e7a 100644 --- a/bootstrapper/internal/joinclient/joinclient.go +++ b/bootstrapper/internal/joinclient/joinclient.go @@ -186,17 +186,29 @@ func (c *JoinClient) Stop() { } func (c *JoinClient) tryJoinWithAvailableServices() error { - ips, err := c.getControlPlaneIPs() - if err != nil { - return err - } + ctx, cancel := c.timeoutCtx() + defer cancel() - if len(ips) == 0 { + var endpoints []string + + ip, _, err := c.metadataAPI.GetLoadBalancerEndpoint(ctx) + if err != nil { + return fmt.Errorf("failed to get load balancer endpoint: %w", err) + } + endpoints = append(endpoints, net.JoinHostPort(ip, strconv.Itoa(constants.JoinServiceNodePort))) + + ips, err := c.getControlPlaneIPs(ctx) + if err != nil { + return fmt.Errorf("failed to get control plane IPs: %w", err) + } + endpoints = append(endpoints, ips...) + + if len(endpoints) == 0 { return errors.New("no control plane IPs found") } - for _, ip := range ips { - err = c.join(net.JoinHostPort(ip, strconv.Itoa(constants.JoinServiceNodePort))) + for _, endpoint := range endpoints { + err = c.join(net.JoinHostPort(endpoint, strconv.Itoa(constants.JoinServiceNodePort))) if err == nil { return nil } @@ -357,10 +369,7 @@ func (c *JoinClient) getDiskUUID() (string, error) { return c.disk.UUID() } -func (c *JoinClient) getControlPlaneIPs() ([]string, error) { - ctx, cancel := c.timeoutCtx() - defer cancel() - +func (c *JoinClient) getControlPlaneIPs(ctx context.Context) ([]string, error) { instances, err := c.metadataAPI.List(ctx) if err != nil { c.log.With(zap.Error(err)).Errorf("Failed to list instances from metadata API") @@ -425,6 +434,8 @@ type MetadataAPI interface { List(ctx context.Context) ([]metadata.InstanceMetadata, error) // Self retrieves the current instance. Self(ctx context.Context) (metadata.InstanceMetadata, error) + // GetLoadBalancerEndpoint retrieves the load balancer endpoint. + GetLoadBalancerEndpoint(ctx context.Context) (host, port string, err error) } type encryptedDisk interface { diff --git a/bootstrapper/internal/joinclient/joinclient_test.go b/bootstrapper/internal/joinclient/joinclient_test.go index 6c850a17b..b527b1f23 100644 --- a/bootstrapper/internal/joinclient/joinclient_test.go +++ b/bootstrapper/internal/joinclient/joinclient_test.go @@ -330,6 +330,10 @@ func (s *stubRepeaterMetadataAPI) List(_ context.Context) ([]metadata.InstanceMe return s.listInstances, s.listErr } +func (s *stubRepeaterMetadataAPI) GetLoadBalancerEndpoint(_ context.Context) (string, string, error) { + return "", "", nil +} + type stubMetadataAPI struct { selfAnswerC chan selfAnswer listAnswerC chan listAnswer @@ -352,6 +356,10 @@ func (s *stubMetadataAPI) List(_ context.Context) ([]metadata.InstanceMetadata, return answer.instances, answer.err } +func (s *stubMetadataAPI) GetLoadBalancerEndpoint(_ context.Context) (string, string, error) { + return "", "", nil +} + type selfAnswer struct { instance metadata.InstanceMetadata err error diff --git a/cli/internal/terraform/terraform/aws/main.tf b/cli/internal/terraform/terraform/aws/main.tf index bae981fa4..e9fbf4844 100644 --- a/cli/internal/terraform/terraform/aws/main.tf +++ b/cli/internal/terraform/terraform/aws/main.tf @@ -27,6 +27,7 @@ locals { ports_verify = "30081" ports_recovery = "9999" ports_debugd = "4000" + ports_join = "30090" target_group_arns = { control-plane : flatten([ module.load_balancer_target_bootstrapper.target_group_arn, @@ -34,6 +35,7 @@ locals { module.load_balancer_target_verify.target_group_arn, module.load_balancer_target_recovery.target_group_arn, module.load_balancer_target_konnectivity.target_group_arn, + module.load_balancer_target_join.target_group_arn, var.debug ? [module.load_balancer_target_debugd[0].target_group_arn] : [], ]) worker : [] @@ -96,6 +98,7 @@ resource "aws_lb" "front_end" { internal = false load_balancer_type = "network" tags = local.tags + security_groups = [aws_security_group.security_group.id] dynamic "subnet_mapping" { # TODO(malt3): use for_each = toset(module.public_private_subnet.all_zones) @@ -111,6 +114,10 @@ resource "aws_lb" "front_end" { } } enable_cross_zone_load_balancing = true + + lifecycle { + ignore_changes = [security_groups] + } } resource "aws_security_group" "security_group" { @@ -255,6 +262,16 @@ module "load_balancer_target_konnectivity" { healthcheck_protocol = "TCP" } +module "load_balancer_target_join" { + source = "./modules/load_balancer_target" + name = "${local.name}-join" + vpc_id = aws_vpc.vpc.id + lb_arn = aws_lb.front_end.arn + port = local.ports_join + tags = local.tags + healthcheck_protocol = "TCP" +} + module "instance_group" { source = "./modules/instance_group" for_each = var.node_groups diff --git a/cli/internal/terraform/terraform/azure/main.tf b/cli/internal/terraform/terraform/azure/main.tf index 71c42ae1b..73a83d491 100644 --- a/cli/internal/terraform/terraform/azure/main.tf +++ b/cli/internal/terraform/terraform/azure/main.tf @@ -32,6 +32,7 @@ locals { ports_konnectivity = "8132" ports_verify = "30081" ports_recovery = "9999" + ports_join = "30090" ports_debugd = "4000" cidr_vpc_subnet_nodes = "192.168.178.0/24" cidr_vpc_subnet_pods = "10.10.0.0/16" @@ -182,6 +183,12 @@ module "loadbalancer_backend_control_plane" { protocol = "Tcp", path = null }, + { + name = "join", + port = local.ports_join, + protocol = "Tcp", + path = null + }, var.debug ? [{ name = "debugd", port = local.ports_debugd, @@ -231,8 +238,9 @@ resource "azurerm_network_security_group" "security_group" { { name = "kubernetes", priority = 101, dest_port_range = local.ports_kubernetes }, { name = "bootstrapper", priority = 102, dest_port_range = local.ports_bootstrapper }, { name = "konnectivity", priority = 103, dest_port_range = local.ports_konnectivity }, - { name = "recovery", priority = 104, dest_port_range = local.ports_recovery }, - var.debug ? [{ name = "debugd", priority = 105, dest_port_range = local.ports_debugd }] : [], + { name = "join", priority = 104, dest_port_range = local.ports_recovery }, + { name = "recovery", priority = 105, dest_port_range = local.ports_join }, + var.debug ? [{ name = "debugd", priority = 106, dest_port_range = local.ports_debugd }] : [], ]) content { name = security_rule.value.name diff --git a/cli/internal/terraform/terraform/gcp/main.tf b/cli/internal/terraform/terraform/gcp/main.tf index 63d1f284a..a24ddd07c 100644 --- a/cli/internal/terraform/terraform/gcp/main.tf +++ b/cli/internal/terraform/terraform/gcp/main.tf @@ -42,6 +42,7 @@ locals { ports_konnectivity = "8132" ports_verify = "30081" ports_recovery = "9999" + ports_join = "30090" ports_debugd = "4000" cidr_vpc_subnet_nodes = "192.168.178.0/24" cidr_vpc_subnet_pods = "10.10.0.0/16" @@ -52,6 +53,7 @@ locals { { name = "verify", port = local.ports_verify }, { name = "konnectivity", port = local.ports_konnectivity }, { name = "recovery", port = local.ports_recovery }, + { name = "join", port = local.ports_join }, var.debug ? [{ name = "debugd", port = local.ports_debugd }] : [], ]) node_groups_by_role = { @@ -120,6 +122,7 @@ resource "google_compute_firewall" "firewall_external" { local.ports_kubernetes, local.ports_konnectivity, local.ports_recovery, + local.ports_join, var.debug ? [local.ports_debugd] : [], ]) } @@ -234,6 +237,17 @@ module "loadbalancer_recovery" { frontend_labels = merge(local.labels, { constellation-use = "recovery" }) } +module "loadbalancer_join" { + source = "./modules/loadbalancer" + name = local.name + health_check = "TCP" + backend_port_name = "join" + backend_instance_groups = local.control_plane_instance_groups + ip_address = google_compute_global_address.loadbalancer_ip.self_link + port = local.ports_join + frontend_labels = merge(local.labels, { constellation-use = "join" }) +} + module "loadbalancer_debugd" { count = var.debug ? 1 : 0 // only deploy debugd in debug mode source = "./modules/loadbalancer" diff --git a/disk-mapper/internal/rejoinclient/BUILD.bazel b/disk-mapper/internal/rejoinclient/BUILD.bazel index 24c33c106..b7bae0f1c 100644 --- a/disk-mapper/internal/rejoinclient/BUILD.bazel +++ b/disk-mapper/internal/rejoinclient/BUILD.bazel @@ -8,6 +8,7 @@ go_library( visibility = ["//disk-mapper:__subpackages__"], deps = [ "//internal/cloud/metadata", + "//internal/constants", "//internal/logger", "//internal/role", "//joinservice/joinproto", diff --git a/disk-mapper/internal/rejoinclient/rejoinclient.go b/disk-mapper/internal/rejoinclient/rejoinclient.go index d1824c293..4f50b0b41 100644 --- a/disk-mapper/internal/rejoinclient/rejoinclient.go +++ b/disk-mapper/internal/rejoinclient/rejoinclient.go @@ -14,10 +14,13 @@ package rejoinclient import ( "context" "errors" + "fmt" "net" + "strconv" "time" "github.com/edgelesssys/constellation/v2/internal/cloud/metadata" + "github.com/edgelesssys/constellation/v2/internal/constants" "github.com/edgelesssys/constellation/v2/internal/logger" "github.com/edgelesssys/constellation/v2/internal/role" "github.com/edgelesssys/constellation/v2/joinservice/joinproto" @@ -75,7 +78,7 @@ func (c *RejoinClient) Start(ctx context.Context, diskUUID string) (diskKey, mea defer c.log.Infof("RejoinClient stopped") for { - endpoints, err := c.getControlPlaneEndpoints() + endpoints, err := c.getJoinEndpoints() if err != nil { c.log.With(zap.Error(err)).Errorf("Failed to get control-plane endpoints") } else { @@ -130,19 +133,39 @@ func (c *RejoinClient) requestRejoinTicket(endpoint string) (*joinproto.IssueRej return joinproto.NewAPIClient(conn).IssueRejoinTicket(ctx, &joinproto.IssueRejoinTicketRequest{DiskUuid: c.diskUUID}) } -// getControlPlaneEndpoints requests the available control-plane endpoints from the metadata API. +// getJoinEndpoints requests the available control-plane endpoints from the metadata API. // The list is filtered to remove *this* node if it is a restarting control-plane node. -func (c *RejoinClient) getControlPlaneEndpoints() ([]string, error) { +// Furthermore, the load balancer's endpoint is added. +func (c *RejoinClient) getJoinEndpoints() ([]string, error) { ctx, cancel := c.timeoutCtx() defer cancel() - endpoints, err := metadata.JoinServiceEndpoints(ctx, c.metadataAPI) + + joinEndpoints := []string{} + + lbEndpoint, _, err := c.metadataAPI.GetLoadBalancerEndpoint(ctx) if err != nil { - return nil, err + return nil, fmt.Errorf("retrieving load balancer endpoint from cloud provider: %w", err) } + joinEndpoints = append(joinEndpoints, net.JoinHostPort(lbEndpoint, strconv.Itoa(constants.JoinServiceNodePort))) + + instances, err := c.metadataAPI.List(ctx) + if err != nil { + return nil, fmt.Errorf("retrieving instances list from cloud provider: %w", err) + } + + for _, instance := range instances { + if instance.Role == role.ControlPlane { + if instance.VPCIP != "" { + joinEndpoints = append(joinEndpoints, net.JoinHostPort(instance.VPCIP, strconv.Itoa(constants.JoinServiceNodePort))) + } + } + } + if c.nodeInfo.Role == role.ControlPlane { - return removeSelfFromEndpoints(c.nodeInfo.VPCIP, endpoints), nil + return removeSelfFromEndpoints(c.nodeInfo.VPCIP, joinEndpoints), nil } - return endpoints, nil + + return joinEndpoints, nil } // removeSelfFromEndpoints removes *this* node from the list of endpoints. @@ -169,4 +192,6 @@ type grpcDialer interface { type metadataAPI interface { // List retrieves all instances belonging to the current constellation. List(ctx context.Context) ([]metadata.InstanceMetadata, error) + // GetLoadBalancerEndpoint retrieves the load balancer endpoint. + GetLoadBalancerEndpoint(ctx context.Context) (host, port string, err error) } diff --git a/disk-mapper/internal/rejoinclient/rejoinclient_test.go b/disk-mapper/internal/rejoinclient/rejoinclient_test.go index d7f9e14e2..e78be25a3 100644 --- a/disk-mapper/internal/rejoinclient/rejoinclient_test.go +++ b/disk-mapper/internal/rejoinclient/rejoinclient_test.go @@ -123,7 +123,7 @@ func TestRemoveSelfFromEndpoints(t *testing.T) { } } -func TestGetControlPlaneEndpoints(t *testing.T) { +func TestGetJoinEndpoints(t *testing.T) { testInstances := []metadata.InstanceMetadata{ { Role: role.ControlPlane, @@ -154,7 +154,7 @@ func TestGetControlPlaneEndpoints(t *testing.T) { testCases := map[string]struct { nodeInfo metadata.InstanceMetadata meta stubMetadataAPI - wantInstances int + wantEndpoints int wantErr bool }{ "worker node": { @@ -163,9 +163,10 @@ func TestGetControlPlaneEndpoints(t *testing.T) { VPCIP: "192.0.2.1", }, meta: stubMetadataAPI{ - instances: testInstances, + instances: testInstances, + lbEndpoint: "192.0.2.100", }, - wantInstances: 3, + wantEndpoints: 4, }, "control-plane node not in list": { nodeInfo: metadata.InstanceMetadata{ @@ -173,9 +174,10 @@ func TestGetControlPlaneEndpoints(t *testing.T) { VPCIP: "192.0.2.1", }, meta: stubMetadataAPI{ - instances: testInstances, + instances: testInstances, + lbEndpoint: "192.0.2.100", }, - wantInstances: 3, + wantEndpoints: 4, }, "control-plane node in list": { nodeInfo: metadata.InstanceMetadata{ @@ -183,17 +185,28 @@ func TestGetControlPlaneEndpoints(t *testing.T) { VPCIP: "192.0.2.2", }, meta: stubMetadataAPI{ - instances: testInstances, + instances: testInstances, + lbEndpoint: "192.0.2.100", }, - wantInstances: 2, + wantEndpoints: 3, }, - "metadata error": { + "metadata list error": { nodeInfo: metadata.InstanceMetadata{ Role: role.ControlPlane, VPCIP: "192.0.2.1", }, meta: stubMetadataAPI{ - err: errors.New("error"), + listErr: assert.AnError, + }, + wantErr: true, + }, + "metadata load balancer error": { + nodeInfo: metadata.InstanceMetadata{ + Role: role.ControlPlane, + VPCIP: "192.0.2.1", + }, + meta: stubMetadataAPI{ + getLoadBalancerEndpointErr: assert.AnError, }, wantErr: true, }, @@ -205,13 +218,14 @@ func TestGetControlPlaneEndpoints(t *testing.T) { client := New(nil, tc.nodeInfo, tc.meta, logger.NewTest(t)) - endpoints, err := client.getControlPlaneEndpoints() + endpoints, err := client.getJoinEndpoints() if tc.wantErr { assert.Error(err) } else { assert.NoError(err) assert.NotContains(endpoints, tc.nodeInfo.VPCIP) - assert.Len(endpoints, tc.wantInstances) + // +1 for the load balancer endpoint + assert.Len(endpoints, tc.wantEndpoints) } }) } @@ -288,12 +302,18 @@ func TestStart(t *testing.T) { } type stubMetadataAPI struct { - instances []metadata.InstanceMetadata - err error + instances []metadata.InstanceMetadata + lbEndpoint string + getLoadBalancerEndpointErr error + listErr error } func (s stubMetadataAPI) List(context.Context) ([]metadata.InstanceMetadata, error) { - return s.instances, s.err + return s.instances, s.listErr +} + +func (s stubMetadataAPI) GetLoadBalancerEndpoint(_ context.Context) (string, string, error) { + return s.lbEndpoint, "", s.getLoadBalancerEndpointErr } type stubRejoinServiceAPI struct { diff --git a/disk-mapper/internal/setup/interface.go b/disk-mapper/internal/setup/interface.go index bcb33ec15..50bd008a7 100644 --- a/disk-mapper/internal/setup/interface.go +++ b/disk-mapper/internal/setup/interface.go @@ -7,6 +7,7 @@ SPDX-License-Identifier: AGPL-3.0-only package setup import ( + "context" "io/fs" "os" @@ -37,6 +38,7 @@ type ConfigurationGenerator interface { type MetadataAPI interface { metadata.InstanceSelfer metadata.InstanceLister + GetLoadBalancerEndpoint(ctx context.Context) (host, port string, err error) } // RecoveryDoer is an interface to perform key recovery operations. diff --git a/internal/cloud/metadata/BUILD.bazel b/internal/cloud/metadata/BUILD.bazel index 6826262a9..698b038c3 100644 --- a/internal/cloud/metadata/BUILD.bazel +++ b/internal/cloud/metadata/BUILD.bazel @@ -5,8 +5,5 @@ go_library( srcs = ["metadata.go"], importpath = "github.com/edgelesssys/constellation/v2/internal/cloud/metadata", visibility = ["//:__subpackages__"], - deps = [ - "//internal/constants", - "//internal/role", - ], + deps = ["//internal/role"], ) diff --git a/internal/cloud/metadata/metadata.go b/internal/cloud/metadata/metadata.go index 88a8270ca..7b3aed893 100644 --- a/internal/cloud/metadata/metadata.go +++ b/internal/cloud/metadata/metadata.go @@ -8,11 +8,7 @@ package metadata import ( "context" - "fmt" - "net" - "strconv" - "github.com/edgelesssys/constellation/v2/internal/constants" "github.com/edgelesssys/constellation/v2/internal/role" ) @@ -43,21 +39,3 @@ type InstanceLister interface { // List retrieves all instances belonging to the current constellation. List(ctx context.Context) ([]InstanceMetadata, error) } - -// JoinServiceEndpoints returns the list of endpoints for the join service, which are running on the control plane nodes. -func JoinServiceEndpoints(ctx context.Context, lister InstanceLister) ([]string, error) { - instances, err := lister.List(ctx) - if err != nil { - return nil, fmt.Errorf("retrieving instances list from cloud provider: %w", err) - } - joinEndpoints := []string{} - for _, instance := range instances { - if instance.Role == role.ControlPlane { - if instance.VPCIP != "" { - joinEndpoints = append(joinEndpoints, net.JoinHostPort(instance.VPCIP, strconv.Itoa(constants.JoinServiceNodePort))) - } - } - } - - return joinEndpoints, nil -}