join: join over lb if available (#2348)

* join: join over lb if available
This commit is contained in:
3u13r 2023-09-25 10:23:35 +02:00 committed by GitHub
parent df77696620
commit 2776e40df7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 142 additions and 62 deletions

View File

@ -99,5 +99,4 @@ type clusterInitJoiner interface {
type metadataAPI interface {
joinclient.MetadataAPI
initserver.MetadataAPI
GetLoadBalancerEndpoint(ctx context.Context) (host, port string, err error)
}

View File

@ -186,17 +186,29 @@ func (c *JoinClient) Stop() {
}
func (c *JoinClient) tryJoinWithAvailableServices() error {
ips, err := c.getControlPlaneIPs()
if err != nil {
return err
}
ctx, cancel := c.timeoutCtx()
defer cancel()
if len(ips) == 0 {
var endpoints []string
ip, _, err := c.metadataAPI.GetLoadBalancerEndpoint(ctx)
if err != nil {
return fmt.Errorf("failed to get load balancer endpoint: %w", err)
}
endpoints = append(endpoints, net.JoinHostPort(ip, strconv.Itoa(constants.JoinServiceNodePort)))
ips, err := c.getControlPlaneIPs(ctx)
if err != nil {
return fmt.Errorf("failed to get control plane IPs: %w", err)
}
endpoints = append(endpoints, ips...)
if len(endpoints) == 0 {
return errors.New("no control plane IPs found")
}
for _, ip := range ips {
err = c.join(net.JoinHostPort(ip, strconv.Itoa(constants.JoinServiceNodePort)))
for _, endpoint := range endpoints {
err = c.join(net.JoinHostPort(endpoint, strconv.Itoa(constants.JoinServiceNodePort)))
if err == nil {
return nil
}
@ -357,10 +369,7 @@ func (c *JoinClient) getDiskUUID() (string, error) {
return c.disk.UUID()
}
func (c *JoinClient) getControlPlaneIPs() ([]string, error) {
ctx, cancel := c.timeoutCtx()
defer cancel()
func (c *JoinClient) getControlPlaneIPs(ctx context.Context) ([]string, error) {
instances, err := c.metadataAPI.List(ctx)
if err != nil {
c.log.With(zap.Error(err)).Errorf("Failed to list instances from metadata API")
@ -425,6 +434,8 @@ type MetadataAPI interface {
List(ctx context.Context) ([]metadata.InstanceMetadata, error)
// Self retrieves the current instance.
Self(ctx context.Context) (metadata.InstanceMetadata, error)
// GetLoadBalancerEndpoint retrieves the load balancer endpoint.
GetLoadBalancerEndpoint(ctx context.Context) (host, port string, err error)
}
type encryptedDisk interface {

View File

@ -330,6 +330,10 @@ func (s *stubRepeaterMetadataAPI) List(_ context.Context) ([]metadata.InstanceMe
return s.listInstances, s.listErr
}
func (s *stubRepeaterMetadataAPI) GetLoadBalancerEndpoint(_ context.Context) (string, string, error) {
return "", "", nil
}
type stubMetadataAPI struct {
selfAnswerC chan selfAnswer
listAnswerC chan listAnswer
@ -352,6 +356,10 @@ func (s *stubMetadataAPI) List(_ context.Context) ([]metadata.InstanceMetadata,
return answer.instances, answer.err
}
func (s *stubMetadataAPI) GetLoadBalancerEndpoint(_ context.Context) (string, string, error) {
return "", "", nil
}
type selfAnswer struct {
instance metadata.InstanceMetadata
err error

View File

@ -27,6 +27,7 @@ locals {
ports_verify = "30081"
ports_recovery = "9999"
ports_debugd = "4000"
ports_join = "30090"
target_group_arns = {
control-plane : flatten([
module.load_balancer_target_bootstrapper.target_group_arn,
@ -34,6 +35,7 @@ locals {
module.load_balancer_target_verify.target_group_arn,
module.load_balancer_target_recovery.target_group_arn,
module.load_balancer_target_konnectivity.target_group_arn,
module.load_balancer_target_join.target_group_arn,
var.debug ? [module.load_balancer_target_debugd[0].target_group_arn] : [],
])
worker : []
@ -96,6 +98,7 @@ resource "aws_lb" "front_end" {
internal = false
load_balancer_type = "network"
tags = local.tags
security_groups = [aws_security_group.security_group.id]
dynamic "subnet_mapping" {
# TODO(malt3): use for_each = toset(module.public_private_subnet.all_zones)
@ -111,6 +114,10 @@ resource "aws_lb" "front_end" {
}
}
enable_cross_zone_load_balancing = true
lifecycle {
ignore_changes = [security_groups]
}
}
resource "aws_security_group" "security_group" {
@ -255,6 +262,16 @@ module "load_balancer_target_konnectivity" {
healthcheck_protocol = "TCP"
}
module "load_balancer_target_join" {
source = "./modules/load_balancer_target"
name = "${local.name}-join"
vpc_id = aws_vpc.vpc.id
lb_arn = aws_lb.front_end.arn
port = local.ports_join
tags = local.tags
healthcheck_protocol = "TCP"
}
module "instance_group" {
source = "./modules/instance_group"
for_each = var.node_groups

View File

@ -32,6 +32,7 @@ locals {
ports_konnectivity = "8132"
ports_verify = "30081"
ports_recovery = "9999"
ports_join = "30090"
ports_debugd = "4000"
cidr_vpc_subnet_nodes = "192.168.178.0/24"
cidr_vpc_subnet_pods = "10.10.0.0/16"
@ -182,6 +183,12 @@ module "loadbalancer_backend_control_plane" {
protocol = "Tcp",
path = null
},
{
name = "join",
port = local.ports_join,
protocol = "Tcp",
path = null
},
var.debug ? [{
name = "debugd",
port = local.ports_debugd,
@ -231,8 +238,9 @@ resource "azurerm_network_security_group" "security_group" {
{ name = "kubernetes", priority = 101, dest_port_range = local.ports_kubernetes },
{ name = "bootstrapper", priority = 102, dest_port_range = local.ports_bootstrapper },
{ name = "konnectivity", priority = 103, dest_port_range = local.ports_konnectivity },
{ name = "recovery", priority = 104, dest_port_range = local.ports_recovery },
var.debug ? [{ name = "debugd", priority = 105, dest_port_range = local.ports_debugd }] : [],
{ name = "join", priority = 104, dest_port_range = local.ports_recovery },
{ name = "recovery", priority = 105, dest_port_range = local.ports_join },
var.debug ? [{ name = "debugd", priority = 106, dest_port_range = local.ports_debugd }] : [],
])
content {
name = security_rule.value.name

View File

@ -42,6 +42,7 @@ locals {
ports_konnectivity = "8132"
ports_verify = "30081"
ports_recovery = "9999"
ports_join = "30090"
ports_debugd = "4000"
cidr_vpc_subnet_nodes = "192.168.178.0/24"
cidr_vpc_subnet_pods = "10.10.0.0/16"
@ -52,6 +53,7 @@ locals {
{ name = "verify", port = local.ports_verify },
{ name = "konnectivity", port = local.ports_konnectivity },
{ name = "recovery", port = local.ports_recovery },
{ name = "join", port = local.ports_join },
var.debug ? [{ name = "debugd", port = local.ports_debugd }] : [],
])
node_groups_by_role = {
@ -120,6 +122,7 @@ resource "google_compute_firewall" "firewall_external" {
local.ports_kubernetes,
local.ports_konnectivity,
local.ports_recovery,
local.ports_join,
var.debug ? [local.ports_debugd] : [],
])
}
@ -234,6 +237,17 @@ module "loadbalancer_recovery" {
frontend_labels = merge(local.labels, { constellation-use = "recovery" })
}
module "loadbalancer_join" {
source = "./modules/loadbalancer"
name = local.name
health_check = "TCP"
backend_port_name = "join"
backend_instance_groups = local.control_plane_instance_groups
ip_address = google_compute_global_address.loadbalancer_ip.self_link
port = local.ports_join
frontend_labels = merge(local.labels, { constellation-use = "join" })
}
module "loadbalancer_debugd" {
count = var.debug ? 1 : 0 // only deploy debugd in debug mode
source = "./modules/loadbalancer"

View File

@ -8,6 +8,7 @@ go_library(
visibility = ["//disk-mapper:__subpackages__"],
deps = [
"//internal/cloud/metadata",
"//internal/constants",
"//internal/logger",
"//internal/role",
"//joinservice/joinproto",

View File

@ -14,10 +14,13 @@ package rejoinclient
import (
"context"
"errors"
"fmt"
"net"
"strconv"
"time"
"github.com/edgelesssys/constellation/v2/internal/cloud/metadata"
"github.com/edgelesssys/constellation/v2/internal/constants"
"github.com/edgelesssys/constellation/v2/internal/logger"
"github.com/edgelesssys/constellation/v2/internal/role"
"github.com/edgelesssys/constellation/v2/joinservice/joinproto"
@ -75,7 +78,7 @@ func (c *RejoinClient) Start(ctx context.Context, diskUUID string) (diskKey, mea
defer c.log.Infof("RejoinClient stopped")
for {
endpoints, err := c.getControlPlaneEndpoints()
endpoints, err := c.getJoinEndpoints()
if err != nil {
c.log.With(zap.Error(err)).Errorf("Failed to get control-plane endpoints")
} else {
@ -130,19 +133,39 @@ func (c *RejoinClient) requestRejoinTicket(endpoint string) (*joinproto.IssueRej
return joinproto.NewAPIClient(conn).IssueRejoinTicket(ctx, &joinproto.IssueRejoinTicketRequest{DiskUuid: c.diskUUID})
}
// getControlPlaneEndpoints requests the available control-plane endpoints from the metadata API.
// getJoinEndpoints requests the available control-plane endpoints from the metadata API.
// The list is filtered to remove *this* node if it is a restarting control-plane node.
func (c *RejoinClient) getControlPlaneEndpoints() ([]string, error) {
// Furthermore, the load balancer's endpoint is added.
func (c *RejoinClient) getJoinEndpoints() ([]string, error) {
ctx, cancel := c.timeoutCtx()
defer cancel()
endpoints, err := metadata.JoinServiceEndpoints(ctx, c.metadataAPI)
joinEndpoints := []string{}
lbEndpoint, _, err := c.metadataAPI.GetLoadBalancerEndpoint(ctx)
if err != nil {
return nil, err
return nil, fmt.Errorf("retrieving load balancer endpoint from cloud provider: %w", err)
}
joinEndpoints = append(joinEndpoints, net.JoinHostPort(lbEndpoint, strconv.Itoa(constants.JoinServiceNodePort)))
instances, err := c.metadataAPI.List(ctx)
if err != nil {
return nil, fmt.Errorf("retrieving instances list from cloud provider: %w", err)
}
for _, instance := range instances {
if instance.Role == role.ControlPlane {
if instance.VPCIP != "" {
joinEndpoints = append(joinEndpoints, net.JoinHostPort(instance.VPCIP, strconv.Itoa(constants.JoinServiceNodePort)))
}
}
}
if c.nodeInfo.Role == role.ControlPlane {
return removeSelfFromEndpoints(c.nodeInfo.VPCIP, endpoints), nil
return removeSelfFromEndpoints(c.nodeInfo.VPCIP, joinEndpoints), nil
}
return endpoints, nil
return joinEndpoints, nil
}
// removeSelfFromEndpoints removes *this* node from the list of endpoints.
@ -169,4 +192,6 @@ type grpcDialer interface {
type metadataAPI interface {
// List retrieves all instances belonging to the current constellation.
List(ctx context.Context) ([]metadata.InstanceMetadata, error)
// GetLoadBalancerEndpoint retrieves the load balancer endpoint.
GetLoadBalancerEndpoint(ctx context.Context) (host, port string, err error)
}

View File

@ -123,7 +123,7 @@ func TestRemoveSelfFromEndpoints(t *testing.T) {
}
}
func TestGetControlPlaneEndpoints(t *testing.T) {
func TestGetJoinEndpoints(t *testing.T) {
testInstances := []metadata.InstanceMetadata{
{
Role: role.ControlPlane,
@ -154,7 +154,7 @@ func TestGetControlPlaneEndpoints(t *testing.T) {
testCases := map[string]struct {
nodeInfo metadata.InstanceMetadata
meta stubMetadataAPI
wantInstances int
wantEndpoints int
wantErr bool
}{
"worker node": {
@ -163,9 +163,10 @@ func TestGetControlPlaneEndpoints(t *testing.T) {
VPCIP: "192.0.2.1",
},
meta: stubMetadataAPI{
instances: testInstances,
instances: testInstances,
lbEndpoint: "192.0.2.100",
},
wantInstances: 3,
wantEndpoints: 4,
},
"control-plane node not in list": {
nodeInfo: metadata.InstanceMetadata{
@ -173,9 +174,10 @@ func TestGetControlPlaneEndpoints(t *testing.T) {
VPCIP: "192.0.2.1",
},
meta: stubMetadataAPI{
instances: testInstances,
instances: testInstances,
lbEndpoint: "192.0.2.100",
},
wantInstances: 3,
wantEndpoints: 4,
},
"control-plane node in list": {
nodeInfo: metadata.InstanceMetadata{
@ -183,17 +185,28 @@ func TestGetControlPlaneEndpoints(t *testing.T) {
VPCIP: "192.0.2.2",
},
meta: stubMetadataAPI{
instances: testInstances,
instances: testInstances,
lbEndpoint: "192.0.2.100",
},
wantInstances: 2,
wantEndpoints: 3,
},
"metadata error": {
"metadata list error": {
nodeInfo: metadata.InstanceMetadata{
Role: role.ControlPlane,
VPCIP: "192.0.2.1",
},
meta: stubMetadataAPI{
err: errors.New("error"),
listErr: assert.AnError,
},
wantErr: true,
},
"metadata load balancer error": {
nodeInfo: metadata.InstanceMetadata{
Role: role.ControlPlane,
VPCIP: "192.0.2.1",
},
meta: stubMetadataAPI{
getLoadBalancerEndpointErr: assert.AnError,
},
wantErr: true,
},
@ -205,13 +218,14 @@ func TestGetControlPlaneEndpoints(t *testing.T) {
client := New(nil, tc.nodeInfo, tc.meta, logger.NewTest(t))
endpoints, err := client.getControlPlaneEndpoints()
endpoints, err := client.getJoinEndpoints()
if tc.wantErr {
assert.Error(err)
} else {
assert.NoError(err)
assert.NotContains(endpoints, tc.nodeInfo.VPCIP)
assert.Len(endpoints, tc.wantInstances)
// +1 for the load balancer endpoint
assert.Len(endpoints, tc.wantEndpoints)
}
})
}
@ -288,12 +302,18 @@ func TestStart(t *testing.T) {
}
type stubMetadataAPI struct {
instances []metadata.InstanceMetadata
err error
instances []metadata.InstanceMetadata
lbEndpoint string
getLoadBalancerEndpointErr error
listErr error
}
func (s stubMetadataAPI) List(context.Context) ([]metadata.InstanceMetadata, error) {
return s.instances, s.err
return s.instances, s.listErr
}
func (s stubMetadataAPI) GetLoadBalancerEndpoint(_ context.Context) (string, string, error) {
return s.lbEndpoint, "", s.getLoadBalancerEndpointErr
}
type stubRejoinServiceAPI struct {

View File

@ -7,6 +7,7 @@ SPDX-License-Identifier: AGPL-3.0-only
package setup
import (
"context"
"io/fs"
"os"
@ -37,6 +38,7 @@ type ConfigurationGenerator interface {
type MetadataAPI interface {
metadata.InstanceSelfer
metadata.InstanceLister
GetLoadBalancerEndpoint(ctx context.Context) (host, port string, err error)
}
// RecoveryDoer is an interface to perform key recovery operations.

View File

@ -5,8 +5,5 @@ go_library(
srcs = ["metadata.go"],
importpath = "github.com/edgelesssys/constellation/v2/internal/cloud/metadata",
visibility = ["//:__subpackages__"],
deps = [
"//internal/constants",
"//internal/role",
],
deps = ["//internal/role"],
)

View File

@ -8,11 +8,7 @@ package metadata
import (
"context"
"fmt"
"net"
"strconv"
"github.com/edgelesssys/constellation/v2/internal/constants"
"github.com/edgelesssys/constellation/v2/internal/role"
)
@ -43,21 +39,3 @@ type InstanceLister interface {
// List retrieves all instances belonging to the current constellation.
List(ctx context.Context) ([]InstanceMetadata, error)
}
// JoinServiceEndpoints returns the list of endpoints for the join service, which are running on the control plane nodes.
func JoinServiceEndpoints(ctx context.Context, lister InstanceLister) ([]string, error) {
instances, err := lister.List(ctx)
if err != nil {
return nil, fmt.Errorf("retrieving instances list from cloud provider: %w", err)
}
joinEndpoints := []string{}
for _, instance := range instances {
if instance.Role == role.ControlPlane {
if instance.VPCIP != "" {
joinEndpoints = append(joinEndpoints, net.JoinHostPort(instance.VPCIP, strconv.Itoa(constants.JoinServiceNodePort)))
}
}
}
return joinEndpoints, nil
}