terraform: use single zone loadbalancer frontend on AWS (#1983)

This change is required to ensure we do not run into TLS handshake errors when connecting to the Kubernetes API.
Currently, the certificates used by the kube-apiserver pods contain a SAN field with the (single) public IP of the load balancer.
If we allowed multiple load balancer frontend IPs, we could encounter cases where the certificate is only valid for one public IP
while we try to connect to a different IP.
To prevent this, we consciously disable support for the multi-zone load balancer frontend on AWS for now.
This will be re-enabled in the future.
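For illustration only (not part of this commit): a minimal Go sketch of the failure mode described above, using a placeholder endpoint. It dials the API endpoint, skips chain verification so the leaf certificate can be inspected, and then checks whether the dialed IP is covered by that certificate's SANs; with multiple frontend IPs but a certificate that lists only one of them, this is the check that fails.

// san_check.go: hedged sketch, not part of the commit.
// Usage: go run san_check.go 203.0.113.10:6443   (placeholder endpoint)
package main

import (
	"crypto/tls"
	"fmt"
	"net"
	"os"
)

func main() {
	endpoint := os.Args[1]

	// Skip chain verification so we can inspect the presented leaf certificate,
	// which is typically signed by a cluster-internal CA.
	conn, err := tls.Dial("tcp", endpoint, &tls.Config{InsecureSkipVerify: true})
	if err != nil {
		fmt.Println("dial failed:", err)
		os.Exit(1)
	}
	defer conn.Close()

	host, _, _ := net.SplitHostPort(endpoint)
	leaf := conn.ConnectionState().PeerCertificates[0]

	// VerifyHostname succeeds only if the dialed IP (or DNS name) appears in the
	// certificate's SANs, which is exactly the property the single-zone frontend preserves.
	if err := leaf.VerifyHostname(host); err != nil {
		fmt.Println("endpoint not covered by certificate SANs:", err)
		return
	}
	fmt.Println("endpoint is covered by certificate SANs")
}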
Malte Poll 2023-06-30 16:56:31 +02:00 committed by GitHub
parent d95ddd01d3
commit 66f1333c31
3 changed files with 22 additions and 47 deletions


@@ -77,7 +77,11 @@ module "public_private_subnet" {
 }
 
 resource "aws_eip" "lb" {
-  for_each = toset(module.public_private_subnet.all_zones)
+  # TODO(malt3): use for_each = toset(module.public_private_subnet.all_zones)
+  # in a future version to support all availability zones in the chosen region
+  # This should only be done after we migrated to DNS-based addressing for the
+  # control-plane.
+  for_each = toset([var.zone])
   domain   = "vpc"
   tags     = local.tags
 }
@@ -92,9 +96,10 @@ resource "aws_lb" "front_end" {
     # TODO(malt3): use for_each = toset(module.public_private_subnet.all_zones)
     # in a future version to support all availability zones in the chosen region
     # without needing to constantly replace the loadbalancer.
-    # This has to wait until the bootstrapper that we upgrade from (source version) can handle multiple AZs
+    # This has to wait until the bootstrapper that we upgrade from (source version) uses
+    # DNS-based addressing for the control-plane.
     # for_each = toset(module.public_private_subnet.all_zones)
-    for_each = toset(local.zones)
+    for_each = toset([var.zone])
     content {
       subnet_id     = module.public_private_subnet.public_subnet_id[subnet_mapping.key]
       allocation_id = aws_eip.lb[subnet_mapping.key].id
@@ -267,6 +272,7 @@ module "instance_group" {
     local.tags,
     { Name = local.name },
     { constellation-role = each.value.role },
+    { constellation-node-group = each.key },
     { constellation-uid = local.uid },
     { constellation-init-secret-hash = local.initSecretHash },
     { "kubernetes.io/cluster/${local.name}" = "owned" }


@@ -15,7 +15,7 @@ locals {
   # 0 => 192.168.176.0/24 (unused private subnet cidr)
   # 1 => 192.168.177.0/24 (unused private subnet cidr)
   legacy = 2 # => 192.168.178.0/24 (legacy private subnet)
-  a = 3 # => 192.168.178.1/24 (first newly created zonal private subnet)
+  a = 3 # => 192.168.179.0/24 (first newly created zonal private subnet)
   b = 4
   c = 5
   d = 6
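For context on the corrected comment (an illustration, not part of the commit): assuming the zonal /24 subnets are carved out of a contiguous base prefix starting at 192.168.176.0 (an assumption based on the CIDRs listed above), index 3 yields the network 192.168.179.0/24; 192.168.178.1/24 would be a host address inside the legacy subnet rather than the next /24. A short Go sketch of the arithmetic:

// zonal_subnets.go: hedged sketch of the index-to-CIDR mapping, not part of the commit.
package main

import (
	"fmt"
	"net/netip"
)

// zonalSubnet carves the n-th /24 out of an assumed 192.168.176.0/20 base prefix,
// mirroring what Terraform's cidrsubnet(base, 4, n) would compute.
func zonalSubnet(base netip.Prefix, n int) netip.Prefix {
	a := base.Addr().As4()
	a[2] += byte(n) // each /24 advances the third octet by one
	return netip.PrefixFrom(netip.AddrFrom4(a), 24)
}

func main() {
	base := netip.MustParsePrefix("192.168.176.0/20") // assumed base; matches the CIDRs above
	for _, e := range []struct {
		name  string
		index int
	}{{"legacy", 2}, {"a", 3}, {"b", 4}} {
		fmt.Printf("%-6s = %d # => %s\n", e.name, e.index, zonalSubnet(base, e.index))
	}
}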


@@ -19,7 +19,6 @@ import (
 	"context"
 	"errors"
 	"fmt"
-	"math/rand"
 
 	"github.com/aws/aws-sdk-go-v2/aws"
 	"github.com/aws/aws-sdk-go-v2/config"
@@ -146,49 +145,27 @@ func (c *Cloud) GetLoadBalancerEndpoint(ctx context.Context) (string, error) {
 	if err != nil {
 		return "", fmt.Errorf("retrieving load balancer: %w", err)
 	}
-	if len(output.LoadBalancers) < 1 {
-		return "", fmt.Errorf("%d load balancers found; expected at least 1", len(output.LoadBalancers))
+	if len(output.LoadBalancers) != 1 {
+		return "", fmt.Errorf("%d load balancers found; expected 1", len(output.LoadBalancers))
 	}
-	nodeAZ, err := c.getAZ(ctx)
-	if err != nil {
-		return "", fmt.Errorf("retrieving availability zone: %w", err)
+	// TODO(malt3): Add support for multiple availability zones in the lb frontend.
+	// This can only be done after we have migrated to using DNS as the load balancer endpoint.
+	// At that point, we don't need to care about the number of availability zones anymore.
+	if len(output.LoadBalancers[0].AvailabilityZones) != 1 {
+		return "", fmt.Errorf("%d availability zones found; expected 1", len(output.LoadBalancers[0].AvailabilityZones))
 	}
-	var sameAZEndpoints []string
-	var endpoints []string
-	for _, lb := range output.LoadBalancers {
-		for az := range lb.AvailabilityZones {
-			azName := lb.AvailabilityZones[az].ZoneName
-			for _, lbAddress := range lb.AvailabilityZones[az].LoadBalancerAddresses {
-				if lbAddress.IpAddress != nil {
-					endpoints = append(endpoints, *lbAddress.IpAddress)
-					if azName != nil && *azName == nodeAZ {
-						sameAZEndpoints = append(sameAZEndpoints, *lbAddress.IpAddress)
-					}
-				}
-			}
-		}
+	if len(output.LoadBalancers[0].AvailabilityZones[0].LoadBalancerAddresses) != 1 {
+		return "", fmt.Errorf("%d load balancer addresses found; expected 1", len(output.LoadBalancers[0].AvailabilityZones[0].LoadBalancerAddresses))
 	}
-	if len(endpoints) < 1 {
-		return "", errors.New("no load balancer endpoints found")
+	if output.LoadBalancers[0].AvailabilityZones[0].LoadBalancerAddresses[0].IpAddress == nil {
+		return "", errors.New("load balancer address is nil")
 	}
-	// TODO(malt3): ideally, we would use DNS here instead of IP addresses.
-	// Requires changes to the infrastructure.
-	// for HA on AWS, there is one load balancer per AZ, so we can just return a random one
-	// prefer LBs in the same AZ as the instance
-	if len(sameAZEndpoints) > 0 {
-		return sameAZEndpoints[rand.Intn(len(sameAZEndpoints))], nil
-	}
-	// fall back to any LB. important for legacy clusters
-	return endpoints[rand.Intn(len(endpoints))], nil
+	return *output.LoadBalancers[0].AvailabilityZones[0].LoadBalancerAddresses[0].IpAddress, nil
 }
 
 // getARNsByTag returns a list of ARNs that have the given tag.
@@ -320,14 +297,6 @@ func (c *Cloud) readInstanceTag(ctx context.Context, tag string) (string, error)
 	return findTag(out.Reservations[0].Instances[0].Tags, tag)
 }
 
-func (c *Cloud) getAZ(ctx context.Context) (string, error) {
-	identity, err := c.imds.GetInstanceIdentityDocument(ctx, &imds.GetInstanceIdentityDocumentInput{})
-	if err != nil {
-		return "", fmt.Errorf("retrieving instance identity: %w", err)
-	}
-	return identity.AvailabilityZone, nil
-}
-
 func findTag(tags []ec2Types.Tag, wantKey string) (string, error) {
 	for _, tag := range tags {
 		if tag.Key == nil || tag.Value == nil {