terraform: gcp node groups (#1941)

* terraform: GCP node groups

* cli: marshal GCP node groups to terraform variables

This has no side effects for users:
we still strictly create one control-plane and one worker group.
It prepares for enabling customizable node groups in the future.
Malte Poll 2023-06-19 13:02:01 +02:00 committed by GitHub
parent 5823aa2438
commit 2808012c9c
17 changed files with 545 additions and 163 deletions
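
As the variables.tf diff below shows, the per-group settings (role, zone, instance type, disk size/type, initial count) now live in a single node_groups map that the CLI marshals into Terraform variables, replacing control_plane_count, worker_count, instance_type, state_disk_size, and state_disk_type. A minimal sketch of the generated variables for the current default setup (zone, instance type, and counts are illustrative; the disk values match the defaults of the removed variables):

node_groups = {
  control_plane_default = {
    role          = "ControlPlane"
    zone          = "europe-west3-b" # illustrative zone
    instance_type = "n2d-standard-4" # illustrative instance type
    disk_size     = 30
    disk_type     = "pd-ssd"
    initial_count = 3                # illustrative count
  }
  worker_default = {
    role          = "Worker"
    zone          = "europe-west3-b" # illustrative zone
    instance_type = "n2d-standard-4" # illustrative instance type
    disk_size     = 30
    disk_type     = "pd-ssd"
    initial_count = 2                # illustrative count
  }
}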

View file

@@ -44,6 +44,20 @@ locals {
cidr_vpc_subnet_nodes = "192.168.178.0/24"
cidr_vpc_subnet_pods = "10.10.0.0/16"
kube_env = "AUTOSCALER_ENV_VARS: kube_reserved=cpu=1060m,memory=1019Mi,ephemeral-storage=41Gi;node_labels=;os=linux;os_distribution=cos;evictionHard="
control_plane_named_ports = flatten([
{ name = "kubernetes", port = local.ports_kubernetes },
{ name = "bootstrapper", port = local.ports_bootstrapper },
{ name = "verify", port = local.ports_verify },
{ name = "konnectivity", port = local.ports_konnectivity },
{ name = "recovery", port = local.ports_recovery },
var.debug ? [{ name = "debugd", port = local.ports_debugd }] : [],
])
node_groups_by_role = {
for name, node_group in var.node_groups : node_group.role => name...
}
control_plane_instance_groups = [
for control_plane in local.node_groups_by_role["ControlPlane"] : module.instance_group[control_plane].instance_group
]
}
resource "random_id" "uid" {
@@ -134,48 +148,26 @@ resource "google_compute_firewall" "firewall_internal_pods" {
allow { protocol = "icmp" }
}
module "instance_group_control_plane" {
source = "./modules/instance_group"
name = local.name
role = "ControlPlane"
uid = local.uid
instance_type = var.instance_type
instance_count = var.control_plane_count
image_id = var.image_id
disk_size = var.state_disk_size
disk_type = var.state_disk_type
network = google_compute_network.vpc_network.id
subnetwork = google_compute_subnetwork.vpc_subnetwork.id
alias_ip_range_name = google_compute_subnetwork.vpc_subnetwork.secondary_ip_range[0].range_name
kube_env = local.kube_env
debug = var.debug
named_ports = flatten([
{ name = "kubernetes", port = local.ports_kubernetes },
{ name = "bootstrapper", port = local.ports_bootstrapper },
{ name = "verify", port = local.ports_verify },
{ name = "konnectivity", port = local.ports_konnectivity },
{ name = "recovery", port = local.ports_recovery },
var.debug ? [{ name = "debugd", port = local.ports_debugd }] : [],
])
labels = local.labels
init_secret_hash = local.initSecretHash
}
module "instance_group_worker" {
module "instance_group" {
source = "./modules/instance_group"
name = "${local.name}-1"
role = "Worker"
for_each = var.node_groups
base_name = local.name
node_group_name = each.key
role = each.value.role
zone = each.value.zone
uid = local.uid
instance_type = var.instance_type
instance_count = var.worker_count
instance_type = each.value.instance_type
instance_count = each.value.initial_count
image_id = var.image_id
disk_size = var.state_disk_size
disk_type = var.state_disk_type
disk_size = each.value.disk_size
disk_type = each.value.disk_type
network = google_compute_network.vpc_network.id
subnetwork = google_compute_subnetwork.vpc_subnetwork.id
alias_ip_range_name = google_compute_subnetwork.vpc_subnetwork.secondary_ip_range[0].range_name
kube_env = local.kube_env
debug = var.debug
named_ports = each.value.role == "ControlPlane" ? local.control_plane_named_ports : []
labels = local.labels
init_secret_hash = local.initSecretHash
}
@@ -185,68 +177,78 @@ resource "google_compute_global_address" "loadbalancer_ip" {
}
module "loadbalancer_kube" {
source = "./modules/loadbalancer"
name = local.name
health_check = "HTTPS"
backend_port_name = "kubernetes"
backend_instance_group = module.instance_group_control_plane.instance_group
ip_address = google_compute_global_address.loadbalancer_ip.self_link
port = local.ports_kubernetes
frontend_labels = merge(local.labels, { constellation-use = "kubernetes" })
source = "./modules/loadbalancer"
name = local.name
health_check = "HTTPS"
backend_port_name = "kubernetes"
backend_instance_groups = local.control_plane_instance_groups
ip_address = google_compute_global_address.loadbalancer_ip.self_link
port = local.ports_kubernetes
frontend_labels = merge(local.labels, { constellation-use = "kubernetes" })
}
module "loadbalancer_boot" {
source = "./modules/loadbalancer"
name = local.name
health_check = "TCP"
backend_port_name = "bootstrapper"
backend_instance_group = module.instance_group_control_plane.instance_group
ip_address = google_compute_global_address.loadbalancer_ip.self_link
port = local.ports_bootstrapper
frontend_labels = merge(local.labels, { constellation-use = "bootstrapper" })
source = "./modules/loadbalancer"
name = local.name
health_check = "TCP"
backend_port_name = "bootstrapper"
backend_instance_groups = local.control_plane_instance_groups
ip_address = google_compute_global_address.loadbalancer_ip.self_link
port = local.ports_bootstrapper
frontend_labels = merge(local.labels, { constellation-use = "bootstrapper" })
}
module "loadbalancer_verify" {
source = "./modules/loadbalancer"
name = local.name
health_check = "TCP"
backend_port_name = "verify"
backend_instance_group = module.instance_group_control_plane.instance_group
ip_address = google_compute_global_address.loadbalancer_ip.self_link
port = local.ports_verify
frontend_labels = merge(local.labels, { constellation-use = "verify" })
source = "./modules/loadbalancer"
name = local.name
health_check = "TCP"
backend_port_name = "verify"
backend_instance_groups = local.control_plane_instance_groups
ip_address = google_compute_global_address.loadbalancer_ip.self_link
port = local.ports_verify
frontend_labels = merge(local.labels, { constellation-use = "verify" })
}
module "loadbalancer_konnectivity" {
source = "./modules/loadbalancer"
name = local.name
health_check = "TCP"
backend_port_name = "konnectivity"
backend_instance_group = module.instance_group_control_plane.instance_group
ip_address = google_compute_global_address.loadbalancer_ip.self_link
port = local.ports_konnectivity
frontend_labels = merge(local.labels, { constellation-use = "konnectivity" })
source = "./modules/loadbalancer"
name = local.name
health_check = "TCP"
backend_port_name = "konnectivity"
backend_instance_groups = local.control_plane_instance_groups
ip_address = google_compute_global_address.loadbalancer_ip.self_link
port = local.ports_konnectivity
frontend_labels = merge(local.labels, { constellation-use = "konnectivity" })
}
module "loadbalancer_recovery" {
source = "./modules/loadbalancer"
name = local.name
health_check = "TCP"
backend_port_name = "recovery"
backend_instance_group = module.instance_group_control_plane.instance_group
ip_address = google_compute_global_address.loadbalancer_ip.self_link
port = local.ports_recovery
frontend_labels = merge(local.labels, { constellation-use = "recovery" })
source = "./modules/loadbalancer"
name = local.name
health_check = "TCP"
backend_port_name = "recovery"
backend_instance_groups = local.control_plane_instance_groups
ip_address = google_compute_global_address.loadbalancer_ip.self_link
port = local.ports_recovery
frontend_labels = merge(local.labels, { constellation-use = "recovery" })
}
module "loadbalancer_debugd" {
count = var.debug ? 1 : 0 // only deploy debugd in debug mode
source = "./modules/loadbalancer"
name = local.name
health_check = "TCP"
backend_port_name = "debugd"
backend_instance_group = module.instance_group_control_plane.instance_group
ip_address = google_compute_global_address.loadbalancer_ip.self_link
port = local.ports_debugd
frontend_labels = merge(local.labels, { constellation-use = "debugd" })
count = var.debug ? 1 : 0 // only deploy debugd in debug mode
source = "./modules/loadbalancer"
name = local.name
health_check = "TCP"
backend_port_name = "debugd"
backend_instance_groups = local.control_plane_instance_groups
ip_address = google_compute_global_address.loadbalancer_ip.self_link
port = local.ports_debugd
frontend_labels = merge(local.labels, { constellation-use = "debugd" })
}
moved {
from = module.instance_group_control_plane
to = module.instance_group["control_plane_default"]
}
moved {
from = module.instance_group_worker
to = module.instance_group["worker_default"]
}
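
For context: node_groups_by_role above uses a grouping for expression (the ... modifier) to collect group names per role, so with only the two default groups it evaluates to roughly:

node_groups_by_role = {
  "ControlPlane" = ["control_plane_default"]
  "Worker"       = ["worker_default"]
}

control_plane_instance_groups then resolves to the instance-group URLs of every ControlPlane group, which is what the load balancer modules consume as backend_instance_groups. The moved blocks map the previous single-purpose modules onto the new for_each keys so existing state is adopted instead of recreated.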


@@ -4,20 +4,42 @@ terraform {
source = "hashicorp/google"
version = "4.69.1"
}
random = {
source = "hashicorp/random"
version = "3.5.1"
}
}
}
locals {
# migration: allow the old node group names to work since they were created without the uid
# and without multiple node groups in mind
# node_group: worker_default => name == "<base>-1-worker"
# node_group: control_plane_default => name: "<base>-control-plane"
# new names:
# node_group: foo, role: Worker => name == "<base>-worker-<uid>"
# node_group: bar, role: ControlPlane => name == "<base>-control-plane-<uid>"
role_dashed = var.role == "ControlPlane" ? "control-plane" : "worker"
name = "${var.name}-${local.role_dashed}"
group_uid = random_id.uid.hex
maybe_uid = (var.node_group_name == "control_plane_default" || var.node_group_name == "worker_default") ? "" : "-${local.group_uid}"
maybe_one = var.node_group_name == "worker_default" ? "-1" : ""
name = "${var.base_name}${local.maybe_one}-${local.role_dashed}${local.maybe_uid}"
state_disk_name = "state-disk"
}
resource "random_id" "uid" {
byte_length = 4
}
resource "google_compute_instance_template" "template" {
name = local.name
machine_type = var.instance_type
tags = ["constellation-${var.uid}"] // Note that this is also applied as a label
labels = merge(var.labels, { constellation-role = local.role_dashed })
labels = merge(var.labels, {
constellation-role = local.role_dashed,
constellation-node-group = var.node_group_name,
})
confidential_instance_config {
enable_confidential_compute = true
@@ -98,6 +120,7 @@ resource "google_compute_instance_group_manager" "instance_group_manager" {
name = local.name
description = "Instance group manager for Constellation"
base_instance_name = local.name
zone = var.zone
target_size = var.instance_count
dynamic "stateful_disk" {


@@ -1,8 +1,13 @@
variable "name" {
variable "base_name" {
type = string
description = "Base name of the instance group."
}
variable "node_group_name" {
type = string
description = "Constellation name for the node group (used for configuration and CSP-independent naming)."
}
variable "role" {
type = string
description = "The role of the instance group."
@@ -84,3 +89,8 @@ variable "alias_ip_range_name" {
type = string
description = "Name of the alias IP range to use."
}
variable "zone" {
type = string
description = "Zone to deploy the instance group in."
}


@@ -41,9 +41,12 @@ resource "google_compute_backend_service" "backend" {
port_name = var.backend_port_name
timeout_sec = 240
backend {
group = var.backend_instance_group
balancing_mode = "UTILIZATION"
dynamic "backend" {
for_each = var.backend_instance_groups
content {
group = backend.value
balancing_mode = "UTILIZATION"
}
}
}
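
The dynamic block emits one backend per entry in backend_instance_groups; for two control-plane groups it would render roughly the following (group URLs abbreviated in this sketch):

backend {
  group          = ".../instanceGroups/constell-control-plane"
  balancing_mode = "UTILIZATION"
}
backend {
  group          = ".../instanceGroups/constell-control-plane-abcd1234"
  balancing_mode = "UTILIZATION"
}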


@@ -13,9 +13,9 @@ variable "backend_port_name" {
description = "Name of backend port. The same name should appear in the instance groups referenced by this service."
}
variable "backend_instance_group" {
type = string
description = "The URL of the instance group resource from which the load balancer will direct traffic."
variable "backend_instance_groups" {
type = list(string)
description = "The URLs of the instance group resources from which the load balancer will direct traffic."
}
variable "ip_address" {


@@ -4,20 +4,16 @@ variable "name" {
description = "Base name of the cluster."
}
variable "control_plane_count" {
type = number
description = "The number of control plane nodes to deploy."
}
variable "worker_count" {
type = number
description = "The number of worker nodes to deploy."
}
variable "state_disk_size" {
type = number
default = 30
description = "The size of the state disk in GB."
variable "node_groups" {
type = map(object({
role = string
zone = string
instance_type = string
disk_size = number
disk_type = string
initial_count = number
}))
description = "A map of node group names to node group configurations."
}
variable "project" {
@@ -35,17 +31,6 @@ variable "zone" {
description = "The GCP zone to deploy the cluster in."
}
variable "instance_type" {
type = string
description = "The GCP instance type to deploy."
}
variable "state_disk_type" {
type = string
default = "pd-ssd"
description = "The type of the state disk."
}
variable "image_id" {
type = string
description = "The GCP image to use for the cluster nodes."