constellation/bootstrapper/internal/initserver/initserver.go

242 lines
7.0 KiB
Go
Raw Normal View History

2022-06-21 11:59:12 -04:00
package initserver
import (
"context"
"fmt"
"net"
"strings"
2022-08-01 10:51:34 -04:00
"time"
2022-06-21 11:59:12 -04:00
"github.com/edgelesssys/constellation/bootstrapper/initproto"
"github.com/edgelesssys/constellation/bootstrapper/internal/diskencryption"
"github.com/edgelesssys/constellation/bootstrapper/internal/kubernetes/k8sapi/resources"
2022-07-05 08:14:11 -04:00
"github.com/edgelesssys/constellation/internal/atls"
"github.com/edgelesssys/constellation/internal/attestation"
"github.com/edgelesssys/constellation/internal/crypto"
2022-06-21 11:59:12 -04:00
"github.com/edgelesssys/constellation/internal/file"
2022-07-05 08:14:11 -04:00
"github.com/edgelesssys/constellation/internal/grpc/atlscredentials"
"github.com/edgelesssys/constellation/internal/grpc/grpclog"
"github.com/edgelesssys/constellation/internal/logger"
"github.com/edgelesssys/constellation/internal/nodestate"
"github.com/edgelesssys/constellation/internal/role"
2022-06-21 11:59:12 -04:00
"go.uber.org/zap"
"google.golang.org/grpc"
"google.golang.org/grpc/codes"
2022-08-01 10:51:34 -04:00
"google.golang.org/grpc/keepalive"
2022-06-21 11:59:12 -04:00
"google.golang.org/grpc/status"
)
2022-06-28 12:33:27 -04:00
// Server is the initialization server, which is started on each node.
// The server handles initialization calls from the CLI and initializes the
// Kubernetes cluster.
2022-06-21 11:59:12 -04:00
type Server struct {
nodeLock locker
2022-06-28 12:33:27 -04:00
initializer ClusterInitializer
disk encryptedDisk
2022-06-21 11:59:12 -04:00
fileHandler file.Handler
2022-06-28 12:33:27 -04:00
grpcServer serveStopper
cleaner cleaner
2022-06-21 11:59:12 -04:00
log *logger.Logger
2022-06-21 11:59:12 -04:00
initproto.UnimplementedAPIServer
}
2022-06-28 12:33:27 -04:00
// New creates a new initialization server.
func New(lock locker, kube ClusterInitializer, issuer atls.Issuer, fh file.Handler, log *logger.Logger) *Server {
log = log.Named("initServer")
2022-06-21 11:59:12 -04:00
server := &Server{
2022-06-28 12:33:27 -04:00
nodeLock: lock,
disk: diskencryption.New(),
initializer: kube,
2022-07-05 08:14:11 -04:00
fileHandler: fh,
log: log,
2022-06-21 11:59:12 -04:00
}
grpcServer := grpc.NewServer(
grpc.Creds(atlscredentials.New(issuer, nil)),
2022-08-01 10:51:34 -04:00
grpc.KeepaliveParams(keepalive.ServerParameters{Time: 15 * time.Second}),
log.Named("gRPC").GetServerUnaryInterceptor(),
2022-06-21 11:59:12 -04:00
)
initproto.RegisterAPIServer(grpcServer, server)
server.grpcServer = grpcServer
return server
}
// Serve starts the initialization server.
func (s *Server) Serve(ip, port string, cleaner cleaner) error {
s.cleaner = cleaner
2022-06-21 11:59:12 -04:00
lis, err := net.Listen("tcp", net.JoinHostPort(ip, port))
if err != nil {
return fmt.Errorf("failed to listen: %w", err)
}
2022-08-01 10:51:34 -04:00
s.log.Infof("Starting")
return s.grpcServer.Serve(lis)
2022-06-21 11:59:12 -04:00
}
2022-06-28 12:33:27 -04:00
// Init initializes the cluster.
2022-06-21 11:59:12 -04:00
func (s *Server) Init(ctx context.Context, req *initproto.InitRequest) (*initproto.InitResponse, error) {
defer s.cleaner.Clean()
log := s.log.With(zap.String("peer", grpclog.PeerAddrFromContext(ctx)))
log.Infof("Init called")
2022-07-05 08:14:11 -04:00
// generate values for cluster attestation
measurementSalt, clusterID, err := deriveMeasurementValues(req.MasterSecret, req.Salt)
if err != nil {
return nil, status.Errorf(codes.Internal, "deriving measurement values: %s", err)
}
nodeLockAcquired, err := s.nodeLock.TryLockOnce(clusterID)
if err != nil {
return nil, status.Errorf(codes.Internal, "locking node: %s", err)
}
if !nodeLockAcquired {
2022-06-28 12:33:27 -04:00
// The join client seems to already have a connection to an
// existing join service. At this point, any further call to
// init does not make sense, so we just stop.
//
// The server stops itself after the current call is done.
log.Warnf("Node is already in a join process")
2022-06-21 11:59:12 -04:00
return nil, status.Error(codes.FailedPrecondition, "node is already being activated")
}
if err := s.setupDisk(req.MasterSecret, req.Salt); err != nil {
2022-06-21 11:59:12 -04:00
return nil, status.Errorf(codes.Internal, "setting up disk: %s", err)
}
state := nodestate.NodeState{
Role: role.ControlPlane,
MeasurementSalt: measurementSalt,
2022-06-21 11:59:12 -04:00
}
if err := state.ToFile(s.fileHandler); err != nil {
return nil, status.Errorf(codes.Internal, "persisting node state: %s", err)
}
2022-06-28 12:33:27 -04:00
kubeconfig, err := s.initializer.InitCluster(ctx,
2022-06-21 11:59:12 -04:00
req.AutoscalingNodeGroups,
req.CloudServiceAccountUri,
req.KubernetesVersion,
measurementSalt,
req.EnforcedPcrs,
req.EnforceIdkeydigest,
resources.KMSConfig{
2022-06-21 11:59:12 -04:00
MasterSecret: req.MasterSecret,
Salt: req.Salt,
2022-06-21 11:59:12 -04:00
KMSURI: req.KmsUri,
StorageURI: req.StorageUri,
KeyEncryptionKeyID: req.KeyEncryptionKeyId,
UseExistingKEK: req.UseExistingKek,
},
sshProtoKeysToMap(req.SshUserKeys),
2022-08-12 04:20:19 -04:00
req.HelmDeployments,
s.log,
2022-06-21 11:59:12 -04:00
)
if err != nil {
return nil, status.Errorf(codes.Internal, "initializing cluster: %s", err)
}
log.Infof("Init succeeded")
2022-06-21 11:59:12 -04:00
return &initproto.InitResponse{
Kubeconfig: kubeconfig,
ClusterId: clusterID,
2022-06-21 11:59:12 -04:00
}, nil
}
// Stop stops the initialization server gracefully.
// It calls GracefulStop on the underlying server, which blocks until all
// requests are done.
func (s *Server) Stop() {
s.grpcServer.GracefulStop()
}
func (s *Server) setupDisk(masterSecret, salt []byte) error {
2022-06-21 11:59:12 -04:00
if err := s.disk.Open(); err != nil {
return fmt.Errorf("opening encrypted disk: %w", err)
}
defer s.disk.Close()
uuid, err := s.disk.UUID()
if err != nil {
return fmt.Errorf("retrieving uuid of disk: %w", err)
}
uuid = strings.ToLower(uuid)
diskKey, err := crypto.DeriveKey(masterSecret, salt, []byte(crypto.HKDFInfoPrefix+uuid), crypto.DerivedKeyLengthDefault)
2022-06-21 11:59:12 -04:00
if err != nil {
return err
}
return s.disk.UpdatePassphrase(string(diskKey))
}
// sshProtoKeysToMap converts the SSH user keys of an init request into a map
// from username to public key.
//
// Nil entries are skipped so that a list containing a nil pointer cannot cause
// a nil pointer dereference. If a username occurs more than once, the last
// entry wins. The returned map is never nil, even for an empty or nil input.
func sshProtoKeysToMap(keys []*initproto.SSHUserKey) map[string]string {
	keyMap := make(map[string]string, len(keys))
	for _, key := range keys {
		if key == nil {
			continue
		}
		keyMap[key.Username] = key.PublicKey
	}
	return keyMap
}
// deriveMeasurementValues produces the values used for cluster attestation.
//
// salt is a freshly generated random value of length crypto.RNGLengthDefault.
// clusterID is derived from that salt and from the measurement secret, which
// in turn is derived from masterSecret and hkdfSalt. If any step fails, both
// results are nil and the step's error is returned unchanged.
func deriveMeasurementValues(masterSecret, hkdfSalt []byte) (salt, clusterID []byte, err error) {
	if salt, err = crypto.GenerateRandomBytes(crypto.RNGLengthDefault); err != nil {
		return nil, nil, err
	}

	measurementSecret, err := attestation.DeriveMeasurementSecret(masterSecret, hkdfSalt)
	if err != nil {
		return nil, nil, err
	}

	if clusterID, err = attestation.DeriveClusterID(measurementSecret, salt); err != nil {
		return nil, nil, err
	}
	return salt, clusterID, nil
}
2022-06-28 12:33:27 -04:00
// ClusterInitializer has the ability to initialize a cluster.
2022-06-21 11:59:12 -04:00
type ClusterInitializer interface {
2022-06-28 12:33:27 -04:00
// InitCluster initializes a new Kubernetes cluster.
2022-06-21 11:59:12 -04:00
InitCluster(
ctx context.Context,
autoscalingNodeGroups []string,
cloudServiceAccountURI string,
2022-06-28 12:33:27 -04:00
k8sVersion string,
measurementSalt []byte,
enforcedPcrs []uint32,
enforceIdKeyDigest bool,
kmsConfig resources.KMSConfig,
2022-06-21 11:59:12 -04:00
sshUserKeys map[string]string,
2022-08-12 04:20:19 -04:00
helmDeployments []byte,
log *logger.Logger,
2022-06-21 11:59:12 -04:00
) ([]byte, error)
}
2022-06-28 12:33:27 -04:00
// encryptedDisk is the set of disk operations the init server needs in order
// to replace the passphrase of the node's encrypted disk.
type encryptedDisk interface {
	// Open prepares the underlying device for disk operations.
	Open() error
	// Close closes the underlying device.
	Close() error
	// UUID gets the device's UUID.
	UUID() (string, error)
	// UpdatePassphrase switches the initial random passphrase of the encrypted disk to a permanent passphrase.
	UpdatePassphrase(passphrase string) error
}
2022-06-28 12:33:27 -04:00
// serveStopper is the part of a server the init server depends on: it can
// serve on a listener and be stopped gracefully.
type serveStopper interface {
	// Serve starts the server.
	Serve(lis net.Listener) error
	// GracefulStop stops the server and blocks until all requests are done.
	GracefulStop()
}
// locker guards a node against being claimed more than once.
type locker interface {
	// TryLockOnce tries to lock the node. If the node is already locked, it
	// returns false. If the node is unlocked, it locks it and returns true.
	TryLockOnce(clusterID []byte) (bool, error)
}
// cleaner is invoked by the init server whenever an Init call returns.
type cleaner interface {
	// Clean is called (deferred) at the end of every Init call.
	Clean()
}