2022-09-08 08:45:27 -04:00
|
|
|
/*
|
|
|
|
Copyright (c) Edgeless Systems GmbH
|
|
|
|
|
|
|
|
SPDX-License-Identifier: AGPL-3.0-only
|
|
|
|
*/
|
|
|
|
|
2023-01-19 09:57:50 -05:00
|
|
|
/*
|
|
|
|
Package recoveryserver implements the gRPC endpoints for recovering a restarting node.
|
|
|
|
|
|
|
|
The endpoint is only available for control-plane nodes;
|
|
|
|
worker nodes should only rejoin the cluster using Constellation's JoinService.
|
|
|
|
|
|
|
|
This endpoint can be used by an admin in case of a complete cluster shutdown,
|
|
|
|
in which case a node is unable to rejoin the cluster automatically.
|
|
|
|
*/
|
2022-09-08 08:45:27 -04:00
|
|
|
package recoveryserver
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
2024-02-08 09:20:01 -05:00
|
|
|
"log/slog"
|
2022-09-08 08:45:27 -04:00
|
|
|
"net"
|
|
|
|
"sync"
|
|
|
|
|
2022-09-21 07:47:57 -04:00
|
|
|
"github.com/edgelesssys/constellation/v2/disk-mapper/recoverproto"
|
|
|
|
"github.com/edgelesssys/constellation/v2/internal/atls"
|
2023-01-16 05:19:03 -05:00
|
|
|
"github.com/edgelesssys/constellation/v2/internal/crypto"
|
2022-09-21 07:47:57 -04:00
|
|
|
"github.com/edgelesssys/constellation/v2/internal/grpc/atlscredentials"
|
|
|
|
"github.com/edgelesssys/constellation/v2/internal/grpc/grpclog"
|
2023-01-16 05:19:03 -05:00
|
|
|
"github.com/edgelesssys/constellation/v2/internal/kms/kms"
|
2022-09-21 07:47:57 -04:00
|
|
|
"github.com/edgelesssys/constellation/v2/internal/logger"
|
2022-09-08 08:45:27 -04:00
|
|
|
"google.golang.org/grpc"
|
|
|
|
"google.golang.org/grpc/codes"
|
|
|
|
"google.golang.org/grpc/status"
|
|
|
|
)
|
|
|
|
|
2023-01-16 05:19:03 -05:00
|
|
|
// kmsFactory creates a KMS client from a storage URI and a KMS URI.
// RecoveryServer invokes it once per Recover call with the URIs from the request.
type kmsFactory func(ctx context.Context, storageURI, kmsURI string) (kms.CloudKMS, error)
|
|
|
|
|
2022-09-08 08:45:27 -04:00
|
|
|
// RecoveryServer is a gRPC server that can be used by an admin to recover a restarting node.
|
|
|
|
type RecoveryServer struct {
|
|
|
|
mux sync.Mutex
|
|
|
|
|
|
|
|
diskUUID string
|
|
|
|
stateDiskKey []byte
|
|
|
|
measurementSecret []byte
|
|
|
|
grpcServer server
|
2023-01-16 05:19:03 -05:00
|
|
|
factory kmsFactory
|
2022-09-08 08:45:27 -04:00
|
|
|
|
2024-02-08 09:20:01 -05:00
|
|
|
log *slog.Logger
|
2022-09-08 08:45:27 -04:00
|
|
|
|
|
|
|
recoverproto.UnimplementedAPIServer
|
|
|
|
}
|
|
|
|
|
|
|
|
// New returns a new RecoveryServer.
|
2024-02-08 09:20:01 -05:00
|
|
|
func New(issuer atls.Issuer, factory kmsFactory, log *slog.Logger) *RecoveryServer {
|
2022-09-08 08:45:27 -04:00
|
|
|
server := &RecoveryServer{
|
2023-01-16 05:19:03 -05:00
|
|
|
log: log,
|
|
|
|
factory: factory,
|
2022-09-08 08:45:27 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
grpcServer := grpc.NewServer(
|
|
|
|
grpc.Creds(atlscredentials.New(issuer, nil)),
|
2024-08-29 04:44:22 -04:00
|
|
|
logger.GetServerStreamInterceptor(logger.GRPCLogger(log)),
|
2022-09-08 08:45:27 -04:00
|
|
|
)
|
|
|
|
recoverproto.RegisterAPIServer(grpcServer, server)
|
|
|
|
|
|
|
|
server.grpcServer = grpcServer
|
|
|
|
return server
|
|
|
|
}
|
|
|
|
|
|
|
|
// Serve starts the recovery server.
|
|
|
|
// It blocks until a recover request call is successful.
|
|
|
|
// The server will shut down when the call is successful and the keys are returned.
|
|
|
|
// Additionally, the server can be shutdown by canceling the context.
|
|
|
|
func (s *RecoveryServer) Serve(ctx context.Context, listener net.Listener, diskUUID string) (diskKey, measurementSecret []byte, err error) {
|
2024-02-08 09:20:01 -05:00
|
|
|
s.log.Info("Starting RecoveryServer")
|
2022-09-08 08:45:27 -04:00
|
|
|
s.diskUUID = diskUUID
|
|
|
|
recoveryDone := make(chan struct{}, 1)
|
|
|
|
var serveErr error
|
|
|
|
|
|
|
|
var wg sync.WaitGroup
|
|
|
|
wg.Add(1)
|
|
|
|
defer wg.Wait()
|
|
|
|
go func() {
|
|
|
|
defer wg.Done()
|
|
|
|
serveErr = s.grpcServer.Serve(listener)
|
|
|
|
recoveryDone <- struct{}{}
|
|
|
|
}()
|
|
|
|
|
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
2024-02-08 09:20:01 -05:00
|
|
|
s.log.Info("Context canceled, shutting down server")
|
2022-09-08 08:45:27 -04:00
|
|
|
s.grpcServer.GracefulStop()
|
|
|
|
return nil, nil, ctx.Err()
|
|
|
|
case <-recoveryDone:
|
|
|
|
if serveErr != nil {
|
|
|
|
return nil, nil, serveErr
|
|
|
|
}
|
|
|
|
return s.stateDiskKey, s.measurementSecret, nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Recover is a bidirectional streaming RPC that is used to send recovery keys to a restarting node.
|
2023-01-16 05:19:03 -05:00
|
|
|
func (s *RecoveryServer) Recover(ctx context.Context, req *recoverproto.RecoverMessage) (*recoverproto.RecoverResponse, error) {
|
2022-09-08 08:45:27 -04:00
|
|
|
s.mux.Lock()
|
|
|
|
defer s.mux.Unlock()
|
2024-02-08 09:20:01 -05:00
|
|
|
log := s.log.With(slog.String("peer", grpclog.PeerAddrFromContext(ctx)))
|
2022-09-08 08:45:27 -04:00
|
|
|
|
2024-02-08 09:20:01 -05:00
|
|
|
log.Info("Received recover call")
|
2022-09-08 08:45:27 -04:00
|
|
|
|
2023-01-16 05:19:03 -05:00
|
|
|
cloudKms, err := s.factory(ctx, req.StorageUri, req.KmsUri)
|
2022-09-08 08:45:27 -04:00
|
|
|
if err != nil {
|
2023-01-16 05:19:03 -05:00
|
|
|
return nil, status.Errorf(codes.Internal, "creating kms client: %s", err)
|
2022-09-08 08:45:27 -04:00
|
|
|
}
|
|
|
|
|
2023-01-16 05:19:03 -05:00
|
|
|
measurementSecret, err := cloudKms.GetDEK(ctx, crypto.DEKPrefix+crypto.MeasurementSecretKeyID, crypto.DerivedKeyLengthDefault)
|
2022-09-08 08:45:27 -04:00
|
|
|
if err != nil {
|
2023-01-16 05:19:03 -05:00
|
|
|
return nil, status.Errorf(codes.Internal, "requesting measurementSecret: %s", err)
|
2022-09-08 08:45:27 -04:00
|
|
|
}
|
2023-01-16 05:19:03 -05:00
|
|
|
stateDiskKey, err := cloudKms.GetDEK(ctx, crypto.DEKPrefix+s.diskUUID, crypto.StateDiskKeyLength)
|
|
|
|
if err != nil {
|
|
|
|
return nil, status.Errorf(codes.Internal, "requesting stateDiskKey: %s", err)
|
2022-09-08 08:45:27 -04:00
|
|
|
}
|
2023-01-16 05:19:03 -05:00
|
|
|
s.stateDiskKey = stateDiskKey
|
|
|
|
s.measurementSecret = measurementSecret
|
2024-02-08 09:20:01 -05:00
|
|
|
log.Info("Received state disk key and measurement secret, shutting down server")
|
2022-09-08 08:45:27 -04:00
|
|
|
|
|
|
|
go s.grpcServer.GracefulStop()
|
2023-01-16 05:19:03 -05:00
|
|
|
return &recoverproto.RecoverResponse{}, nil
|
2022-09-08 08:45:27 -04:00
|
|
|
}
|
|
|
|
|
2022-10-05 09:02:46 -04:00
|
|
|
// StubServer offers the same Serve method as RecoveryServer but never starts
// an actual server.
type StubServer struct {
	// log receives the message emitted when Serve is called.
	log *slog.Logger
}
|
|
|
|
|
|
|
|
// NewStub returns a new stubbed RecoveryServer.
|
|
|
|
// We use this to avoid having to start a server for worker nodes, since they don't require manual recovery.
|
2024-02-08 09:20:01 -05:00
|
|
|
func NewStub(log *slog.Logger) *StubServer {
|
2022-10-05 09:02:46 -04:00
|
|
|
return &StubServer{log: log}
|
2022-09-14 07:25:42 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
// Serve waits until the context is canceled and returns nil.
|
2022-10-05 09:02:46 -04:00
|
|
|
func (s *StubServer) Serve(ctx context.Context, _ net.Listener, _ string) ([]byte, []byte, error) {
|
2024-02-08 09:20:01 -05:00
|
|
|
s.log.Info("Running as worker node, skipping recovery server")
|
2022-09-14 07:25:42 -04:00
|
|
|
<-ctx.Done()
|
|
|
|
return nil, nil, ctx.Err()
|
|
|
|
}
|
|
|
|
|
2022-09-08 08:45:27 -04:00
|
|
|
// server is the set of gRPC server methods that RecoveryServer relies on.
type server interface {
	// Serve is run by RecoveryServer.Serve in its own goroutine; its result
	// decides what RecoveryServer.Serve returns.
	Serve(lis net.Listener) error
	// GracefulStop is called to shut the server down, either on context
	// cancellation or after a successful Recover call.
	GracefulStop()
}
|