mirror of
https://github.com/edgelesssys/constellation.git
synced 2025-08-07 06:22:17 -04:00
AB#2260 Refactor disk-mapper recovery (#82)
* Refactor disk-mapper recovery * Adapt constellation recover command to use new disk-mapper recovery API * Fix Cilium connectivity on rebooting nodes (#89) * Lower CoreDNS reschedule timeout to 10 seconds (#93) Signed-off-by: Daniel Weiße <dw@edgeless.systems>
This commit is contained in:
parent
a7b20b2a11
commit
8cb155d5c5
40 changed files with 1600 additions and 1130 deletions
226
disk-mapper/internal/setup/setup.go
Normal file
226
disk-mapper/internal/setup/setup.go
Normal file
|
@ -0,0 +1,226 @@
|
|||
/*
|
||||
Copyright (c) Edgeless Systems GmbH
|
||||
|
||||
SPDX-License-Identifier: AGPL-3.0-only
|
||||
*/
|
||||
|
||||
package setup
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/rand"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"sync"
|
||||
"syscall"
|
||||
|
||||
"github.com/edgelesssys/constellation/disk-mapper/internal/systemd"
|
||||
"github.com/edgelesssys/constellation/internal/attestation"
|
||||
"github.com/edgelesssys/constellation/internal/attestation/vtpm"
|
||||
"github.com/edgelesssys/constellation/internal/constants"
|
||||
"github.com/edgelesssys/constellation/internal/crypto"
|
||||
"github.com/edgelesssys/constellation/internal/file"
|
||||
"github.com/edgelesssys/constellation/internal/logger"
|
||||
"github.com/edgelesssys/constellation/internal/nodestate"
|
||||
"github.com/spf13/afero"
|
||||
)
|
||||
|
||||
// Paths and settings used while preparing the encrypted state disk.
const (
	// keyPath is the directory the state disk passphrase is written to.
	keyPath = "/run/cryptsetup-keys.d"
	// keyFile is the name of the passphrase file inside keyPath.
	keyFile = "state.key"

	// stateDiskMappedName is the name the state disk is mapped under (/dev/mapper/<name>).
	stateDiskMappedName = "state"
	// stateDiskMountPath is the directory the mapped state disk is mounted on.
	stateDiskMountPath = "/var/run/state"
	// stateInfoPath is the node state file read from the mounted state disk.
	stateInfoPath = stateDiskMountPath + "/constellation/node_state.json"

	// cryptsetupOptions is handed to the cryptsetup configuration generator.
	cryptsetupOptions = "cipher=aes-xts-plain64,integrity=hmac-sha256"
)
|
||||
|
||||
// SetupManager handles formatting, mapping, mounting and unmounting of state disks.
|
||||
type SetupManager struct {
|
||||
log *logger.Logger
|
||||
csp string
|
||||
diskPath string
|
||||
fs afero.Afero
|
||||
mapper DeviceMapper
|
||||
mounter Mounter
|
||||
config ConfigurationGenerator
|
||||
openTPM vtpm.TPMOpenFunc
|
||||
}
|
||||
|
||||
// New initializes a SetupManager with the given parameters.
|
||||
func New(log *logger.Logger, csp string, diskPath string, fs afero.Afero,
|
||||
mapper DeviceMapper, mounter Mounter, openTPM vtpm.TPMOpenFunc,
|
||||
) *SetupManager {
|
||||
return &SetupManager{
|
||||
log: log,
|
||||
csp: csp,
|
||||
diskPath: diskPath,
|
||||
fs: fs,
|
||||
mapper: mapper,
|
||||
mounter: mounter,
|
||||
config: systemd.New(fs),
|
||||
openTPM: openTPM,
|
||||
}
|
||||
}
|
||||
|
||||
// PrepareExistingDisk requests and waits for a decryption key to remap the encrypted state disk.
|
||||
// Once the disk is mapped, the function taints the node as initialized by updating it's PCRs.
|
||||
func (s *SetupManager) PrepareExistingDisk(recover RecoveryDoer) error {
|
||||
s.log.Infof("Preparing existing state disk")
|
||||
uuid := s.mapper.DiskUUID()
|
||||
|
||||
endpoint := net.JoinHostPort("0.0.0.0", strconv.Itoa(constants.RecoveryPort))
|
||||
|
||||
passphrase, measurementSecret, err := recover.Do(uuid, endpoint)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to perform recovery: %w", err)
|
||||
}
|
||||
|
||||
if err := s.mapper.MapDisk(stateDiskMappedName, string(passphrase)); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := s.mounter.MkdirAll(stateDiskMountPath, os.ModePerm); err != nil {
|
||||
return err
|
||||
}
|
||||
// we do not care about cleaning up the mount point on error, since any errors returned here should cause a boot failure
|
||||
if err := s.mounter.Mount(filepath.Join("/dev/mapper/", stateDiskMappedName), stateDiskMountPath, "ext4", syscall.MS_RDONLY, ""); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
measurementSalt, err := s.readMeasurementSalt(stateInfoPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
clusterID, err := attestation.DeriveClusterID(measurementSecret, measurementSalt)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// taint the node as initialized
|
||||
if err := vtpm.MarkNodeAsBootstrapped(s.openTPM, clusterID); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := s.saveConfiguration(passphrase); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return s.mounter.Unmount(stateDiskMountPath, 0)
|
||||
}
|
||||
|
||||
// PrepareNewDisk prepares an instances state disk by formatting the disk as a LUKS device using a random passphrase.
|
||||
func (s *SetupManager) PrepareNewDisk() error {
|
||||
s.log.Infof("Preparing new state disk")
|
||||
|
||||
// generate and save temporary passphrase
|
||||
passphrase := make([]byte, crypto.RNGLengthDefault)
|
||||
if _, err := rand.Read(passphrase); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.saveConfiguration(passphrase); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := s.mapper.FormatDisk(string(passphrase)); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return s.mapper.MapDisk(stateDiskMappedName, string(passphrase))
|
||||
}
|
||||
|
||||
func (s *SetupManager) readMeasurementSalt(path string) ([]byte, error) {
|
||||
handler := file.NewHandler(s.fs)
|
||||
var state nodestate.NodeState
|
||||
if err := handler.ReadJSON(path, &state); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if len(state.MeasurementSalt) != crypto.RNGLengthDefault {
|
||||
return nil, errors.New("missing state information to retaint node")
|
||||
}
|
||||
|
||||
return state.MeasurementSalt, nil
|
||||
}
|
||||
|
||||
// saveConfiguration saves the given passphrase and cryptsetup mapping configuration to disk.
|
||||
func (s *SetupManager) saveConfiguration(passphrase []byte) error {
|
||||
// passphrase
|
||||
if err := s.fs.MkdirAll(keyPath, os.ModePerm); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.fs.WriteFile(filepath.Join(keyPath, keyFile), passphrase, 0o400); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// systemd cryptsetup unit
|
||||
return s.config.Generate(stateDiskMappedName, s.diskPath, filepath.Join(keyPath, keyFile), cryptsetupOptions)
|
||||
}
|
||||
|
||||
// recoveryServer is the server side of the recovery procedure as used by nodeRecoverer.
type recoveryServer interface {
	// Serve is called with a context, the listener to serve on and the state disk UUID.
	// It returns a key and a secret, or an error.
	Serve(ctx context.Context, lis net.Listener, uuid string) (key, secret []byte, err error)
}
|
||||
|
||||
// rejoinClient is the client side of the recovery procedure as used by nodeRecoverer.
type rejoinClient interface {
	// Start is called with a context and the state disk UUID and returns a key and a secret.
	// It has no error return.
	Start(ctx context.Context, uuid string) (key, secret []byte)
}
|
||||
|
||||
type nodeRecoverer struct {
|
||||
recoveryServer recoveryServer
|
||||
rejoinClient rejoinClient
|
||||
}
|
||||
|
||||
// NewNodeRecoverer initializes a new nodeRecoverer.
|
||||
func NewNodeRecoverer(recoveryServer recoveryServer, rejoinClient rejoinClient) *nodeRecoverer {
|
||||
return &nodeRecoverer{
|
||||
recoveryServer: recoveryServer,
|
||||
rejoinClient: rejoinClient,
|
||||
}
|
||||
}
|
||||
|
||||
// Do performs a recovery procedure on the given state disk.
|
||||
// The method starts a gRPC server to allow manual recovery by a user.
|
||||
// At the same time it tries to request a decryption key from all available Constellation control-plane nodes.
|
||||
func (r *nodeRecoverer) Do(uuid, endpoint string) (passphrase, measurementSecret []byte, err error) {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
lis, err := net.Listen("tcp", endpoint)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
defer lis.Close()
|
||||
|
||||
var once sync.Once
|
||||
var wg sync.WaitGroup
|
||||
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
key, secret, serveErr := r.recoveryServer.Serve(ctx, lis, uuid)
|
||||
once.Do(func() {
|
||||
cancel()
|
||||
passphrase = key
|
||||
measurementSecret = secret
|
||||
})
|
||||
if serveErr != nil && !errors.Is(serveErr, context.Canceled) {
|
||||
err = serveErr
|
||||
}
|
||||
}()
|
||||
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
key, secret := r.rejoinClient.Start(ctx, uuid)
|
||||
once.Do(func() {
|
||||
cancel()
|
||||
passphrase = key
|
||||
measurementSecret = secret
|
||||
})
|
||||
}()
|
||||
|
||||
wg.Wait()
|
||||
return passphrase, measurementSecret, err
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue