AB#2261 Add loadbalancer for control-plane recovery (#151)

Signed-off-by: Daniel Weiße <dw@edgeless.systems>
This commit is contained in:
Daniel Weiße 2022-09-14 13:25:42 +02:00 committed by GitHub
parent 273d89e002
commit e367e1a68b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 418 additions and 243 deletions

View File

@ -22,6 +22,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added
- Loadbalancer for control-plane recovery
### Changed
<!-- For changes in existing functionality. -->

View File

@ -31,6 +31,7 @@ const (
coordHealthProbeName = "coordHealthProbe"
debugdHealthProbeName = "debugdHealthProbe"
konnectivityHealthProbeName = "konnectivityHealthProbe"
recoveryHealthProbeName = "recoveryHealthProbe"
)
// Azure returns a Azure representation of LoadBalancer.
@ -94,6 +95,13 @@ func (l LoadBalancer) Azure() armnetwork.LoadBalancer {
Port: to.Ptr[int32](constants.KonnectivityPort),
},
},
{
Name: to.Ptr(recoveryHealthProbeName),
Properties: &armnetwork.ProbePropertiesFormat{
Protocol: to.Ptr(armnetwork.ProbeProtocolTCP),
Port: to.Ptr[int32](constants.RecoveryPort),
},
},
},
LoadBalancingRules: []*armnetwork.LoadBalancingRule{
{
@ -212,6 +220,35 @@ func (l LoadBalancer) Azure() armnetwork.LoadBalancer {
},
},
},
{
Name: to.Ptr("recoveryLoadBalancerRule"),
Properties: &armnetwork.LoadBalancingRulePropertiesFormat{
FrontendIPConfiguration: &armnetwork.SubResource{
ID: to.Ptr("/subscriptions/" + l.Subscription +
"/resourceGroups/" + l.ResourceGroup +
"/providers/Microsoft.Network/loadBalancers/" + l.Name +
"/frontendIPConfigurations/" + frontEndIPConfigName),
},
FrontendPort: to.Ptr[int32](constants.RecoveryPort),
BackendPort: to.Ptr[int32](constants.RecoveryPort),
Protocol: to.Ptr(armnetwork.TransportProtocolTCP),
Probe: &armnetwork.SubResource{
ID: to.Ptr("/subscriptions/" + l.Subscription +
"/resourceGroups/" + l.ResourceGroup +
"/providers/Microsoft.Network/loadBalancers/" + l.Name +
"/probes/" + recoveryHealthProbeName),
},
DisableOutboundSnat: to.Ptr(true),
BackendAddressPools: []*armnetwork.SubResource{
{
ID: to.Ptr("/subscriptions/" + l.Subscription +
"/resourceGroups/" + l.ResourceGroup +
"/providers/Microsoft.Network/loadBalancers/" + l.Name +
"/backendAddressPools/" + backEndAddressPoolControlPlaneName),
},
},
},
},
},
OutboundRules: []*armnetwork.OutboundRule{
{

View File

@ -40,7 +40,7 @@ func TestCreator(t *testing.T) {
GCPSubnetwork: "subnetwork",
GCPLoadbalancers: []string{"kube-lb", "boot-lb", "verify-lb"},
GCPFirewalls: []string{
"bootstrapper", "ssh", "nodeport", "kubernetes", "konnectivity",
"bootstrapper", "ssh", "nodeport", "kubernetes", "konnectivity", "recovery",
"allow-cluster-internal-tcp", "allow-cluster-internal-udp", "allow-cluster-internal-icmp",
"allow-node-internal-tcp", "allow-node-internal-udp", "allow-node-internal-icmp",
},

View File

@ -8,26 +8,28 @@ package cmd
import (
"context"
"errors"
"fmt"
"io"
"net"
"time"
"github.com/edgelesssys/constellation/cli/internal/cloudcmd"
"github.com/edgelesssys/constellation/cli/internal/proto"
"github.com/edgelesssys/constellation/internal/atls"
"github.com/edgelesssys/constellation/disk-mapper/recoverproto"
"github.com/edgelesssys/constellation/internal/attestation"
"github.com/edgelesssys/constellation/internal/cloud/cloudprovider"
"github.com/edgelesssys/constellation/internal/constants"
"github.com/edgelesssys/constellation/internal/crypto"
"github.com/edgelesssys/constellation/internal/file"
"github.com/edgelesssys/constellation/internal/grpc/dialer"
grpcRetry "github.com/edgelesssys/constellation/internal/grpc/retry"
"github.com/edgelesssys/constellation/internal/retry"
"github.com/edgelesssys/constellation/internal/state"
"github.com/spf13/afero"
"github.com/spf13/cobra"
"go.uber.org/multierr"
)
// recoveryClient abstracts connecting to a node in recovery mode and pushing
// the recovery secrets to it, so the gRPC client can be stubbed in tests.
type recoveryClient interface {
	// Connect establishes a connection to endpoint, using the given
	// validator for attestation of the connection.
	Connect(endpoint string, validators atls.Validator) error
	// Recover pushes the recovery secrets derived from masterSecret and salt
	// to the connected node.
	Recover(ctx context.Context, masterSecret, salt []byte) error
	io.Closer
}
// NewRecoverCmd returns a new cobra.Command for the recover command.
func NewRecoverCmd() *cobra.Command {
cmd := &cobra.Command{
@ -46,12 +48,13 @@ func NewRecoverCmd() *cobra.Command {
func runRecover(cmd *cobra.Command, _ []string) error {
fileHandler := file.NewHandler(afero.NewOsFs())
recoveryClient := &proto.RecoverClient{}
defer recoveryClient.Close()
return recover(cmd, fileHandler, recoveryClient)
newDialer := func(validator *cloudcmd.Validator) *dialer.Dialer {
return dialer.New(nil, validator.V(cmd), &net.Dialer{})
}
return recover(cmd, fileHandler, newDialer)
}
func recover(cmd *cobra.Command, fileHandler file.Handler, recoveryClient recoveryClient) error {
func recover(cmd *cobra.Command, fileHandler file.Handler, newDialer func(validator *cloudcmd.Validator) *dialer.Dialer) error {
flags, err := parseRecoverFlags(cmd)
if err != nil {
return err
@ -73,29 +76,103 @@ func recover(cmd *cobra.Command, fileHandler file.Handler, recoveryClient recove
return fmt.Errorf("reading and validating config: %w", err)
}
validators, err := cloudcmd.NewValidator(provider, config)
validator, err := cloudcmd.NewValidator(provider, config)
if err != nil {
return err
}
if err := recoveryClient.Connect(flags.endpoint, validators.V(cmd)); err != nil {
return err
}
if err := recoveryClient.Recover(cmd.Context(), masterSecret.Key, masterSecret.Salt); err != nil {
return err
if err := recoverCall(cmd.Context(), newDialer(validator), flags.endpoint, masterSecret.Key, masterSecret.Salt); err != nil {
return fmt.Errorf("recovering cluster: %w", err)
}
cmd.Println("Pushed recovery key.")
return nil
}
// recoverCall pushes the recovery secrets to the recovery server at endpoint.
// It derives the measurement secret from key and salt, then retries the
// recovery handshake for as long as the server reports itself unavailable
// (e.g. while the recovery loadbalancer is still coming up).
func recoverCall(ctx context.Context, dialer grpcDialer, endpoint string, key, salt []byte) error {
	measurementSecret, err := attestation.DeriveMeasurementSecret(key, salt)
	if err != nil {
		return err
	}
	doer := &recoverDoer{
		dialer:            dialer,
		endpoint:          endpoint,
		getDiskKey:        getStateDiskKeyFunc(key, salt),
		measurementSecret: measurementSecret,
	}
	// Retry with a 30s interval; grpcRetry.ServiceIsUnavailable decides
	// which errors are considered transient.
	retrier := retry.NewIntervalRetrier(doer, 30*time.Second, grpcRetry.ServiceIsUnavailable)
	return retrier.Do(ctx)
}
// recoverDoer performs a single recovery attempt against a recovery server.
// It is driven by the interval retrier in recoverCall.
type recoverDoer struct {
	dialer   grpcDialer
	endpoint string // address of the recovery server
	// measurementSecret is sent as the first message of the recovery stream.
	measurementSecret []byte
	// getDiskKey derives the state disk key for the disk UUID reported by the server.
	getDiskKey func(uuid string) (key []byte, err error)
}
// Do performs one recovery handshake: dial the endpoint, send the measurement
// secret, receive the disk UUID, derive and send the matching state disk key,
// then wait for the server to close the stream. The message order is fixed by
// the recovery server's protocol and must not be changed.
func (d *recoverDoer) Do(ctx context.Context) (retErr error) {
	conn, err := d.dialer.Dial(ctx, d.endpoint)
	if err != nil {
		return fmt.Errorf("dialing recovery server: %w", err)
	}
	defer conn.Close()

	// set up streaming client
	protoClient := recoverproto.NewAPIClient(conn)
	recoverclient, err := protoClient.Recover(ctx)
	if err != nil {
		return err
	}
	defer func() {
		// Record the CloseSend error without masking an earlier failure.
		if err := recoverclient.CloseSend(); err != nil {
			multierr.AppendInto(&retErr, err)
		}
	}()

	// send measurement secret as first message
	if err := recoverclient.Send(&recoverproto.RecoverMessage{
		Request: &recoverproto.RecoverMessage_MeasurementSecret{
			MeasurementSecret: d.measurementSecret,
		},
	}); err != nil {
		return err
	}

	// receive disk uuid
	res, err := recoverclient.Recv()
	if err != nil {
		return err
	}
	stateDiskKey, err := d.getDiskKey(res.DiskUuid)
	if err != nil {
		return err
	}

	// send disk key
	if err := recoverclient.Send(&recoverproto.RecoverMessage{
		Request: &recoverproto.RecoverMessage_StateDiskKey{
			StateDiskKey: stateDiskKey,
		},
	}); err != nil {
		return err
	}

	// io.EOF here means the server accepted the key and closed the stream;
	// any other error is a real failure.
	if _, err := recoverclient.Recv(); err != nil && !errors.Is(err, io.EOF) {
		return err
	}

	return nil
}
func parseRecoverFlags(cmd *cobra.Command) (recoverFlags, error) {
endpoint, err := cmd.Flags().GetString("endpoint")
if err != nil {
return recoverFlags{}, fmt.Errorf("parsing endpoint argument: %w", err)
}
endpoint, err = addPortIfMissing(endpoint, constants.BootstrapperPort)
endpoint, err = addPortIfMissing(endpoint, constants.RecoveryPort)
if err != nil {
return recoverFlags{}, fmt.Errorf("validating endpoint argument: %w", err)
}
@ -122,3 +199,9 @@ type recoverFlags struct {
secretPath string
configPath string
}
// getStateDiskKeyFunc returns a function that derives a state disk key of
// crypto.StateDiskKeyLength for a given disk UUID, using HKDF with the
// captured master key and salt.
func getStateDiskKeyFunc(masterKey, salt []byte) func(uuid string) ([]byte, error) {
	return func(uuid string) ([]byte, error) {
		return crypto.DeriveKey(masterKey, salt, []byte(crypto.HKDFInfoPrefix+uuid), crypto.StateDiskKeyLength)
	}
}

View File

@ -10,18 +10,25 @@ import (
"bytes"
"context"
"errors"
"net"
"strconv"
"testing"
"github.com/edgelesssys/constellation/internal/atls"
"github.com/edgelesssys/constellation/cli/internal/cloudcmd"
"github.com/edgelesssys/constellation/disk-mapper/recoverproto"
"github.com/edgelesssys/constellation/internal/cloud/cloudprovider"
"github.com/edgelesssys/constellation/internal/config"
"github.com/edgelesssys/constellation/internal/constants"
"github.com/edgelesssys/constellation/internal/crypto/testvector"
"github.com/edgelesssys/constellation/internal/file"
"github.com/edgelesssys/constellation/internal/grpc/atlscredentials"
"github.com/edgelesssys/constellation/internal/grpc/dialer"
"github.com/edgelesssys/constellation/internal/grpc/testdialer"
"github.com/edgelesssys/constellation/internal/state"
"github.com/spf13/afero"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"google.golang.org/grpc"
)
func TestRecoverCmdArgumentValidation(t *testing.T) {
@ -52,10 +59,25 @@ func TestRecoverCmdArgumentValidation(t *testing.T) {
func TestRecover(t *testing.T) {
validState := state.ConstellationState{CloudProvider: "GCP"}
invalidCSPState := state.ConstellationState{CloudProvider: "invalid"}
successActions := []func(stream recoverproto.API_RecoverServer) error{
func(stream recoverproto.API_RecoverServer) error {
_, err := stream.Recv()
return err
},
func(stream recoverproto.API_RecoverServer) error {
return stream.Send(&recoverproto.RecoverResponse{
DiskUuid: "00000000-0000-0000-0000-000000000000",
})
},
func(stream recoverproto.API_RecoverServer) error {
_, err := stream.Recv()
return err
},
}
testCases := map[string]struct {
existingState state.ConstellationState
client *stubRecoveryClient
recoverServerAPI *stubRecoveryServer
masterSecret testvector.HKDF
endpointFlag string
masterSecretFlag string
@ -65,14 +87,16 @@ func TestRecover(t *testing.T) {
}{
"works": {
existingState: validState,
client: &stubRecoveryClient{},
recoverServerAPI: &stubRecoveryServer{actions: successActions},
endpointFlag: "192.0.2.1",
masterSecret: testvector.HKDFZero,
},
"missing flags": {
recoverServerAPI: &stubRecoveryServer{actions: successActions},
wantErr: true,
},
"missing config": {
recoverServerAPI: &stubRecoveryServer{actions: successActions},
endpointFlag: "192.0.2.1",
masterSecret: testvector.HKDFZero,
configFlag: "nonexistent-config",
@ -80,6 +104,7 @@ func TestRecover(t *testing.T) {
},
"missing state": {
existingState: validState,
recoverServerAPI: &stubRecoveryServer{actions: successActions},
endpointFlag: "192.0.2.1",
masterSecret: testvector.HKDFZero,
stateless: true,
@ -87,20 +112,7 @@ func TestRecover(t *testing.T) {
},
"invalid cloud provider": {
existingState: invalidCSPState,
endpointFlag: "192.0.2.1",
masterSecret: testvector.HKDFZero,
wantErr: true,
},
"connect fails": {
existingState: validState,
client: &stubRecoveryClient{connectErr: errors.New("connect failed")},
endpointFlag: "192.0.2.1",
masterSecret: testvector.HKDFZero,
wantErr: true,
},
"pushing state key fails": {
existingState: validState,
client: &stubRecoveryClient{pushStateDiskKeyErr: errors.New("pushing key failed")},
recoverServerAPI: &stubRecoveryServer{actions: successActions},
endpointFlag: "192.0.2.1",
masterSecret: testvector.HKDFZero,
wantErr: true,
@ -113,6 +125,7 @@ func TestRecover(t *testing.T) {
require := require.New(t)
cmd := NewRecoverCmd()
cmd.SetContext(context.Background())
cmd.Flags().String("config", constants.ConfigFilename, "") // register persistent flag manually
out := &bytes.Buffer{}
cmd.SetOut(out)
@ -133,12 +146,32 @@ func TestRecover(t *testing.T) {
config := defaultConfigWithExpectedMeasurements(t, config.Default(), cloudprovider.FromString(tc.existingState.CloudProvider))
require.NoError(fileHandler.WriteYAML(constants.ConfigFilename, config))
require.NoError(fileHandler.WriteJSON("constellation-mastersecret.json", masterSecret{Key: tc.masterSecret.Secret, Salt: tc.masterSecret.Salt}, file.OptNone))
require.NoError(fileHandler.WriteJSON(
"constellation-mastersecret.json",
masterSecret{Key: tc.masterSecret.Secret, Salt: tc.masterSecret.Salt},
file.OptNone,
))
if !tc.stateless {
require.NoError(fileHandler.WriteJSON(constants.StateFilename, tc.existingState, file.OptNone))
require.NoError(fileHandler.WriteJSON(
constants.StateFilename,
tc.existingState,
file.OptNone,
))
}
err := recover(cmd, fileHandler, tc.client)
netDialer := testdialer.NewBufconnDialer()
newDialer := func(*cloudcmd.Validator) *dialer.Dialer {
return dialer.New(nil, nil, netDialer)
}
serverCreds := atlscredentials.New(nil, nil)
recoverServer := grpc.NewServer(grpc.Creds(serverCreds))
recoverproto.RegisterAPIServer(recoverServer, tc.recoverServerAPI)
listener := netDialer.GetListener(net.JoinHostPort("192.0.2.1", strconv.Itoa(constants.RecoveryPort)))
go recoverServer.Serve(listener)
defer recoverServer.GracefulStop()
err := recover(cmd, fileHandler, newDialer)
if tc.wantErr {
assert.Error(err)
@ -201,26 +234,156 @@ func TestParseRecoverFlags(t *testing.T) {
}
}
type stubRecoveryClient struct {
conn bool
connectErr error
closeErr error
pushStateDiskKeyErr error
pushStateDiskKeyKey []byte
func TestDoRecovery(t *testing.T) {
someErr := errors.New("error")
testCases := map[string]struct {
recoveryServer *stubRecoveryServer
wantErr bool
}{
"success": {
recoveryServer: &stubRecoveryServer{actions: []func(stream recoverproto.API_RecoverServer) error{
func(stream recoverproto.API_RecoverServer) error {
_, err := stream.Recv()
return err
},
func(stream recoverproto.API_RecoverServer) error {
return stream.Send(&recoverproto.RecoverResponse{
DiskUuid: "00000000-0000-0000-0000-000000000000",
})
},
func(stream recoverproto.API_RecoverServer) error {
_, err := stream.Recv()
return err
},
}},
},
"error on first recv": {
recoveryServer: &stubRecoveryServer{actions: []func(stream recoverproto.API_RecoverServer) error{
func(stream recoverproto.API_RecoverServer) error {
return someErr
},
}},
wantErr: true,
},
"error on send": {
recoveryServer: &stubRecoveryServer{actions: []func(stream recoverproto.API_RecoverServer) error{
func(stream recoverproto.API_RecoverServer) error {
_, err := stream.Recv()
return err
},
func(stream recoverproto.API_RecoverServer) error {
return someErr
},
}},
wantErr: true,
},
"error on second recv": {
recoveryServer: &stubRecoveryServer{actions: []func(stream recoverproto.API_RecoverServer) error{
func(stream recoverproto.API_RecoverServer) error {
_, err := stream.Recv()
return err
},
func(stream recoverproto.API_RecoverServer) error {
return stream.Send(&recoverproto.RecoverResponse{
DiskUuid: "00000000-0000-0000-0000-000000000000",
})
},
func(stream recoverproto.API_RecoverServer) error {
return someErr
},
}},
wantErr: true,
},
"final message is an error": {
recoveryServer: &stubRecoveryServer{actions: []func(stream recoverproto.API_RecoverServer) error{
func(stream recoverproto.API_RecoverServer) error {
_, err := stream.Recv()
return err
},
func(stream recoverproto.API_RecoverServer) error {
return stream.Send(&recoverproto.RecoverResponse{
DiskUuid: "00000000-0000-0000-0000-000000000000",
})
},
func(stream recoverproto.API_RecoverServer) error {
_, err := stream.Recv()
return err
},
func(stream recoverproto.API_RecoverServer) error {
return someErr
},
}},
wantErr: true,
},
}
func (c *stubRecoveryClient) Connect(string, atls.Validator) error {
c.conn = true
return c.connectErr
for name, tc := range testCases {
t.Run(name, func(t *testing.T) {
assert := assert.New(t)
netDialer := testdialer.NewBufconnDialer()
serverCreds := atlscredentials.New(nil, nil)
recoverServer := grpc.NewServer(grpc.Creds(serverCreds))
recoverproto.RegisterAPIServer(recoverServer, tc.recoveryServer)
addr := net.JoinHostPort("192.0.2.1", strconv.Itoa(constants.RecoveryPort))
listener := netDialer.GetListener(addr)
go recoverServer.Serve(listener)
defer recoverServer.GracefulStop()
recoverDoer := &recoverDoer{
dialer: dialer.New(nil, nil, netDialer),
endpoint: addr,
measurementSecret: []byte("measurement-secret"),
getDiskKey: func(string) ([]byte, error) {
return []byte("disk-key"), nil
},
}
func (c *stubRecoveryClient) Close() error {
c.conn = false
return c.closeErr
err := recoverDoer.Do(context.Background())
if tc.wantErr {
assert.Error(err)
} else {
assert.NoError(err)
}
})
}
}
func (c *stubRecoveryClient) Recover(_ context.Context, stateDiskKey, _ []byte) error {
c.pushStateDiskKeyKey = stateDiskKey
return c.pushStateDiskKeyErr
// TestDeriveStateDiskKey checks that the key function returned by
// getStateDiskKeyFunc reproduces the expected outputs of the HKDF test vectors.
func TestDeriveStateDiskKey(t *testing.T) {
	testCases := map[string]struct {
		masterSecret testvector.HKDF
	}{
		"all zero": {
			masterSecret: testvector.HKDFZero,
		},
		"all 0xff": {
			masterSecret: testvector.HKDF0xFF,
		},
	}

	for name, tc := range testCases {
		t.Run(name, func(t *testing.T) {
			assert := assert.New(t)

			getKeyFunc := getStateDiskKeyFunc(tc.masterSecret.Secret, tc.masterSecret.Salt)
			stateDiskKey, err := getKeyFunc(tc.masterSecret.Info)
			assert.NoError(err)
			assert.Equal(tc.masterSecret.Output, stateDiskKey)
		})
	}
}
// stubRecoveryServer is a scripted recoverproto API server for tests.
// Each Recover call runs the configured actions in order on the stream.
type stubRecoveryServer struct {
	actions []func(recoverproto.API_RecoverServer) error
	recoverproto.UnimplementedAPIServer
}
// Recover replays the server's scripted actions on the stream, stopping at
// and returning the first error an action produces.
func (s *stubRecoveryServer) Recover(stream recoverproto.API_RecoverServer) error {
	for i := range s.actions {
		if err := s.actions[i](stream); err != nil {
			return err
		}
	}
	return nil
}

View File

@ -91,6 +91,7 @@ func (c *Client) CreateInstances(ctx context.Context, input CreateInstancesInput
{Name: proto.String("bootstrapper"), Port: proto.Int32(constants.BootstrapperPort)},
{Name: proto.String("verify"), Port: proto.Int32(constants.VerifyServiceNodePortGRPC)},
{Name: proto.String("konnectivity"), Port: proto.Int32(constants.KonnectivityPort)},
{Name: proto.String("recovery"), Port: proto.Int32(constants.RecoveryPort)},
},
Template: c.controlPlaneTemplate,
UID: c.uid,

View File

@ -44,6 +44,7 @@ func (c *Client) CreateLoadBalancers(ctx context.Context, isDebugCluster bool) e
//
// LoadBalancer definitions.
//
// LoadBalancers added here also need to be referenced in instances.go:*Client.CreateInstances
c.loadbalancers = append(c.loadbalancers, &loadBalancer{
name: c.buildResourceName("kube"),
@ -78,6 +79,14 @@ func (c *Client) CreateLoadBalancers(ctx context.Context, isDebugCluster bool) e
healthCheck: computepb.HealthCheck_TCP,
})
c.loadbalancers = append(c.loadbalancers, &loadBalancer{
name: c.buildResourceName("recovery"),
ip: c.loadbalancerIPname,
frontendPort: constants.RecoveryPort,
backendPortName: "recovery",
healthCheck: computepb.HealthCheck_TCP,
})
// Only create when the debug cluster flag is set in the Constellation config
if isDebugCluster {
c.loadbalancers = append(c.loadbalancers, &loadBalancer{

View File

@ -119,10 +119,10 @@ func TestCreateLoadBalancers(t *testing.T) {
}
if tc.isDebugCluster {
assert.Equal(5, len(client.loadbalancers))
assert.Equal(6, len(client.loadbalancers))
assert.True(foundDebugdLB, "debugd loadbalancer not found in debug-mode")
} else {
assert.Equal(4, len(client.loadbalancers))
assert.Equal(5, len(client.loadbalancers))
assert.False(foundDebugdLB, "debugd loadbalancer found in non-debug mode")
}
})

View File

@ -1,116 +0,0 @@
/*
Copyright (c) Edgeless Systems GmbH
SPDX-License-Identifier: AGPL-3.0-only
*/
package proto
import (
"context"
"errors"
"io"
"github.com/edgelesssys/constellation/disk-mapper/recoverproto"
"github.com/edgelesssys/constellation/internal/atls"
"github.com/edgelesssys/constellation/internal/attestation"
"github.com/edgelesssys/constellation/internal/crypto"
"github.com/edgelesssys/constellation/internal/grpc/atlscredentials"
"go.uber.org/multierr"
"google.golang.org/grpc"
)
// RecoverClient wraps a recoverAPI client and the connection to it.
type RecoverClient struct {
conn *grpc.ClientConn
recoverapi recoverproto.APIClient
}
// Connect connects the client to a given server, using the handed
// Validators for the attestation of the connection.
// The connection must be closed using Close(). If connect is
// called on a client that already has a connection, the old
// connection is closed.
func (c *RecoverClient) Connect(endpoint string, validators atls.Validator) error {
creds := atlscredentials.New(nil, []atls.Validator{validators})
conn, err := grpc.Dial(endpoint, grpc.WithTransportCredentials(creds))
if err != nil {
return err
}
_ = c.Close()
c.conn = conn
c.recoverapi = recoverproto.NewAPIClient(conn)
return nil
}
// Close closes the grpc connection of the client.
// Close is idempotent and can be called on non connected clients
// without returning an error.
func (c *RecoverClient) Close() error {
if c.conn == nil {
return nil
}
if err := c.conn.Close(); err != nil {
return err
}
c.conn = nil
return nil
}
// Recover pushes the state disk key to a constellation instance in recovery mode.
// (The comment previously referred to a PushStateDiskKey method; the function
// was renamed to Recover.) The disk key is derived from masterSecret, salt, and
// the disk UUID reported by the instance; the measurement secret is sent as the
// first message of the stream.
func (c *RecoverClient) Recover(ctx context.Context, masterSecret, salt []byte) (retErr error) {
	if c.recoverapi == nil {
		return errors.New("client is not connected")
	}

	measurementSecret, err := attestation.DeriveMeasurementSecret(masterSecret, salt)
	if err != nil {
		return err
	}

	recoverclient, err := c.recoverapi.Recover(ctx)
	if err != nil {
		return err
	}
	defer func() {
		// Record the CloseSend error without masking an earlier failure.
		if err := recoverclient.CloseSend(); err != nil {
			multierr.AppendInto(&retErr, err)
		}
	}()

	// Send the measurement secret as the first message.
	if err := recoverclient.Send(&recoverproto.RecoverMessage{
		Request: &recoverproto.RecoverMessage_MeasurementSecret{
			MeasurementSecret: measurementSecret,
		},
	}); err != nil {
		return err
	}

	// Receive the disk UUID and derive the matching state disk key.
	res, err := recoverclient.Recv()
	if err != nil {
		return err
	}
	stateDiskKey, err := deriveStateDiskKey(masterSecret, salt, res.DiskUuid)
	if err != nil {
		return err
	}

	// Send the state disk key.
	if err := recoverclient.Send(&recoverproto.RecoverMessage{
		Request: &recoverproto.RecoverMessage_StateDiskKey{
			StateDiskKey: stateDiskKey,
		},
	}); err != nil {
		return err
	}

	// io.EOF signals the server accepted the key and closed the stream.
	if _, err := recoverclient.Recv(); err != nil && !errors.Is(err, io.EOF) {
		return err
	}

	return nil
}
// deriveStateDiskKey derives a state disk key from a master key, a salt, and a disk UUID.
func deriveStateDiskKey(masterKey, salt []byte, diskUUID string) ([]byte, error) {
return crypto.DeriveKey(masterKey, salt, []byte(crypto.HKDFInfoPrefix+diskUUID), crypto.StateDiskKeyLength)
}

View File

@ -1,38 +0,0 @@
/*
Copyright (c) Edgeless Systems GmbH
SPDX-License-Identifier: AGPL-3.0-only
*/
package proto
import (
"testing"
"github.com/edgelesssys/constellation/internal/crypto/testvector"
"github.com/stretchr/testify/assert"
)
func TestDeriveStateDiskKey(t *testing.T) {
testCases := map[string]struct {
masterSecret testvector.HKDF
}{
"all zero": {
masterSecret: testvector.HKDFZero,
},
"all 0xff": {
masterSecret: testvector.HKDF0xFF,
},
}
for name, tc := range testCases {
t.Run(name, func(t *testing.T) {
assert := assert.New(t)
stateDiskKey, err := deriveStateDiskKey(tc.masterSecret.Secret, tc.masterSecret.Salt, tc.masterSecret.Info)
assert.NoError(err)
assert.Equal(tc.masterSecret.Output, stateDiskKey)
})
}
}

View File

@ -33,6 +33,7 @@ import (
"github.com/edgelesssys/constellation/internal/constants"
"github.com/edgelesssys/constellation/internal/grpc/dialer"
"github.com/edgelesssys/constellation/internal/logger"
"github.com/edgelesssys/constellation/internal/role"
tpmClient "github.com/google/go-tpm-tools/client"
"github.com/google/go-tpm/tpm2"
"github.com/spf13/afero"
@ -128,8 +129,13 @@ func main() {
log.Named("rejoinClient"),
)
// set up recovery server
recoveryServer := recoveryserver.New(issuer, log.Named("recoveryServer"))
// set up recovery server if control-plane node
var recoveryServer setup.RecoveryServer
if self.Role == role.ControlPlane {
recoveryServer = recoveryserver.New(issuer, log.Named("recoveryServer"))
} else {
recoveryServer = recoveryserver.NewStub(log.Named("recoveryServer"))
}
err = setupManger.PrepareExistingDisk(setup.NewNodeRecoverer(recoveryServer, rejoinClient))
} else {

View File

@ -14,6 +14,7 @@ import (
"github.com/edgelesssys/constellation/disk-mapper/recoverproto"
"github.com/edgelesssys/constellation/internal/atls"
"github.com/edgelesssys/constellation/internal/grpc/atlscredentials"
"github.com/edgelesssys/constellation/internal/grpc/grpclog"
"github.com/edgelesssys/constellation/internal/logger"
"go.uber.org/zap"
"google.golang.org/grpc"
@ -89,8 +90,9 @@ func (s *RecoveryServer) Serve(ctx context.Context, listener net.Listener, diskU
func (s *RecoveryServer) Recover(stream recoverproto.API_RecoverServer) error {
s.mux.Lock()
defer s.mux.Unlock()
log := s.log.With(zap.String("peer", grpclog.PeerAddrFromContext(stream.Context())))
s.log.Infof("Received recover call")
log.Infof("Received recover call")
msg, err := stream.Recv()
if err != nil {
@ -99,35 +101,53 @@ func (s *RecoveryServer) Recover(stream recoverproto.API_RecoverServer) error {
measurementSecret, ok := msg.GetRequest().(*recoverproto.RecoverMessage_MeasurementSecret)
if !ok {
s.log.Errorf("Received invalid first message: not a measurement secret")
log.Errorf("Received invalid first message: not a measurement secret")
return status.Error(codes.InvalidArgument, "first message is not a measurement secret")
}
if err := stream.Send(&recoverproto.RecoverResponse{DiskUuid: s.diskUUID}); err != nil {
s.log.With(zap.Error(err)).Errorf("Failed to send disk UUID")
log.With(zap.Error(err)).Errorf("Failed to send disk UUID")
return status.Error(codes.Internal, "failed to send response")
}
msg, err = stream.Recv()
if err != nil {
s.log.With(zap.Error(err)).Errorf("Failed to receive disk key")
log.With(zap.Error(err)).Errorf("Failed to receive disk key")
return status.Error(codes.Internal, "failed to receive message")
}
stateDiskKey, ok := msg.GetRequest().(*recoverproto.RecoverMessage_StateDiskKey)
if !ok {
s.log.Errorf("Received invalid second message: not a state disk key")
log.Errorf("Received invalid second message: not a state disk key")
return status.Error(codes.InvalidArgument, "second message is not a state disk key")
}
s.stateDiskKey = stateDiskKey.StateDiskKey
s.measurementSecret = measurementSecret.MeasurementSecret
s.log.Infof("Received state disk key and measurement secret, shutting down server")
log.Infof("Received state disk key and measurement secret, shutting down server")
go s.grpcServer.GracefulStop()
return nil
}
// stubServer implements the RecoveryServer interface but does not actually start a server.
type stubServer struct {
log *logger.Logger
}
// NewStub returns a new stubbed RecoveryServer.
// We use this to avoid having to start a server for worker nodes, since they don't require manual recovery.
func NewStub(log *logger.Logger) *stubServer {
return &stubServer{log: log}
}
// Serve waits until the context is canceled and returns nil.
func (s *stubServer) Serve(ctx context.Context, _ net.Listener, _ string) ([]byte, []byte, error) {
s.log.Infof("Running as worker node, skipping recovery server")
<-ctx.Done()
return nil, nil, ctx.Err()
}
type server interface {
Serve(net.Listener) error
GracefulStop()

View File

@ -160,21 +160,21 @@ func (s *SetupManager) saveConfiguration(passphrase []byte) error {
return s.config.Generate(stateDiskMappedName, s.diskPath, filepath.Join(keyPath, keyFile), cryptsetupOptions)
}
type recoveryServer interface {
type RecoveryServer interface {
Serve(context.Context, net.Listener, string) (key, secret []byte, err error)
}
type rejoinClient interface {
type RejoinClient interface {
Start(context.Context, string) (key, secret []byte)
}
type nodeRecoverer struct {
recoveryServer recoveryServer
rejoinClient rejoinClient
recoveryServer RecoveryServer
rejoinClient RejoinClient
}
// NewNodeRecoverer initializes a new nodeRecoverer.
func NewNodeRecoverer(recoveryServer recoveryServer, rejoinClient rejoinClient) *nodeRecoverer {
func NewNodeRecoverer(recoveryServer RecoveryServer, rejoinClient RejoinClient) *nodeRecoverer {
return &nodeRecoverer{
recoveryServer: recoveryServer,
rejoinClient: rejoinClient,

View File

@ -26,6 +26,7 @@ type RecoverMessage struct {
unknownFields protoimpl.UnknownFields
// Types that are assignable to Request:
//
// *RecoverMessage_StateDiskKey
// *RecoverMessage_MeasurementSecret
Request isRecoverMessage_Request `protobuf_oneof:"request"`

View File

@ -42,7 +42,7 @@ const (
KMSPort = 9000
BootstrapperPort = 9000
KubernetesPort = 6443
RecoveryPort = 9000
RecoveryPort = 9999
EnclaveSSHPort = 2222
SSHPort = 22
NVMEOverTCPPort = 8009

View File

@ -49,6 +49,13 @@ var (
IPRange: "0.0.0.0/0",
FromPort: KonnectivityPort,
},
{
Name: "recovery",
Description: "control-plane recovery",
Protocol: "tcp",
IPRange: "0.0.0.0/0",
FromPort: RecoveryPort,
},
}
// IngressRulesDebug is the default set of ingress rules for a Constellation cluster with debug mode.