From e367e1a68b1b6da5493a9fab26466ff0fb8efcdc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Wei=C3=9Fe?= <66256922+daniel-weisse@users.noreply.github.com> Date: Wed, 14 Sep 2022 13:25:42 +0200 Subject: [PATCH] AB#2261 Add loadbalancer for control-plane recovery (#151) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Daniel Weiße --- CHANGELOG.md | 2 + cli/internal/azure/loadbalancer.go | 37 +++ cli/internal/cloudcmd/create_test.go | 2 +- cli/internal/cmd/recover.go | 123 ++++++-- cli/internal/cmd/recover_test.go | 267 ++++++++++++++---- cli/internal/gcp/client/instances.go | 1 + cli/internal/gcp/client/loadbalancer.go | 9 + cli/internal/gcp/client/loadbalancer_test.go | 4 +- cli/internal/proto/recover.go | 116 -------- cli/internal/proto/recover_test.go | 38 --- disk-mapper/cmd/main.go | 10 +- disk-mapper/internal/recoveryserver/server.go | 32 ++- disk-mapper/internal/setup/setup.go | 10 +- disk-mapper/recoverproto/recover.pb.go | 1 + internal/constants/constants.go | 2 +- internal/constants/firewall.go | 7 + 16 files changed, 418 insertions(+), 243 deletions(-) delete mode 100644 cli/internal/proto/recover.go delete mode 100644 cli/internal/proto/recover_test.go diff --git a/CHANGELOG.md b/CHANGELOG.md index 308f7c0e5..ce6c7a0d4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- Loadbalancer for control-plane recovery + ### Changed diff --git a/cli/internal/azure/loadbalancer.go b/cli/internal/azure/loadbalancer.go index d397e4ea7..96a95ad0f 100644 --- a/cli/internal/azure/loadbalancer.go +++ b/cli/internal/azure/loadbalancer.go @@ -31,6 +31,7 @@ const ( coordHealthProbeName = "coordHealthProbe" debugdHealthProbeName = "debugdHealthProbe" konnectivityHealthProbeName = "konnectivityHealthProbe" + recoveryHealthProbeName = "recoveryHealthProbe" ) // Azure returns a Azure representation of LoadBalancer. @@ -94,6 +95,13 @@ func (l LoadBalancer) Azure() armnetwork.LoadBalancer { Port: to.Ptr[int32](constants.KonnectivityPort), }, }, + { + Name: to.Ptr(recoveryHealthProbeName), + Properties: &armnetwork.ProbePropertiesFormat{ + Protocol: to.Ptr(armnetwork.ProbeProtocolTCP), + Port: to.Ptr[int32](constants.RecoveryPort), + }, + }, }, LoadBalancingRules: []*armnetwork.LoadBalancingRule{ { @@ -212,6 +220,35 @@ func (l LoadBalancer) Azure() armnetwork.LoadBalancer { }, }, }, + { + Name: to.Ptr("recoveryLoadBalancerRule"), + Properties: &armnetwork.LoadBalancingRulePropertiesFormat{ + FrontendIPConfiguration: &armnetwork.SubResource{ + ID: to.Ptr("/subscriptions/" + l.Subscription + + "/resourceGroups/" + l.ResourceGroup + + "/providers/Microsoft.Network/loadBalancers/" + l.Name + + "/frontendIPConfigurations/" + frontEndIPConfigName), + }, + FrontendPort: to.Ptr[int32](constants.RecoveryPort), + BackendPort: to.Ptr[int32](constants.RecoveryPort), + Protocol: to.Ptr(armnetwork.TransportProtocolTCP), + Probe: &armnetwork.SubResource{ + ID: to.Ptr("/subscriptions/" + l.Subscription + + "/resourceGroups/" + l.ResourceGroup + + "/providers/Microsoft.Network/loadBalancers/" + l.Name + + "/probes/" + recoveryHealthProbeName), + }, + DisableOutboundSnat: to.Ptr(true), + BackendAddressPools: []*armnetwork.SubResource{ + { + ID: to.Ptr("/subscriptions/" + l.Subscription + + "/resourceGroups/" + l.ResourceGroup + + "/providers/Microsoft.Network/loadBalancers/" + l.Name + + "/backendAddressPools/" + backEndAddressPoolControlPlaneName), + }, + }, + }, + }, }, OutboundRules: []*armnetwork.OutboundRule{ { diff --git a/cli/internal/cloudcmd/create_test.go b/cli/internal/cloudcmd/create_test.go index a48b25804..984098bf4 100644 --- a/cli/internal/cloudcmd/create_test.go +++ b/cli/internal/cloudcmd/create_test.go @@ -40,7 +40,7 @@ func TestCreator(t *testing.T) { GCPSubnetwork: "subnetwork", GCPLoadbalancers: []string{"kube-lb", "boot-lb", "verify-lb"}, GCPFirewalls: []string{ - "bootstrapper", "ssh", "nodeport", "kubernetes", "konnectivity", + "bootstrapper", "ssh", "nodeport", "kubernetes", "konnectivity", "recovery", "allow-cluster-internal-tcp", "allow-cluster-internal-udp", "allow-cluster-internal-icmp", "allow-node-internal-tcp", "allow-node-internal-udp", "allow-node-internal-icmp", }, diff --git a/cli/internal/cmd/recover.go b/cli/internal/cmd/recover.go index ddb1d9a53..253747f46 100644 --- a/cli/internal/cmd/recover.go +++ b/cli/internal/cmd/recover.go @@ -8,26 +8,28 @@ package cmd import ( "context" + "errors" "fmt" "io" + "net" + "time" "github.com/edgelesssys/constellation/cli/internal/cloudcmd" - "github.com/edgelesssys/constellation/cli/internal/proto" - "github.com/edgelesssys/constellation/internal/atls" + "github.com/edgelesssys/constellation/disk-mapper/recoverproto" + "github.com/edgelesssys/constellation/internal/attestation" "github.com/edgelesssys/constellation/internal/cloud/cloudprovider" "github.com/edgelesssys/constellation/internal/constants" + "github.com/edgelesssys/constellation/internal/crypto" "github.com/edgelesssys/constellation/internal/file" + "github.com/edgelesssys/constellation/internal/grpc/dialer" + grpcRetry "github.com/edgelesssys/constellation/internal/grpc/retry" + "github.com/edgelesssys/constellation/internal/retry" "github.com/edgelesssys/constellation/internal/state" "github.com/spf13/afero" "github.com/spf13/cobra" + "go.uber.org/multierr" ) -type recoveryClient interface { - Connect(endpoint string, validators atls.Validator) error - Recover(ctx context.Context, masterSecret, salt []byte) error - io.Closer -} - // NewRecoverCmd returns a new cobra.Command for the recover command. func NewRecoverCmd() *cobra.Command { cmd := &cobra.Command{ @@ -46,12 +48,13 @@ func NewRecoverCmd() *cobra.Command { func runRecover(cmd *cobra.Command, _ []string) error { fileHandler := file.NewHandler(afero.NewOsFs()) - recoveryClient := &proto.RecoverClient{} - defer recoveryClient.Close() - return recover(cmd, fileHandler, recoveryClient) + newDialer := func(validator *cloudcmd.Validator) *dialer.Dialer { + return dialer.New(nil, validator.V(cmd), &net.Dialer{}) + } + return recover(cmd, fileHandler, newDialer) } -func recover(cmd *cobra.Command, fileHandler file.Handler, recoveryClient recoveryClient) error { +func recover(cmd *cobra.Command, fileHandler file.Handler, newDialer func(validator *cloudcmd.Validator) *dialer.Dialer) error { flags, err := parseRecoverFlags(cmd) if err != nil { return err @@ -73,29 +76,103 @@ func recover(cmd *cobra.Command, fileHandler file.Handler, recoveryClient recove return fmt.Errorf("reading and validating config: %w", err) } - validators, err := cloudcmd.NewValidator(provider, config) + validator, err := cloudcmd.NewValidator(provider, config) if err != nil { return err } - if err := recoveryClient.Connect(flags.endpoint, validators.V(cmd)); err != nil { - return err - } - - if err := recoveryClient.Recover(cmd.Context(), masterSecret.Key, masterSecret.Salt); err != nil { - return err + if err := recoverCall(cmd.Context(), newDialer(validator), flags.endpoint, masterSecret.Key, masterSecret.Salt); err != nil { + return fmt.Errorf("recovering cluster: %w", err) } cmd.Println("Pushed recovery key.") return nil } +func recoverCall(ctx context.Context, dialer grpcDialer, endpoint string, key, salt []byte) error { + measurementSecret, err := attestation.DeriveMeasurementSecret(key, salt) + if err != nil { + return err + } + doer := &recoverDoer{ + dialer: dialer, + endpoint: endpoint, + getDiskKey: getStateDiskKeyFunc(key, salt), + measurementSecret: measurementSecret, + } + retrier := retry.NewIntervalRetrier(doer, 30*time.Second, grpcRetry.ServiceIsUnavailable) + if err := retrier.Do(ctx); err != nil { + return err + } + return nil +} + +type recoverDoer struct { + dialer grpcDialer + endpoint string + measurementSecret []byte + getDiskKey func(uuid string) (key []byte, err error) +} + +func (d *recoverDoer) Do(ctx context.Context) (retErr error) { + conn, err := d.dialer.Dial(ctx, d.endpoint) + if err != nil { + return fmt.Errorf("dialing recovery server: %w", err) + } + defer conn.Close() + + // set up streaming client + protoClient := recoverproto.NewAPIClient(conn) + recoverclient, err := protoClient.Recover(ctx) + if err != nil { + return err + } + defer func() { + if err := recoverclient.CloseSend(); err != nil { + multierr.AppendInto(&retErr, err) + } + }() + + // send measurement secret as first message + if err := recoverclient.Send(&recoverproto.RecoverMessage{ + Request: &recoverproto.RecoverMessage_MeasurementSecret{ + MeasurementSecret: d.measurementSecret, + }, + }); err != nil { + return err + } + + // receive disk uuid + res, err := recoverclient.Recv() + if err != nil { + return err + } + stateDiskKey, err := d.getDiskKey(res.DiskUuid) + if err != nil { + return err + } + + // send disk key + if err := recoverclient.Send(&recoverproto.RecoverMessage{ + Request: &recoverproto.RecoverMessage_StateDiskKey{ + StateDiskKey: stateDiskKey, + }, + }); err != nil { + return err + } + + if _, err := recoverclient.Recv(); err != nil && !errors.Is(err, io.EOF) { + return err + } + return nil +} + func parseRecoverFlags(cmd *cobra.Command) (recoverFlags, error) { endpoint, err := cmd.Flags().GetString("endpoint") if err != nil { return recoverFlags{}, fmt.Errorf("parsing endpoint argument: %w", err) } - endpoint, err = addPortIfMissing(endpoint, constants.BootstrapperPort) + endpoint, err = addPortIfMissing(endpoint, constants.RecoveryPort) if err != nil { return recoverFlags{}, fmt.Errorf("validating endpoint argument: %w", err) } @@ -122,3 +199,9 @@ type recoverFlags struct { secretPath string configPath string } + +func getStateDiskKeyFunc(masterKey, salt []byte) func(uuid string) ([]byte, error) { + return func(uuid string) ([]byte, error) { + return crypto.DeriveKey(masterKey, salt, []byte(crypto.HKDFInfoPrefix+uuid), crypto.StateDiskKeyLength) + } +} diff --git a/cli/internal/cmd/recover_test.go b/cli/internal/cmd/recover_test.go index 9cf13b6e0..1be6e258c 100644 --- a/cli/internal/cmd/recover_test.go +++ b/cli/internal/cmd/recover_test.go @@ -10,18 +10,25 @@ import ( "bytes" "context" "errors" + "net" + "strconv" "testing" - "github.com/edgelesssys/constellation/internal/atls" + "github.com/edgelesssys/constellation/cli/internal/cloudcmd" + "github.com/edgelesssys/constellation/disk-mapper/recoverproto" "github.com/edgelesssys/constellation/internal/cloud/cloudprovider" "github.com/edgelesssys/constellation/internal/config" "github.com/edgelesssys/constellation/internal/constants" "github.com/edgelesssys/constellation/internal/crypto/testvector" "github.com/edgelesssys/constellation/internal/file" + "github.com/edgelesssys/constellation/internal/grpc/atlscredentials" + "github.com/edgelesssys/constellation/internal/grpc/dialer" + "github.com/edgelesssys/constellation/internal/grpc/testdialer" "github.com/edgelesssys/constellation/internal/state" "github.com/spf13/afero" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "google.golang.org/grpc" ) func TestRecoverCmdArgumentValidation(t *testing.T) { @@ -52,10 +59,25 @@ func TestRecoverCmdArgumentValidation(t *testing.T) { func TestRecover(t *testing.T) { validState := state.ConstellationState{CloudProvider: "GCP"} invalidCSPState := state.ConstellationState{CloudProvider: "invalid"} + successActions := []func(stream recoverproto.API_RecoverServer) error{ + func(stream recoverproto.API_RecoverServer) error { + _, err := stream.Recv() + return err + }, + func(stream recoverproto.API_RecoverServer) error { + return stream.Send(&recoverproto.RecoverResponse{ + DiskUuid: "00000000-0000-0000-0000-000000000000", + }) + }, + func(stream recoverproto.API_RecoverServer) error { + _, err := stream.Recv() + return err + }, + } testCases := map[string]struct { existingState state.ConstellationState - client *stubRecoveryClient + recoverServerAPI *stubRecoveryServer masterSecret testvector.HKDF endpointFlag string masterSecretFlag string @@ -64,46 +86,36 @@ func TestRecover(t *testing.T) { wantErr bool }{ "works": { - existingState: validState, - client: &stubRecoveryClient{}, - endpointFlag: "192.0.2.1", - masterSecret: testvector.HKDFZero, + existingState: validState, + recoverServerAPI: &stubRecoveryServer{actions: successActions}, + endpointFlag: "192.0.2.1", + masterSecret: testvector.HKDFZero, }, "missing flags": { - wantErr: true, + recoverServerAPI: &stubRecoveryServer{actions: successActions}, + wantErr: true, }, "missing config": { - endpointFlag: "192.0.2.1", - masterSecret: testvector.HKDFZero, - configFlag: "nonexistent-config", - wantErr: true, + recoverServerAPI: &stubRecoveryServer{actions: successActions}, + endpointFlag: "192.0.2.1", + masterSecret: testvector.HKDFZero, + configFlag: "nonexistent-config", + wantErr: true, }, "missing state": { - existingState: validState, - endpointFlag: "192.0.2.1", - masterSecret: testvector.HKDFZero, - stateless: true, - wantErr: true, + existingState: validState, + recoverServerAPI: &stubRecoveryServer{actions: successActions}, + endpointFlag: "192.0.2.1", + masterSecret: testvector.HKDFZero, + stateless: true, + wantErr: true, }, "invalid cloud provider": { - existingState: invalidCSPState, - endpointFlag: "192.0.2.1", - masterSecret: testvector.HKDFZero, - wantErr: true, - }, - "connect fails": { - existingState: validState, - client: &stubRecoveryClient{connectErr: errors.New("connect failed")}, - endpointFlag: "192.0.2.1", - masterSecret: testvector.HKDFZero, - wantErr: true, - }, - "pushing state key fails": { - existingState: validState, - client: &stubRecoveryClient{pushStateDiskKeyErr: errors.New("pushing key failed")}, - endpointFlag: "192.0.2.1", - masterSecret: testvector.HKDFZero, - wantErr: true, + existingState: invalidCSPState, + recoverServerAPI: &stubRecoveryServer{actions: successActions}, + endpointFlag: "192.0.2.1", + masterSecret: testvector.HKDFZero, + wantErr: true, }, } @@ -113,6 +125,7 @@ func TestRecover(t *testing.T) { require := require.New(t) cmd := NewRecoverCmd() + cmd.SetContext(context.Background()) cmd.Flags().String("config", constants.ConfigFilename, "") // register persistent flag manually out := &bytes.Buffer{} cmd.SetOut(out) @@ -133,12 +146,32 @@ func TestRecover(t *testing.T) { config := defaultConfigWithExpectedMeasurements(t, config.Default(), cloudprovider.FromString(tc.existingState.CloudProvider)) require.NoError(fileHandler.WriteYAML(constants.ConfigFilename, config)) - require.NoError(fileHandler.WriteJSON("constellation-mastersecret.json", masterSecret{Key: tc.masterSecret.Secret, Salt: tc.masterSecret.Salt}, file.OptNone)) + require.NoError(fileHandler.WriteJSON( + "constellation-mastersecret.json", + masterSecret{Key: tc.masterSecret.Secret, Salt: tc.masterSecret.Salt}, + file.OptNone, + )) + if !tc.stateless { - require.NoError(fileHandler.WriteJSON(constants.StateFilename, tc.existingState, file.OptNone)) + require.NoError(fileHandler.WriteJSON( + constants.StateFilename, + tc.existingState, + file.OptNone, + )) } - err := recover(cmd, fileHandler, tc.client) + netDialer := testdialer.NewBufconnDialer() + newDialer := func(*cloudcmd.Validator) *dialer.Dialer { + return dialer.New(nil, nil, netDialer) + } + serverCreds := atlscredentials.New(nil, nil) + recoverServer := grpc.NewServer(grpc.Creds(serverCreds)) + recoverproto.RegisterAPIServer(recoverServer, tc.recoverServerAPI) + listener := netDialer.GetListener(net.JoinHostPort("192.0.2.1", strconv.Itoa(constants.RecoveryPort))) + go recoverServer.Serve(listener) + defer recoverServer.GracefulStop() + + err := recover(cmd, fileHandler, newDialer) if tc.wantErr { assert.Error(err) @@ -201,26 +234,156 @@ func TestParseRecoverFlags(t *testing.T) { } } -type stubRecoveryClient struct { - conn bool - connectErr error - closeErr error - pushStateDiskKeyErr error +func TestDoRecovery(t *testing.T) { + someErr := errors.New("error") + testCases := map[string]struct { + recoveryServer *stubRecoveryServer + wantErr bool + }{ + "success": { + recoveryServer: &stubRecoveryServer{actions: []func(stream recoverproto.API_RecoverServer) error{ + func(stream recoverproto.API_RecoverServer) error { + _, err := stream.Recv() + return err + }, + func(stream recoverproto.API_RecoverServer) error { + return stream.Send(&recoverproto.RecoverResponse{ + DiskUuid: "00000000-0000-0000-0000-000000000000", + }) + }, + func(stream recoverproto.API_RecoverServer) error { + _, err := stream.Recv() + return err + }, + }}, + }, + "error on first recv": { + recoveryServer: &stubRecoveryServer{actions: []func(stream recoverproto.API_RecoverServer) error{ + func(stream recoverproto.API_RecoverServer) error { + return someErr + }, + }}, + wantErr: true, + }, + "error on send": { + recoveryServer: &stubRecoveryServer{actions: []func(stream recoverproto.API_RecoverServer) error{ + func(stream recoverproto.API_RecoverServer) error { + _, err := stream.Recv() + return err + }, + func(stream recoverproto.API_RecoverServer) error { + return someErr + }, + }}, + wantErr: true, + }, + "error on second recv": { + recoveryServer: &stubRecoveryServer{actions: []func(stream recoverproto.API_RecoverServer) error{ + func(stream recoverproto.API_RecoverServer) error { + _, err := stream.Recv() + return err + }, + func(stream recoverproto.API_RecoverServer) error { + return stream.Send(&recoverproto.RecoverResponse{ + DiskUuid: "00000000-0000-0000-0000-000000000000", + }) + }, + func(stream recoverproto.API_RecoverServer) error { + return someErr + }, + }}, + wantErr: true, + }, + "final message is an error": { + recoveryServer: &stubRecoveryServer{actions: []func(stream recoverproto.API_RecoverServer) error{ + func(stream recoverproto.API_RecoverServer) error { + _, err := stream.Recv() + return err + }, + func(stream recoverproto.API_RecoverServer) error { + return stream.Send(&recoverproto.RecoverResponse{ + DiskUuid: "00000000-0000-0000-0000-000000000000", + }) + }, + func(stream recoverproto.API_RecoverServer) error { + _, err := stream.Recv() + return err + }, + func(stream recoverproto.API_RecoverServer) error { + return someErr + }, + }}, + wantErr: true, + }, + } - pushStateDiskKeyKey []byte + for name, tc := range testCases { + t.Run(name, func(t *testing.T) { + assert := assert.New(t) + + netDialer := testdialer.NewBufconnDialer() + serverCreds := atlscredentials.New(nil, nil) + recoverServer := grpc.NewServer(grpc.Creds(serverCreds)) + recoverproto.RegisterAPIServer(recoverServer, tc.recoveryServer) + addr := net.JoinHostPort("192.0.2.1", strconv.Itoa(constants.RecoveryPort)) + listener := netDialer.GetListener(addr) + go recoverServer.Serve(listener) + defer recoverServer.GracefulStop() + + recoverDoer := &recoverDoer{ + dialer: dialer.New(nil, nil, netDialer), + endpoint: addr, + measurementSecret: []byte("measurement-secret"), + getDiskKey: func(string) ([]byte, error) { + return []byte("disk-key"), nil + }, + } + + err := recoverDoer.Do(context.Background()) + if tc.wantErr { + assert.Error(err) + } else { + assert.NoError(err) + } + }) + } } -func (c *stubRecoveryClient) Connect(string, atls.Validator) error { - c.conn = true - return c.connectErr +func TestDeriveStateDiskKey(t *testing.T) { + testCases := map[string]struct { + masterSecret testvector.HKDF + }{ + "all zero": { + masterSecret: testvector.HKDFZero, + }, + "all 0xff": { + masterSecret: testvector.HKDF0xFF, + }, + } + + for name, tc := range testCases { + t.Run(name, func(t *testing.T) { + assert := assert.New(t) + + getKeyFunc := getStateDiskKeyFunc(tc.masterSecret.Secret, tc.masterSecret.Salt) + stateDiskKey, err := getKeyFunc(tc.masterSecret.Info) + + assert.NoError(err) + assert.Equal(tc.masterSecret.Output, stateDiskKey) + }) + } } -func (c *stubRecoveryClient) Close() error { - c.conn = false - return c.closeErr +type stubRecoveryServer struct { + actions []func(recoverproto.API_RecoverServer) error + recoverproto.UnimplementedAPIServer } -func (c *stubRecoveryClient) Recover(_ context.Context, stateDiskKey, _ []byte) error { - c.pushStateDiskKeyKey = stateDiskKey - return c.pushStateDiskKeyErr +func (s *stubRecoveryServer) Recover(stream recoverproto.API_RecoverServer) error { + for _, action := range s.actions { + if err := action(stream); err != nil { + return err + } + } + return nil } diff --git a/cli/internal/gcp/client/instances.go b/cli/internal/gcp/client/instances.go index 0b6f25255..ecb6f99a3 100644 --- a/cli/internal/gcp/client/instances.go +++ b/cli/internal/gcp/client/instances.go @@ -91,6 +91,7 @@ func (c *Client) CreateInstances(ctx context.Context, input CreateInstancesInput {Name: proto.String("bootstrapper"), Port: proto.Int32(constants.BootstrapperPort)}, {Name: proto.String("verify"), Port: proto.Int32(constants.VerifyServiceNodePortGRPC)}, {Name: proto.String("konnectivity"), Port: proto.Int32(constants.KonnectivityPort)}, + {Name: proto.String("recovery"), Port: proto.Int32(constants.RecoveryPort)}, }, Template: c.controlPlaneTemplate, UID: c.uid, diff --git a/cli/internal/gcp/client/loadbalancer.go b/cli/internal/gcp/client/loadbalancer.go index 38598cd8e..16b91e229 100644 --- a/cli/internal/gcp/client/loadbalancer.go +++ b/cli/internal/gcp/client/loadbalancer.go @@ -44,6 +44,7 @@ func (c *Client) CreateLoadBalancers(ctx context.Context, isDebugCluster bool) e // // LoadBalancer definitions. // + // LoadBalancers added here also need to be referenced in instances.go:*Client.CreateInstances c.loadbalancers = append(c.loadbalancers, &loadBalancer{ name: c.buildResourceName("kube"), @@ -78,6 +79,14 @@ func (c *Client) CreateLoadBalancers(ctx context.Context, isDebugCluster bool) e healthCheck: computepb.HealthCheck_TCP, }) + c.loadbalancers = append(c.loadbalancers, &loadBalancer{ + name: c.buildResourceName("recovery"), + ip: c.loadbalancerIPname, + frontendPort: constants.RecoveryPort, + backendPortName: "recovery", + healthCheck: computepb.HealthCheck_TCP, + }) + // Only create when the debug cluster flag is set in the Constellation config if isDebugCluster { c.loadbalancers = append(c.loadbalancers, &loadBalancer{ diff --git a/cli/internal/gcp/client/loadbalancer_test.go b/cli/internal/gcp/client/loadbalancer_test.go index 3ef9ae876..42ba2bc3f 100644 --- a/cli/internal/gcp/client/loadbalancer_test.go +++ b/cli/internal/gcp/client/loadbalancer_test.go @@ -119,10 +119,10 @@ func TestCreateLoadBalancers(t *testing.T) { } if tc.isDebugCluster { - assert.Equal(5, len(client.loadbalancers)) + assert.Equal(6, len(client.loadbalancers)) assert.True(foundDebugdLB, "debugd loadbalancer not found in debug-mode") } else { - assert.Equal(4, len(client.loadbalancers)) + assert.Equal(5, len(client.loadbalancers)) assert.False(foundDebugdLB, "debugd loadbalancer found in non-debug mode") } }) diff --git a/cli/internal/proto/recover.go b/cli/internal/proto/recover.go deleted file mode 100644 index eff826910..000000000 --- a/cli/internal/proto/recover.go +++ /dev/null @@ -1,116 +0,0 @@ -/* -Copyright (c) Edgeless Systems GmbH - -SPDX-License-Identifier: AGPL-3.0-only -*/ - -package proto - -import ( - "context" - "errors" - "io" - - "github.com/edgelesssys/constellation/disk-mapper/recoverproto" - "github.com/edgelesssys/constellation/internal/atls" - "github.com/edgelesssys/constellation/internal/attestation" - "github.com/edgelesssys/constellation/internal/crypto" - "github.com/edgelesssys/constellation/internal/grpc/atlscredentials" - "go.uber.org/multierr" - "google.golang.org/grpc" -) - -// RecoverClient wraps a recoverAPI client and the connection to it. -type RecoverClient struct { - conn *grpc.ClientConn - recoverapi recoverproto.APIClient -} - -// Connect connects the client to a given server, using the handed -// Validators for the attestation of the connection. -// The connection must be closed using Close(). If connect is -// called on a client that already has a connection, the old -// connection is closed. -func (c *RecoverClient) Connect(endpoint string, validators atls.Validator) error { - creds := atlscredentials.New(nil, []atls.Validator{validators}) - - conn, err := grpc.Dial(endpoint, grpc.WithTransportCredentials(creds)) - if err != nil { - return err - } - _ = c.Close() - c.conn = conn - c.recoverapi = recoverproto.NewAPIClient(conn) - return nil -} - -// Close closes the grpc connection of the client. -// Close is idempotent and can be called on non connected clients -// without returning an error. -func (c *RecoverClient) Close() error { - if c.conn == nil { - return nil - } - if err := c.conn.Close(); err != nil { - return err - } - c.conn = nil - return nil -} - -// PushStateDiskKey pushes the state disk key to a constellation instance in recovery mode. -func (c *RecoverClient) Recover(ctx context.Context, masterSecret, salt []byte) (retErr error) { - if c.recoverapi == nil { - return errors.New("client is not connected") - } - - measurementSecret, err := attestation.DeriveMeasurementSecret(masterSecret, salt) - if err != nil { - return err - } - - recoverclient, err := c.recoverapi.Recover(ctx) - if err != nil { - return err - } - defer func() { - if err := recoverclient.CloseSend(); err != nil { - multierr.AppendInto(&retErr, err) - } - }() - - if err := recoverclient.Send(&recoverproto.RecoverMessage{ - Request: &recoverproto.RecoverMessage_MeasurementSecret{ - MeasurementSecret: measurementSecret, - }, - }); err != nil { - return err - } - res, err := recoverclient.Recv() - if err != nil { - return err - } - - stateDiskKey, err := deriveStateDiskKey(masterSecret, salt, res.DiskUuid) - if err != nil { - return err - } - - if err := recoverclient.Send(&recoverproto.RecoverMessage{ - Request: &recoverproto.RecoverMessage_StateDiskKey{ - StateDiskKey: stateDiskKey, - }, - }); err != nil { - return err - } - - if _, err := recoverclient.Recv(); err != nil && !errors.Is(err, io.EOF) { - return err - } - return nil -} - -// deriveStateDiskKey derives a state disk key from a master key, a salt, and a disk UUID. -func deriveStateDiskKey(masterKey, salt []byte, diskUUID string) ([]byte, error) { - return crypto.DeriveKey(masterKey, salt, []byte(crypto.HKDFInfoPrefix+diskUUID), crypto.StateDiskKeyLength) -} diff --git a/cli/internal/proto/recover_test.go b/cli/internal/proto/recover_test.go deleted file mode 100644 index ad2aef0b4..000000000 --- a/cli/internal/proto/recover_test.go +++ /dev/null @@ -1,38 +0,0 @@ -/* -Copyright (c) Edgeless Systems GmbH - -SPDX-License-Identifier: AGPL-3.0-only -*/ - -package proto - -import ( - "testing" - - "github.com/edgelesssys/constellation/internal/crypto/testvector" - "github.com/stretchr/testify/assert" -) - -func TestDeriveStateDiskKey(t *testing.T) { - testCases := map[string]struct { - masterSecret testvector.HKDF - }{ - "all zero": { - masterSecret: testvector.HKDFZero, - }, - "all 0xff": { - masterSecret: testvector.HKDF0xFF, - }, - } - - for name, tc := range testCases { - t.Run(name, func(t *testing.T) { - assert := assert.New(t) - - stateDiskKey, err := deriveStateDiskKey(tc.masterSecret.Secret, tc.masterSecret.Salt, tc.masterSecret.Info) - - assert.NoError(err) - assert.Equal(tc.masterSecret.Output, stateDiskKey) - }) - } -} diff --git a/disk-mapper/cmd/main.go b/disk-mapper/cmd/main.go index e262fb14a..d26b00436 100644 --- a/disk-mapper/cmd/main.go +++ b/disk-mapper/cmd/main.go @@ -33,6 +33,7 @@ import ( "github.com/edgelesssys/constellation/internal/constants" "github.com/edgelesssys/constellation/internal/grpc/dialer" "github.com/edgelesssys/constellation/internal/logger" + "github.com/edgelesssys/constellation/internal/role" tpmClient "github.com/google/go-tpm-tools/client" "github.com/google/go-tpm/tpm2" "github.com/spf13/afero" @@ -128,8 +129,13 @@ func main() { log.Named("rejoinClient"), ) - // set up recovery server - recoveryServer := recoveryserver.New(issuer, log.Named("recoveryServer")) + // set up recovery server if control-plane node + var recoveryServer setup.RecoveryServer + if self.Role == role.ControlPlane { + recoveryServer = recoveryserver.New(issuer, log.Named("recoveryServer")) + } else { + recoveryServer = recoveryserver.NewStub(log.Named("recoveryServer")) + } err = setupManger.PrepareExistingDisk(setup.NewNodeRecoverer(recoveryServer, rejoinClient)) } else { diff --git a/disk-mapper/internal/recoveryserver/server.go b/disk-mapper/internal/recoveryserver/server.go index 3f14e02a5..d593cc639 100644 --- a/disk-mapper/internal/recoveryserver/server.go +++ b/disk-mapper/internal/recoveryserver/server.go @@ -14,6 +14,7 @@ import ( "github.com/edgelesssys/constellation/disk-mapper/recoverproto" "github.com/edgelesssys/constellation/internal/atls" "github.com/edgelesssys/constellation/internal/grpc/atlscredentials" + "github.com/edgelesssys/constellation/internal/grpc/grpclog" "github.com/edgelesssys/constellation/internal/logger" "go.uber.org/zap" "google.golang.org/grpc" @@ -89,8 +90,9 @@ func (s *RecoveryServer) Serve(ctx context.Context, listener net.Listener, diskU func (s *RecoveryServer) Recover(stream recoverproto.API_RecoverServer) error { s.mux.Lock() defer s.mux.Unlock() + log := s.log.With(zap.String("peer", grpclog.PeerAddrFromContext(stream.Context()))) - s.log.Infof("Received recover call") + log.Infof("Received recover call") msg, err := stream.Recv() if err != nil { @@ -99,35 +101,53 @@ func (s *RecoveryServer) Recover(stream recoverproto.API_RecoverServer) error { measurementSecret, ok := msg.GetRequest().(*recoverproto.RecoverMessage_MeasurementSecret) if !ok { - s.log.Errorf("Received invalid first message: not a measurement secret") + log.Errorf("Received invalid first message: not a measurement secret") return status.Error(codes.InvalidArgument, "first message is not a measurement secret") } if err := stream.Send(&recoverproto.RecoverResponse{DiskUuid: s.diskUUID}); err != nil { - s.log.With(zap.Error(err)).Errorf("Failed to send disk UUID") + log.With(zap.Error(err)).Errorf("Failed to send disk UUID") return status.Error(codes.Internal, "failed to send response") } msg, err = stream.Recv() if err != nil { - s.log.With(zap.Error(err)).Errorf("Failed to receive disk key") + log.With(zap.Error(err)).Errorf("Failed to receive disk key") return status.Error(codes.Internal, "failed to receive message") } stateDiskKey, ok := msg.GetRequest().(*recoverproto.RecoverMessage_StateDiskKey) if !ok { - s.log.Errorf("Received invalid second message: not a state disk key") + log.Errorf("Received invalid second message: not a state disk key") return status.Error(codes.InvalidArgument, "second message is not a state disk key") } s.stateDiskKey = stateDiskKey.StateDiskKey s.measurementSecret = measurementSecret.MeasurementSecret - s.log.Infof("Received state disk key and measurement secret, shutting down server") + log.Infof("Received state disk key and measurement secret, shutting down server") go s.grpcServer.GracefulStop() return nil } +// stubServer implements the RecoveryServer interface but does not actually start a server. +type stubServer struct { + log *logger.Logger +} + +// NewStub returns a new stubbed RecoveryServer. +// We use this to avoid having to start a server for worker nodes, since they don't require manual recovery. +func NewStub(log *logger.Logger) *stubServer { + return &stubServer{log: log} +} + +// Serve waits until the context is canceled and returns nil. +func (s *stubServer) Serve(ctx context.Context, _ net.Listener, _ string) ([]byte, []byte, error) { + s.log.Infof("Running as worker node, skipping recovery server") + <-ctx.Done() + return nil, nil, ctx.Err() +} + type server interface { Serve(net.Listener) error GracefulStop() diff --git a/disk-mapper/internal/setup/setup.go b/disk-mapper/internal/setup/setup.go index 7e8b47a66..9af2bde91 100644 --- a/disk-mapper/internal/setup/setup.go +++ b/disk-mapper/internal/setup/setup.go @@ -160,21 +160,21 @@ func (s *SetupManager) saveConfiguration(passphrase []byte) error { return s.config.Generate(stateDiskMappedName, s.diskPath, filepath.Join(keyPath, keyFile), cryptsetupOptions) } -type recoveryServer interface { +type RecoveryServer interface { Serve(context.Context, net.Listener, string) (key, secret []byte, err error) } -type rejoinClient interface { +type RejoinClient interface { Start(context.Context, string) (key, secret []byte) } type nodeRecoverer struct { - recoveryServer recoveryServer - rejoinClient rejoinClient + recoveryServer RecoveryServer + rejoinClient RejoinClient } // NewNodeRecoverer initializes a new nodeRecoverer. -func NewNodeRecoverer(recoveryServer recoveryServer, rejoinClient rejoinClient) *nodeRecoverer { +func NewNodeRecoverer(recoveryServer RecoveryServer, rejoinClient RejoinClient) *nodeRecoverer { return &nodeRecoverer{ recoveryServer: recoveryServer, rejoinClient: rejoinClient, diff --git a/disk-mapper/recoverproto/recover.pb.go b/disk-mapper/recoverproto/recover.pb.go index ab56faf41..ce48d37f3 100644 --- a/disk-mapper/recoverproto/recover.pb.go +++ b/disk-mapper/recoverproto/recover.pb.go @@ -26,6 +26,7 @@ type RecoverMessage struct { unknownFields protoimpl.UnknownFields // Types that are assignable to Request: + // // *RecoverMessage_StateDiskKey // *RecoverMessage_MeasurementSecret Request isRecoverMessage_Request `protobuf_oneof:"request"` diff --git a/internal/constants/constants.go b/internal/constants/constants.go index b96ec717b..0a4077b95 100644 --- a/internal/constants/constants.go +++ b/internal/constants/constants.go @@ -42,7 +42,7 @@ const ( KMSPort = 9000 BootstrapperPort = 9000 KubernetesPort = 6443 - RecoveryPort = 9000 + RecoveryPort = 9999 EnclaveSSHPort = 2222 SSHPort = 22 NVMEOverTCPPort = 8009 diff --git a/internal/constants/firewall.go b/internal/constants/firewall.go index 413ae58d8..f39ee3b7a 100644 --- a/internal/constants/firewall.go +++ b/internal/constants/firewall.go @@ -49,6 +49,13 @@ var ( IPRange: "0.0.0.0/0", FromPort: KonnectivityPort, }, + { + Name: "recovery", + Description: "control-plane recovery", + Protocol: "tcp", + IPRange: "0.0.0.0/0", + FromPort: RecoveryPort, + }, } // IngressRulesDebug is the default set of ingress rules for a Constellation cluster with debug mode.