From 0df692e749be2159d328c406415fdf4d72c2e425 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Wei=C3=9Fe?= Date: Tue, 5 Mar 2024 14:32:32 +0100 Subject: [PATCH] Mark disk for wiping on fatal failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Daniel Weiße --- .../internal/diskencryption/diskencryption.go | 5 +++++ .../internal/joinclient/joinclient.go | 19 +++++++++++++++---- .../internal/joinclient/joinclient_test.go | 4 ++++ 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/bootstrapper/internal/diskencryption/diskencryption.go b/bootstrapper/internal/diskencryption/diskencryption.go index eaf97e7ab..cfc80d397 100644 --- a/bootstrapper/internal/diskencryption/diskencryption.go +++ b/bootstrapper/internal/diskencryption/diskencryption.go @@ -60,6 +60,11 @@ func (c *DiskEncryption) UpdatePassphrase(passphrase string) error { return c.device.SetConstellationStateDiskToken(cryptsetup.SetDiskInitialized) } +// MarkDiskForReset marks the state disk as not initialized. +func (c *DiskEncryption) MarkDiskForReset() error { + return c.device.SetConstellationStateDiskToken(cryptsetup.SetDiskNotInitialized) +} + // getInitialPassphrase retrieves the initial passphrase used on first boot. func (c *DiskEncryption) getInitialPassphrase() (string, error) { passphrase, err := afero.ReadFile(c.fs, initialKeyPath) diff --git a/bootstrapper/internal/joinclient/joinclient.go b/bootstrapper/internal/joinclient/joinclient.go index a09ac5606..b7d21290d 100644 --- a/bootstrapper/internal/joinclient/joinclient.go +++ b/bootstrapper/internal/joinclient/joinclient.go @@ -118,7 +118,7 @@ func (c *JoinClient) Start(cleaner cleaner) error { diskUUID, err := c.getDiskUUID() if err != nil { c.log.With(slog.Any("error", err)).Error("Failed to get disk UUID") - return err + return err // unrecoverable error, but disk wasn't initialized yet } c.diskUUID = diskUUID @@ -159,7 +159,7 @@ func (c *JoinClient) Start(cleaner cleaner) error { if err := c.startNodeAndJoin(ticket, kubeletKey); err != nil { c.log.With(slog.Any("error", err)).Error("Failed to start node and join cluster") // unrecoverable error - return err + return errors.Join(err, c.markDiskForReset()) } return nil @@ -269,7 +269,8 @@ func (c *JoinClient) startNodeAndJoin(ticket *joinproto.IssueJoinTicketResponse, // There is already a cluster initialization in progress on // this node, so there is no need to also join the cluster, // as the initializing node is automatically part of the cluster. - return errors.New("node is already being initialized") + c.log.Info("Node is already being initialized. Aborting join process.") + return nil } c.cleaner.Clean() @@ -306,7 +307,7 @@ func (c *JoinClient) startNodeAndJoin(ticket *joinproto.IssueJoinTicketResponse, // We currently cannot recover from any failure in this function. Joining the k8s cluster // sometimes fails transiently, and we don't want to brick the node because of that. - for i := 0; i < 3; i++ { + for i := range 3 { err = c.joiner.JoinCluster(ctx, btd, c.role, ticket.KubernetesComponents, c.log) if err != nil { c.log.Error("failed to join k8s cluster", "role", c.role, "attempt", i, "error", err) @@ -369,6 +370,15 @@ func (c *JoinClient) getDiskUUID() (string, error) { return c.disk.UUID() } +func (c *JoinClient) markDiskForReset() error { + free, err := c.disk.Open() + if err != nil { + return fmt.Errorf("opening disk: %w", err) + } + defer free() + return c.disk.MarkDiskForReset() +} + func (c *JoinClient) getControlPlaneIPs(ctx context.Context) ([]string, error) { instances, err := c.metadataAPI.List(ctx) if err != nil { @@ -438,6 +448,7 @@ type encryptedDisk interface { UUID() (string, error) // UpdatePassphrase switches the initial random passphrase of the encrypted disk to a permanent passphrase. UpdatePassphrase(passphrase string) error + MarkDiskForReset() error } type cleaner interface { diff --git a/bootstrapper/internal/joinclient/joinclient_test.go b/bootstrapper/internal/joinclient/joinclient_test.go index c9f4e048a..c4311a75e 100644 --- a/bootstrapper/internal/joinclient/joinclient_test.go +++ b/bootstrapper/internal/joinclient/joinclient_test.go @@ -379,6 +379,10 @@ func (d *stubDisk) UpdatePassphrase(string) error { return d.updatePassphraseErr } +func (d *stubDisk) MarkDiskForReset() error { + return nil +} + type stubCleaner struct{} func (c stubCleaner) Clean() {}