Retry helm apply on any error (#2322)

Signed-off-by: Daniel Weiße <dw@edgeless.systems>
This commit is contained in:
Daniel Weiße 2023-09-08 22:54:01 +02:00 committed by GitHub
parent 2cb0ce0b1b
commit 5706e69091
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 119 additions and 22 deletions

View File

@ -438,7 +438,6 @@ go_library(
"//internal/versions",
"@com_github_pkg_errors//:errors",
"@io_k8s_apimachinery//pkg/apis/meta/v1:meta",
"@io_k8s_apimachinery//pkg/util/wait",
"@io_k8s_client_go//kubernetes",
"@io_k8s_client_go//tools/clientcmd",
"@io_k8s_client_go//util/retry",
@ -457,6 +456,7 @@ go_test(
srcs = [
"helm_test.go",
"loader_test.go",
"retryaction_test.go",
],
data = glob(["testdata/**"]),
embed = [":helm"],

View File

@ -21,7 +21,8 @@ import (
const (
// timeout is the maximum time given per helm action.
timeout = 10 * time.Minute
timeout = 10 * time.Minute
applyRetryInterval = 30 * time.Second
)
type applyAction interface {
@ -85,7 +86,7 @@ func (a *installAction) Apply(ctx context.Context) error {
return err
}
}
if err := retryApply(ctx, a, a.log); err != nil {
if err := retryApply(ctx, a, applyRetryInterval, a.log); err != nil {
return err
}
@ -142,7 +143,7 @@ func (a *upgradeAction) Apply(ctx context.Context) error {
return err
}
}
if err := retryApply(ctx, a, a.log); err != nil {
if err := retryApply(ctx, a, applyRetryInterval, a.log); err != nil {
return err
}
if a.postUpgrade != nil {

View File

@ -9,11 +9,9 @@ package helm
import (
"context"
"fmt"
"strings"
"time"
"github.com/edgelesssys/constellation/v2/internal/retry"
"k8s.io/apimachinery/pkg/util/wait"
)
const (
@ -28,7 +26,7 @@ type retrieableApplier interface {
}
// retryApply retries the given retriable action.
func retryApply(ctx context.Context, action retrieableApplier, log debugLog) error {
func retryApply(ctx context.Context, action retrieableApplier, retryInterval time.Duration, log debugLog) error {
var retries int
retriable := func(err error) bool {
// abort after maximumRetryAttempts tries.
@ -37,20 +35,14 @@ func retryApply(ctx context.Context, action retrieableApplier, log debugLog) err
}
retries++
// only retry if atomic is set
// otherwise helm doesn't uninstall
// the release on failure
if !action.IsAtomic() {
return false
}
// check if error is retriable
return wait.Interrupted(err) ||
strings.Contains(err.Error(), "connection refused")
// otherwise helm doesn't uninstall the release on failure
return action.IsAtomic()
}
doer := applyDoer{
action,
log,
applier: action,
log: log,
}
retrier := retry.NewIntervalRetrier(doer, 30*time.Second, retriable)
retrier := retry.NewIntervalRetrier(doer, retryInterval, retriable)
retryLoopStartTime := time.Now()
if err := retrier.Do(ctx); err != nil {
@ -63,15 +55,15 @@ func retryApply(ctx context.Context, action retrieableApplier, log debugLog) err
// applyDoer is a helper struct to enable retrying helm actions.
type applyDoer struct {
Applier retrieableApplier
applier retrieableApplier
log debugLog
}
// Do tries to apply the action.
func (i applyDoer) Do(ctx context.Context) error {
i.log.Debugf("Trying to apply Helm chart %s", i.Applier.ReleaseName())
if err := i.Applier.apply(ctx); err != nil {
i.log.Debugf("Helm chart installation %s failed: %v", i.Applier.ReleaseName(), err)
i.log.Debugf("Trying to apply Helm chart %s", i.applier.ReleaseName())
if err := i.applier.apply(ctx); err != nil {
i.log.Debugf("Helm chart installation %s failed: %v", i.applier.ReleaseName(), err)
return err
}

View File

@ -0,0 +1,104 @@
/*
Copyright (c) Edgeless Systems GmbH
SPDX-License-Identifier: AGPL-3.0-only
*/
package helm
import (
"context"
"testing"
"time"
"github.com/edgelesssys/constellation/v2/internal/logger"
"github.com/stretchr/testify/assert"
)
func TestRetryApply(t *testing.T) {
testCases := map[string]struct {
applier *stubRetriableApplier
wantErr bool
}{
"success": {
applier: &stubRetriableApplier{
atomic: true,
},
},
"two errors": {
applier: &stubRetriableApplier{
applyErrs: []error{
assert.AnError,
assert.AnError,
nil,
},
atomic: true,
},
},
"retries are aborted after maximumRetryAttempts": {
applier: &stubRetriableApplier{
applyErrs: []error{
assert.AnError,
assert.AnError,
assert.AnError,
assert.AnError,
},
atomic: true,
},
wantErr: true,
},
"non atomic actions are not retried": {
applier: &stubRetriableApplier{
atomic: false,
applyErrs: []error{
assert.AnError,
assert.AnError,
nil,
},
},
wantErr: true,
},
}
for name, tc := range testCases {
t.Run(name, func(t *testing.T) {
assert := assert.New(t)
err := retryApply(context.Background(), tc.applier, time.Millisecond, logger.NewTest(t))
if tc.wantErr {
assert.Error(err)
} else {
assert.NoError(err)
}
})
}
}
type stubRetriableApplier struct {
atomic bool
applyErrs []error
}
func (s *stubRetriableApplier) apply(context.Context) error {
if len(s.applyErrs) == 0 {
return nil
}
// return the first error in the list
// and remove it from the list
err := s.applyErrs[0]
if len(s.applyErrs) > 1 {
s.applyErrs = s.applyErrs[1:]
} else {
s.applyErrs = nil
}
return err
}
func (s *stubRetriableApplier) ReleaseName() string {
return ""
}
func (s *stubRetriableApplier) IsAtomic() bool {
return s.atomic
}