diff --git a/cli/internal/helm/BUILD.bazel b/cli/internal/helm/BUILD.bazel index ef5dd8f7e..566faf914 100644 --- a/cli/internal/helm/BUILD.bazel +++ b/cli/internal/helm/BUILD.bazel @@ -438,7 +438,6 @@ go_library( "//internal/versions", "@com_github_pkg_errors//:errors", "@io_k8s_apimachinery//pkg/apis/meta/v1:meta", - "@io_k8s_apimachinery//pkg/util/wait", "@io_k8s_client_go//kubernetes", "@io_k8s_client_go//tools/clientcmd", "@io_k8s_client_go//util/retry", @@ -457,6 +456,7 @@ go_test( srcs = [ "helm_test.go", "loader_test.go", + "retryaction_test.go", ], data = glob(["testdata/**"]), embed = [":helm"], diff --git a/cli/internal/helm/action.go b/cli/internal/helm/action.go index 98de35266..621d92b89 100644 --- a/cli/internal/helm/action.go +++ b/cli/internal/helm/action.go @@ -21,7 +21,8 @@ import ( const ( // timeout is the maximum time given per helm action. - timeout = 10 * time.Minute + timeout = 10 * time.Minute + applyRetryInterval = 30 * time.Second ) type applyAction interface { @@ -85,7 +86,7 @@ func (a *installAction) Apply(ctx context.Context) error { return err } } - if err := retryApply(ctx, a, a.log); err != nil { + if err := retryApply(ctx, a, applyRetryInterval, a.log); err != nil { return err } @@ -142,7 +143,7 @@ func (a *upgradeAction) Apply(ctx context.Context) error { return err } } - if err := retryApply(ctx, a, a.log); err != nil { + if err := retryApply(ctx, a, applyRetryInterval, a.log); err != nil { return err } if a.postUpgrade != nil { diff --git a/cli/internal/helm/retryaction.go b/cli/internal/helm/retryaction.go index e655e5d27..ca61944b0 100644 --- a/cli/internal/helm/retryaction.go +++ b/cli/internal/helm/retryaction.go @@ -9,11 +9,9 @@ package helm import ( "context" "fmt" - "strings" "time" "github.com/edgelesssys/constellation/v2/internal/retry" - "k8s.io/apimachinery/pkg/util/wait" ) const ( @@ -28,7 +26,7 @@ type retrieableApplier interface { } // retryApply retries the given retriable action. -func retryApply(ctx context.Context, action retrieableApplier, log debugLog) error { +func retryApply(ctx context.Context, action retrieableApplier, retryInterval time.Duration, log debugLog) error { var retries int retriable := func(err error) bool { // abort after maximumRetryAttempts tries. @@ -37,20 +35,14 @@ func retryApply(ctx context.Context, action retrieableApplier, log debugLog) err } retries++ // only retry if atomic is set - // otherwise helm doesn't uninstall - // the release on failure - if !action.IsAtomic() { - return false - } - // check if error is retriable - return wait.Interrupted(err) || - strings.Contains(err.Error(), "connection refused") + // otherwise helm doesn't uninstall the release on failure + return action.IsAtomic() } doer := applyDoer{ - action, - log, + applier: action, + log: log, } - retrier := retry.NewIntervalRetrier(doer, 30*time.Second, retriable) + retrier := retry.NewIntervalRetrier(doer, retryInterval, retriable) retryLoopStartTime := time.Now() if err := retrier.Do(ctx); err != nil { @@ -63,15 +55,15 @@ func retryApply(ctx context.Context, action retrieableApplier, log debugLog) err // applyDoer is a helper struct to enable retrying helm actions. type applyDoer struct { - Applier retrieableApplier + applier retrieableApplier log debugLog } // Do tries to apply the action. func (i applyDoer) Do(ctx context.Context) error { - i.log.Debugf("Trying to apply Helm chart %s", i.Applier.ReleaseName()) - if err := i.Applier.apply(ctx); err != nil { - i.log.Debugf("Helm chart installation %s failed: %v", i.Applier.ReleaseName(), err) + i.log.Debugf("Trying to apply Helm chart %s", i.applier.ReleaseName()) + if err := i.applier.apply(ctx); err != nil { + i.log.Debugf("Helm chart installation %s failed: %v", i.applier.ReleaseName(), err) return err } diff --git a/cli/internal/helm/retryaction_test.go b/cli/internal/helm/retryaction_test.go new file mode 100644 index 000000000..6a39d7cb2 --- /dev/null +++ b/cli/internal/helm/retryaction_test.go @@ -0,0 +1,104 @@ +/* +Copyright (c) Edgeless Systems GmbH + +SPDX-License-Identifier: AGPL-3.0-only +*/ + +package helm + +import ( + "context" + "testing" + "time" + + "github.com/edgelesssys/constellation/v2/internal/logger" + "github.com/stretchr/testify/assert" +) + +func TestRetryApply(t *testing.T) { + testCases := map[string]struct { + applier *stubRetriableApplier + wantErr bool + }{ + "success": { + applier: &stubRetriableApplier{ + atomic: true, + }, + }, + "two errors": { + applier: &stubRetriableApplier{ + applyErrs: []error{ + assert.AnError, + assert.AnError, + nil, + }, + atomic: true, + }, + }, + "retries are aborted after maximumRetryAttempts": { + applier: &stubRetriableApplier{ + applyErrs: []error{ + assert.AnError, + assert.AnError, + assert.AnError, + assert.AnError, + }, + atomic: true, + }, + wantErr: true, + }, + "non atomic actions are not retried": { + applier: &stubRetriableApplier{ + atomic: false, + applyErrs: []error{ + assert.AnError, + assert.AnError, + nil, + }, + }, + wantErr: true, + }, + } + + for name, tc := range testCases { + t.Run(name, func(t *testing.T) { + assert := assert.New(t) + + err := retryApply(context.Background(), tc.applier, time.Millisecond, logger.NewTest(t)) + if tc.wantErr { + assert.Error(err) + } else { + assert.NoError(err) + } + }) + } +} + +type stubRetriableApplier struct { + atomic bool + applyErrs []error +} + +func (s *stubRetriableApplier) apply(context.Context) error { + if len(s.applyErrs) == 0 { + return nil + } + + // return the first error in the list + // and remove it from the list + err := s.applyErrs[0] + if len(s.applyErrs) > 1 { + s.applyErrs = s.applyErrs[1:] + } else { + s.applyErrs = nil + } + return err +} + +func (s *stubRetriableApplier) ReleaseName() string { + return "" +} + +func (s *stubRetriableApplier) IsAtomic() bool { + return s.atomic +}