diff --git a/bootstrapper/internal/helm/helm.go b/bootstrapper/internal/helm/helm.go index cd354a295..55ac43cb7 100644 --- a/bootstrapper/internal/helm/helm.go +++ b/bootstrapper/internal/helm/helm.go @@ -35,6 +35,8 @@ import ( const ( // timeout is the maximum time given to the helm client. timeout = 5 * time.Minute + // maximumRetryAttempts is the maximum number of attempts to retry a helm install. + maximumRetryAttempts = 3 ) // Client is used to install microservice during cluster initialization. It is a wrapper for a helm install action. @@ -57,8 +59,6 @@ func New(log *logger.Logger) (*Client, error) { action := action.NewInstall(actionConfig) action.Namespace = constants.HelmNamespace action.Timeout = timeout - action.Atomic = true - action.Wait = true return &Client{ action, @@ -69,6 +69,9 @@ func New(log *logger.Logger) (*Client, error) { // InstallConstellationServices installs the constellation-services chart. In the future this chart should bundle all microservices. func (h *Client) InstallConstellationServices(ctx context.Context, release helm.Release, extraVals map[string]any) error { h.ReleaseName = release.ReleaseName + if err := h.setWaitMode(release.WaitMode); err != nil { + return err + } mergedVals := helm.MergeMaps(release.Values, extraVals) @@ -79,6 +82,9 @@ func (h *Client) InstallConstellationServices(ctx context.Context, release helm. func (h *Client) InstallCertManager(ctx context.Context, release helm.Release) error { h.ReleaseName = release.ReleaseName h.Timeout = 10 * time.Minute + if err := h.setWaitMode(release.WaitMode); err != nil { + return err + } return h.install(ctx, release.Chart, release.Values) } @@ -86,6 +92,9 @@ func (h *Client) InstallCertManager(ctx context.Context, release helm.Release) e // InstallOperators installs the Constellation Operators. func (h *Client) InstallOperators(ctx context.Context, release helm.Release, extraVals map[string]any) error { h.ReleaseName = release.ReleaseName + if err := h.setWaitMode(release.WaitMode); err != nil { + return err + } mergedVals := helm.MergeMaps(release.Values, extraVals) @@ -95,6 +104,9 @@ func (h *Client) InstallOperators(ctx context.Context, release helm.Release, ext // InstallCilium sets up the cilium pod network. func (h *Client) InstallCilium(ctx context.Context, kubectl k8sapi.Client, release helm.Release, in k8sapi.SetupPodNetworkInput) error { h.ReleaseName = release.ReleaseName + if err := h.setWaitMode(release.WaitMode); err != nil { + return err + } timeoutS := int64(10) // allow coredns to run on uninitialized nodes (required by cloud-controller-manager) @@ -164,9 +176,22 @@ func (h *Client) installCiliumGCP(ctx context.Context, release helm.Release, nod // install tries to install the given chart and aborts after ~5 tries. // The function will wait 30 seconds before retrying a failed installation attempt. -// After 10 minutes the retrier will be canceled and the function returns with an error. +// After 3 tries, the retrier will be canceled and the function returns with an error. func (h *Client) install(ctx context.Context, chartRaw []byte, values map[string]any) error { + var retries int retriable := func(err error) bool { + // abort after maximumRetryAttempts tries. + if retries >= maximumRetryAttempts { + return false + } + retries++ + // only retry if atomic is set + // otherwise helm doesn't uninstall + // the release on failure + if !h.Atomic { + return false + } + // check if error is retriable return wait.Interrupted(err) || strings.Contains(err.Error(), "connection refused") } @@ -185,14 +210,8 @@ func (h *Client) install(ctx context.Context, chartRaw []byte, values map[string } retrier := retry.NewIntervalRetrier(doer, 30*time.Second, retriable) - // Since we have no precise retry condition we want to stop retrying after 10 minutes. - // The helm library only reports a timeout error in the error cases we currently know. - // Other errors will not be retried. - newCtx, cancel := context.WithTimeout(ctx, 10*time.Minute) - defer cancel() - retryLoopStartTime := time.Now() - if err := retrier.Do(newCtx); err != nil { + if err := retrier.Do(ctx); err != nil { return fmt.Errorf("helm install: %w", err) } retryLoopFinishDuration := time.Since(retryLoopStartTime) @@ -201,6 +220,23 @@ func (h *Client) install(ctx context.Context, chartRaw []byte, values map[string return nil } +func (h *Client) setWaitMode(waitMode helm.WaitMode) error { + switch waitMode { + case helm.WaitModeNone: + h.Wait = false + h.Atomic = false + case helm.WaitModeWait: + h.Wait = true + h.Atomic = false + case helm.WaitModeAtomic: + h.Wait = true + h.Atomic = true + default: + return fmt.Errorf("unknown wait mode %q", waitMode) + } + return nil +} + // installDoer is a help struct to enable retrying helm's install action. type installDoer struct { client *Client diff --git a/cli/internal/cmd/BUILD.bazel b/cli/internal/cmd/BUILD.bazel index f116a53c3..579552b21 100644 --- a/cli/internal/cmd/BUILD.bazel +++ b/cli/internal/cmd/BUILD.bazel @@ -65,6 +65,7 @@ go_library( "//internal/config/migration", "//internal/constants", "//internal/crypto", + "//internal/deploy/helm", "//internal/file", "//internal/grpc/dialer", "//internal/grpc/grpclog", diff --git a/cli/internal/cmd/init.go b/cli/internal/cmd/init.go index 68ea4d387..c67695c7c 100644 --- a/cli/internal/cmd/init.go +++ b/cli/internal/cmd/init.go @@ -43,6 +43,7 @@ import ( "github.com/edgelesssys/constellation/v2/internal/config" "github.com/edgelesssys/constellation/v2/internal/constants" "github.com/edgelesssys/constellation/v2/internal/crypto" + helmdeploy "github.com/edgelesssys/constellation/v2/internal/deploy/helm" "github.com/edgelesssys/constellation/v2/internal/file" "github.com/edgelesssys/constellation/v2/internal/grpc/dialer" "github.com/edgelesssys/constellation/v2/internal/grpc/grpclog" @@ -65,6 +66,7 @@ func NewInitCmd() *cobra.Command { } cmd.Flags().String("master-secret", "", "path to base64-encoded master secret") cmd.Flags().Bool("conformance", false, "enable conformance mode") + cmd.Flags().Bool("skip-helm-wait", false, "install helm charts without waiting for deployments to be ready") cmd.Flags().Bool("merge-kubeconfig", false, "merge Constellation kubeconfig file with default kubeconfig file in $HOME/.kube/config") return cmd } @@ -174,7 +176,7 @@ func (i *initCmd) initialize(cmd *cobra.Command, newDialer func(validator atls.V } helmLoader := helm.NewLoader(provider, k8sVersion) i.log.Debugf("Created new Helm loader") - helmDeployments, err := helmLoader.Load(conf, flags.conformance, masterSecret.Key, masterSecret.Salt) + helmDeployments, err := helmLoader.Load(conf, flags.conformance, flags.helmWaitMode, masterSecret.Key, masterSecret.Salt) i.log.Debugf("Loaded Helm deployments") if err != nil { return fmt.Errorf("loading Helm charts: %w", err) @@ -409,6 +411,15 @@ func (i *initCmd) evalFlagArgs(cmd *cobra.Command) (initFlags, error) { return initFlags{}, fmt.Errorf("parsing conformance flag: %w", err) } i.log.Debugf("Conformance flag is %t", conformance) + skipHelmWait, err := cmd.Flags().GetBool("skip-helm-wait") + if err != nil { + return initFlags{}, fmt.Errorf("parsing skip-helm-wait flag: %w", err) + } + helmWaitMode := helmdeploy.WaitModeAtomic + if skipHelmWait { + helmWaitMode = helmdeploy.WaitModeNone + } + i.log.Debugf("Helm wait flag is %t", skipHelmWait) configPath, err := cmd.Flags().GetString("config") if err != nil { return initFlags{}, fmt.Errorf("parsing config path flag: %w", err) @@ -429,6 +440,7 @@ func (i *initCmd) evalFlagArgs(cmd *cobra.Command) (initFlags, error) { return initFlags{ configPath: configPath, conformance: conformance, + helmWaitMode: helmWaitMode, masterSecretPath: masterSecretPath, force: force, mergeConfigs: mergeConfigs, @@ -440,6 +452,7 @@ type initFlags struct { configPath string masterSecretPath string conformance bool + helmWaitMode helmdeploy.WaitMode force bool mergeConfigs bool } diff --git a/cli/internal/helm/loader.go b/cli/internal/helm/loader.go index 27bdbfb33..3b7cdf258 100644 --- a/cli/internal/helm/loader.go +++ b/cli/internal/helm/loader.go @@ -101,24 +101,24 @@ func NewLoader(csp cloudprovider.Provider, k8sVersion versions.ValidK8sVersion) } // Load the embedded helm charts. -func (i *ChartLoader) Load(config *config.Config, conformanceMode bool, masterSecret, salt []byte) ([]byte, error) { - ciliumRelease, err := i.loadRelease(ciliumInfo) +func (i *ChartLoader) Load(config *config.Config, conformanceMode bool, helmWaitMode helm.WaitMode, masterSecret, salt []byte) ([]byte, error) { + ciliumRelease, err := i.loadRelease(ciliumInfo, helmWaitMode) if err != nil { return nil, fmt.Errorf("loading cilium: %w", err) } extendCiliumValues(ciliumRelease.Values, conformanceMode) - certManagerRelease, err := i.loadRelease(certManagerInfo) + certManagerRelease, err := i.loadRelease(certManagerInfo, helmWaitMode) if err != nil { return nil, fmt.Errorf("loading cert-manager: %w", err) } - operatorRelease, err := i.loadRelease(constellationOperatorsInfo) + operatorRelease, err := i.loadRelease(constellationOperatorsInfo, helmWaitMode) if err != nil { return nil, fmt.Errorf("loading operators: %w", err) } - conServicesRelease, err := i.loadRelease(constellationServicesInfo) + conServicesRelease, err := i.loadRelease(constellationServicesInfo, helmWaitMode) if err != nil { return nil, fmt.Errorf("loading constellation-services: %w", err) } @@ -136,7 +136,7 @@ func (i *ChartLoader) Load(config *config.Config, conformanceMode bool, masterSe } // loadRelease loads the embedded chart and values depending on the given info argument. -func (i *ChartLoader) loadRelease(info chartInfo) (helm.Release, error) { +func (i *ChartLoader) loadRelease(info chartInfo, helmWaitMode helm.WaitMode) (helm.Release, error) { chart, err := loadChartsDir(helmFS, info.path) if err != nil { return helm.Release{}, fmt.Errorf("loading %s chart: %w", info.releaseName, err) @@ -168,7 +168,7 @@ func (i *ChartLoader) loadRelease(info chartInfo) (helm.Release, error) { return helm.Release{}, fmt.Errorf("packaging %s chart: %w", info.releaseName, err) } - return helm.Release{Chart: chartRaw, Values: values, ReleaseName: info.releaseName}, nil + return helm.Release{Chart: chartRaw, Values: values, ReleaseName: info.releaseName, WaitMode: helmWaitMode}, nil } // loadCiliumValues is used to separate the marshalling step from the loading step. diff --git a/cli/internal/helm/loader_test.go b/cli/internal/helm/loader_test.go index 235aa483f..0fe1266b3 100644 --- a/cli/internal/helm/loader_test.go +++ b/cli/internal/helm/loader_test.go @@ -39,7 +39,7 @@ func TestLoad(t *testing.T) { config := &config.Config{Provider: config.ProviderConfig{GCP: &config.GCPConfig{}}} chartLoader := ChartLoader{csp: config.GetProvider()} - release, err := chartLoader.Load(config, true, []byte("secret"), []byte("salt")) + release, err := chartLoader.Load(config, true, helm.WaitModeAtomic, []byte("secret"), []byte("salt")) require.NoError(err) var helmReleases helm.Releases diff --git a/docs/docs/reference/cli.md b/docs/docs/reference/cli.md index 2ff90666f..fd32c54cd 100644 --- a/docs/docs/reference/cli.md +++ b/docs/docs/reference/cli.md @@ -253,6 +253,7 @@ constellation init [flags] -h, --help help for init --master-secret string path to base64-encoded master secret --merge-kubeconfig merge Constellation kubeconfig file with default kubeconfig file in $HOME/.kube/config + --skip-helm-wait install helm charts without waiting for deployments to be ready ``` ### Options inherited from parent commands diff --git a/internal/deploy/helm/helm.go b/internal/deploy/helm/helm.go index 8170bbef9..81ac0a8c4 100644 --- a/internal/deploy/helm/helm.go +++ b/internal/deploy/helm/helm.go @@ -12,6 +12,7 @@ type Release struct { Chart []byte Values map[string]any ReleaseName string + WaitMode WaitMode } // Releases bundles all helm releases to be deployed to Constellation. @@ -43,3 +44,16 @@ func MergeMaps(a, b map[string]any) map[string]any { } return out } + +// WaitMode specifies the wait mode for a helm release. +type WaitMode string + +const ( + // WaitModeNone specifies that the helm release should not wait for the resources to be ready. + WaitModeNone WaitMode = "" + // WaitModeWait specifies that the helm release should wait for the resources to be ready. + WaitModeWait WaitMode = "wait" + // WaitModeAtomic specifies that the helm release should + // wait for the resources to be ready and roll back atomically on failure. + WaitModeAtomic WaitMode = "atomic" +)