ci: add e2e test for constellation recover (#845)

* AB#2256 Add recover e2e test

* AB#2256 move test & fix minor objections

* AB#2256 fix path

* AB#2256 rename hacky filename
This commit is contained in:
Moritz Sanft 2023-01-19 10:41:07 +01:00 committed by GitHub
parent 2cee7cb454
commit ae2db08f3a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
14 changed files with 202 additions and 32 deletions

View file

@ -61,6 +61,9 @@ outputs:
kubeconfig: kubeconfig:
description: "The kubeconfig for the cluster." description: "The kubeconfig for the cluster."
value: ${{ steps.constellation-init.outputs.KUBECONFIG }} value: ${{ steps.constellation-init.outputs.KUBECONFIG }}
masterSecret:
description: "The master-secret for the cluster."
value: ${{ steps.constellation-init.outputs.MASTERSECRET }}
runs: runs:
using: "composite" using: "composite"
@ -178,6 +181,7 @@ runs:
run: | run: |
constellation init constellation init
echo "KUBECONFIG=$(pwd)/constellation-admin.conf" >> $GITHUB_OUTPUT echo "KUBECONFIG=$(pwd)/constellation-admin.conf" >> $GITHUB_OUTPUT
echo "MASTERSECRET=$(pwd)/constellation-mastersecret.json" >> $GITHUB_OUTPUT
- name: Wait for nodes to join and become ready - name: Wait for nodes to join and become ready
shell: bash shell: bash

73
.github/actions/e2e_recover/action.yml vendored Normal file
View file

@ -0,0 +1,73 @@
# Composite action: reboot every node of a running Constellation cluster and
# verify that `constellation recover` brings the control plane back.
name: Constellation recover
description: "Recover a Constellation cluster with an unavailable control plane."

inputs:
  controlNodesCount:
    description: "The amount of control plane nodes in the cluster."
    required: true
  kubeconfig:
    description: "The kubeconfig for the cluster."
    required: true
  masterSecret:
    description: "The master-secret for the cluster."
    required: true
  cloudProvider:
    description: "Which cloud provider to use."
    required: true
  gcpProject:
    description: "The GCP project Constellation is deployed in."
    required: false
  resourceGroup:
    description: "The Azure resource group Constellation is deployed in."
    required: false

runs:
  using: "composite"
  steps:
    # Reboot a single worker node first and confirm it rejoins on its own
    # (worker recovery must not require `constellation recover`).
    - name: Restart worker node
      shell: bash
      run: |
        WORKER_NODE=$(kubectl get nodes --selector='!node-role.kubernetes.io/control-plane' -o json | jq '.items[0].metadata.name' -r)
        # Drop a reboot script on the node and run it in the host's root filesystem.
        kubectl debug "node/${WORKER_NODE}" --image=ubuntu -- bash -c "echo reboot > reboot.sh && chroot /host < reboot.sh"
        kubectl wait --for=condition=Ready=false --timeout=10m "node/${WORKER_NODE}"
        kubectl wait --for=condition=Ready=true --timeout=10m --all nodes
      env:
        KUBECONFIG: ${{ inputs.kubeconfig }}
    # Reboot every control plane node at once to make the control plane
    # unavailable; recovery is verified in the next step.
    - name: Restart all control plane nodes
      shell: bash
      run: |
        CONTROL_PLANE_NODES=$(kubectl get nodes --selector='node-role.kubernetes.io/control-plane' -o json | jq '.items[].metadata.name' -r)
        for CONTROL_PLANE_NODE in ${CONTROL_PLANE_NODES}; do
          kubectl debug "node/${CONTROL_PLANE_NODE}" --image=ubuntu -- bash -c "echo reboot > reboot.sh && chroot /host < reboot.sh"
        done
      env:
        KUBECONFIG: ${{ inputs.kubeconfig }}
    # Retry `constellation recover` until every control plane node reported a
    # pushed recovery key, or until the timeout elapses.
    - name: Constellation recover
      shell: bash
      run: |
        timeout=600
        start_time=$(date +%s)
        recovered=0
        while true; do
          output=$(constellation recover --master-secret="${{ inputs.masterSecret }}")
          if echo "$output" | grep -q "Pushed recovery key."; then
            echo "$output"
            # Count the nodes recovered by this invocation; sed strips the
            # leading whitespace BSD `wc -l` emits.
            i=$(echo "$output" | grep -o "Pushed recovery key." | wc -l | sed 's/ //g')
            recovered=$((recovered+i))
            if [[ $recovered -eq ${{ inputs.controlNodesCount }} ]]; then
              # All control plane nodes recovered: leave the loop so the
              # readiness check below actually runs (it was unreachable when
              # this branch did `exit 0`).
              break
            fi
          fi
          current_time=$(date +%s)
          if ((current_time - start_time > timeout)); then
            echo "Control plane recovery timed out after $timeout seconds."
            exit 1
          fi
          echo "Did not recover all nodes yet, retrying in 5 seconds [$recovered/${{ inputs.controlNodesCount }}]"
          sleep 5
        done
        kubectl wait --for=condition=Ready --timeout=10m --all nodes
      env:
        KUBECONFIG: ${{ inputs.kubeconfig }}

View file

@ -57,7 +57,7 @@ inputs:
description: "The resource group to use" description: "The resource group to use"
required: false required: false
test: test:
description: "The test to run. Can currently be one of [sonobuoy full, sonobuoy quick, autoscaling, lb, k-bench, verify, nop]." description: "The test to run. Can currently be one of [sonobuoy full, sonobuoy quick, autoscaling, lb, k-bench, verify, recover, nop]."
required: true required: true
sonobuoyTestSuiteCmd: sonobuoyTestSuiteCmd:
description: "The sonobuoy test suite to run." description: "The sonobuoy test suite to run."
@ -72,7 +72,7 @@ runs:
using: "composite" using: "composite"
steps: steps:
- name: Check input - name: Check input
if: (!contains(fromJson('["sonobuoy full", "sonobuoy quick", "autoscaling", "k-bench", "verify", "lb", "nop"]'), inputs.test)) if: (!contains(fromJson('["sonobuoy full", "sonobuoy quick", "autoscaling", "k-bench", "verify", "lb", "recover", "nop"]'), inputs.test))
shell: bash shell: bash
run: | run: |
echo "Invalid input for test field: ${{ inputs.test }}" echo "Invalid input for test field: ${{ inputs.test }}"
@ -212,3 +212,14 @@ runs:
with: with:
cloudProvider: ${{ inputs.cloudProvider }} cloudProvider: ${{ inputs.cloudProvider }}
osImage: ${{ inputs.osImage }} osImage: ${{ inputs.osImage }}
# Invoke the e2e_recover composite action for the 'recover' test selection.
- name: Run recover test
  if: inputs.test == 'recover'
  uses: ./.github/actions/e2e_recover
  with:
    controlNodesCount: ${{ inputs.controlNodesCount }}
    cloudProvider: ${{ inputs.cloudProvider }}
    gcpProject: ${{ inputs.gcpProject }}
    kubeconfig: ${{ steps.constellation-create.outputs.kubeconfig }}
    masterSecret: ${{ steps.constellation-create.outputs.masterSecret }}
    # The e2e_recover action declares this input as `resourceGroup`;
    # passing it as `azureResourceGroup` is silently dropped by the runner
    # (unknown inputs only produce a warning), so the value never arrived.
    resourceGroup: ${{ inputs.azureResourceGroup }}

View file

@ -37,6 +37,7 @@ on:
- "lb" - "lb"
- "k-bench" - "k-bench"
- "verify" - "verify"
- "recover"
- "nop" - "nop"
required: true required: true
kubernetesVersion: kubernetesVersion:

View file

@ -45,7 +45,7 @@ jobs:
fail-fast: false fail-fast: false
max-parallel: 5 max-parallel: 5
matrix: matrix:
test: ["sonobuoy full", "autoscaling", "k-bench", "lb", "verify"] test: ["sonobuoy full", "autoscaling", "k-bench", "lb", "verify", "recover"]
provider: ["gcp", "azure", "aws"] provider: ["gcp", "azure", "aws"]
version: ["1.23", "1.24", "1.25", "1.26"] version: ["1.23", "1.24", "1.25", "1.26"]
exclude: exclude:
@ -56,6 +56,13 @@ jobs:
version: "1.24" version: "1.24"
- test: "verify" - test: "verify"
version: "1.25" version: "1.25"
# Recover test runs only on latest version.
- test: "recover"
version: "1.23"
- test: "recover"
version: "1.24"
- test: "recover"
version: "1.25"
# Autoscaling test runs only on latest version. # Autoscaling test runs only on latest version.
- test: "autoscaling" - test: "autoscaling"
version: "1.23" version: "1.23"

View file

@ -17,7 +17,7 @@ import (
type terraformClient interface { type terraformClient interface {
PrepareWorkspace(path string, input terraform.Variables) error PrepareWorkspace(path string, input terraform.Variables) error
CreateCluster(ctx context.Context) (string, string, error) CreateCluster(ctx context.Context) (terraform.CreateOutput, error)
CreateIAMConfig(ctx context.Context, provider cloudprovider.Provider) (terraform.IAMOutput, error) CreateIAMConfig(ctx context.Context, provider cloudprovider.Provider) (terraform.IAMOutput, error)
DestroyCluster(ctx context.Context) error DestroyCluster(ctx context.Context) error
CleanUpWorkspace() error CleanUpWorkspace() error

View file

@ -29,6 +29,7 @@ type stubTerraformClient struct {
ip string ip string
initSecret string initSecret string
iamOutput terraform.IAMOutput iamOutput terraform.IAMOutput
uid string
cleanUpWorkspaceCalled bool cleanUpWorkspaceCalled bool
removeInstallerCalled bool removeInstallerCalled bool
destroyClusterCalled bool destroyClusterCalled bool
@ -39,8 +40,12 @@ type stubTerraformClient struct {
iamOutputErr error iamOutputErr error
} }
func (c *stubTerraformClient) CreateCluster(ctx context.Context) (string, string, error) { func (c *stubTerraformClient) CreateCluster(ctx context.Context) (terraform.CreateOutput, error) {
return c.ip, c.initSecret, c.createClusterErr return terraform.CreateOutput{
IP: c.ip,
Secret: c.initSecret,
UID: c.uid,
}, c.createClusterErr
} }
func (c *stubTerraformClient) CreateIAMConfig(ctx context.Context, provider cloudprovider.Provider) (terraform.IAMOutput, error) { func (c *stubTerraformClient) CreateIAMConfig(ctx context.Context, provider cloudprovider.Provider) (terraform.IAMOutput, error) {

View file

@ -123,15 +123,16 @@ func (c *Creator) createAWS(ctx context.Context, cl terraformClient, config *con
} }
defer rollbackOnError(context.Background(), c.out, &retErr, &rollbackerTerraform{client: cl}) defer rollbackOnError(context.Background(), c.out, &retErr, &rollbackerTerraform{client: cl})
ip, initSecret, err := cl.CreateCluster(ctx) tfOutput, err := cl.CreateCluster(ctx)
if err != nil { if err != nil {
return clusterid.File{}, err return clusterid.File{}, err
} }
return clusterid.File{ return clusterid.File{
CloudProvider: cloudprovider.AWS, CloudProvider: cloudprovider.AWS,
InitSecret: []byte(initSecret), InitSecret: []byte(tfOutput.Secret),
IP: ip, IP: tfOutput.IP,
UID: tfOutput.UID,
}, nil }, nil
} }
@ -160,15 +161,16 @@ func (c *Creator) createGCP(ctx context.Context, cl terraformClient, config *con
} }
defer rollbackOnError(context.Background(), c.out, &retErr, &rollbackerTerraform{client: cl}) defer rollbackOnError(context.Background(), c.out, &retErr, &rollbackerTerraform{client: cl})
ip, initSecret, err := cl.CreateCluster(ctx) tfOutput, err := cl.CreateCluster(ctx)
if err != nil { if err != nil {
return clusterid.File{}, err return clusterid.File{}, err
} }
return clusterid.File{ return clusterid.File{
CloudProvider: cloudprovider.GCP, CloudProvider: cloudprovider.GCP,
InitSecret: []byte(initSecret), InitSecret: []byte(tfOutput.Secret),
IP: ip, IP: tfOutput.IP,
UID: tfOutput.UID,
}, nil }, nil
} }
@ -200,15 +202,16 @@ func (c *Creator) createAzure(ctx context.Context, cl terraformClient, config *c
} }
defer rollbackOnError(context.Background(), c.out, &retErr, &rollbackerTerraform{client: cl}) defer rollbackOnError(context.Background(), c.out, &retErr, &rollbackerTerraform{client: cl})
ip, initSecret, err := cl.CreateCluster(ctx) tfOutput, err := cl.CreateCluster(ctx)
if err != nil { if err != nil {
return clusterid.File{}, err return clusterid.File{}, err
} }
return clusterid.File{ return clusterid.File{
CloudProvider: cloudprovider.Azure, CloudProvider: cloudprovider.Azure,
IP: ip, IP: tfOutput.IP,
InitSecret: []byte(initSecret), InitSecret: []byte(tfOutput.Secret),
UID: tfOutput.UID,
}, nil }, nil
} }
@ -313,14 +316,15 @@ func (c *Creator) createQEMU(ctx context.Context, cl terraformClient, lv libvirt
// Allow rollback of QEMU Terraform workspace from this point on // Allow rollback of QEMU Terraform workspace from this point on
qemuRollbacker.createdWorkspace = true qemuRollbacker.createdWorkspace = true
ip, initSecret, err := cl.CreateCluster(ctx) tfOutput, err := cl.CreateCluster(ctx)
if err != nil { if err != nil {
return clusterid.File{}, err return clusterid.File{}, err
} }
return clusterid.File{ return clusterid.File{
CloudProvider: cloudprovider.QEMU, CloudProvider: cloudprovider.QEMU,
InitSecret: []byte(initSecret), InitSecret: []byte(tfOutput.Secret),
IP: ip, IP: tfOutput.IP,
UID: tfOutput.UID,
}, nil }, nil
} }

View file

@ -74,39 +74,59 @@ func (c *Client) PrepareWorkspace(path string, vars Variables) error {
} }
// CreateCluster creates a Constellation cluster using Terraform. // CreateCluster creates a Constellation cluster using Terraform.
func (c *Client) CreateCluster(ctx context.Context) (string, string, error) { func (c *Client) CreateCluster(ctx context.Context) (CreateOutput, error) {
if err := c.tf.Init(ctx); err != nil { if err := c.tf.Init(ctx); err != nil {
return "", "", err return CreateOutput{}, err
} }
if err := c.tf.Apply(ctx); err != nil { if err := c.tf.Apply(ctx); err != nil {
return "", "", err return CreateOutput{}, err
} }
tfState, err := c.tf.Show(ctx) tfState, err := c.tf.Show(ctx)
if err != nil { if err != nil {
return "", "", err return CreateOutput{}, err
} }
ipOutput, ok := tfState.Values.Outputs["ip"] ipOutput, ok := tfState.Values.Outputs["ip"]
if !ok { if !ok {
return "", "", errors.New("no IP output found") return CreateOutput{}, errors.New("no IP output found")
} }
ip, ok := ipOutput.Value.(string) ip, ok := ipOutput.Value.(string)
if !ok { if !ok {
return "", "", errors.New("invalid type in IP output: not a string") return CreateOutput{}, errors.New("invalid type in IP output: not a string")
} }
secretOutput, ok := tfState.Values.Outputs["initSecret"] secretOutput, ok := tfState.Values.Outputs["initSecret"]
if !ok { if !ok {
return "", "", errors.New("no initSecret output found") return CreateOutput{}, errors.New("no initSecret output found")
} }
secret, ok := secretOutput.Value.(string) secret, ok := secretOutput.Value.(string)
if !ok { if !ok {
return "", "", errors.New("invalid type in initSecret output: not a string") return CreateOutput{}, errors.New("invalid type in initSecret output: not a string")
} }
return ip, secret, nil uidOutput, ok := tfState.Values.Outputs["uid"]
if !ok {
return CreateOutput{}, errors.New("no uid output found")
}
uid, ok := uidOutput.Value.(string)
if !ok {
return CreateOutput{}, errors.New("invalid type in uid output: not a string")
}
return CreateOutput{
IP: ip,
Secret: secret,
UID: uid,
}, nil
}
// CreateOutput contains the Terraform output values of a cluster creation.
type CreateOutput struct {
IP string
Secret string
UID string
} }
// IAMOutput contains the output information of the Terraform IAM operations. // IAMOutput contains the output information of the Terraform IAM operations.

View file

@ -2,6 +2,10 @@ output "ip" {
value = aws_eip.lb.public_ip value = aws_eip.lb.public_ip
} }
output "uid" {
value = local.uid
}
output "initSecret" { output "initSecret" {
value = random_password.initSecret.result value = random_password.initSecret.result
sensitive = true sensitive = true

View file

@ -2,6 +2,10 @@ output "ip" {
value = azurerm_public_ip.loadbalancer_ip.ip_address value = azurerm_public_ip.loadbalancer_ip.ip_address
} }
output "uid" {
value = local.uid
}
output "initSecret" { output "initSecret" {
value = random_password.initSecret.result value = random_password.initSecret.result
sensitive = true sensitive = true

View file

@ -2,6 +2,10 @@ output "ip" {
value = google_compute_global_address.loadbalancer_ip.address value = google_compute_global_address.loadbalancer_ip.address
} }
output "uid" {
value = local.uid
}
output "initSecret" { output "initSecret" {
value = random_password.initSecret.result value = random_password.initSecret.result
sensitive = true sensitive = true

View file

@ -214,6 +214,9 @@ func TestCreateCluster(t *testing.T) {
"initSecret": { "initSecret": {
Value: "initSecret", Value: "initSecret",
}, },
"uid": {
Value: "12345abc",
},
}, },
}, },
} }
@ -300,6 +303,34 @@ func TestCreateCluster(t *testing.T) {
fs: afero.NewMemMapFs(), fs: afero.NewMemMapFs(),
wantErr: true, wantErr: true,
}, },
"no uid": {
pathBase: "terraform",
provider: cloudprovider.QEMU,
vars: qemuVars,
tf: &stubTerraform{
showState: &tfjson.State{
Values: &tfjson.StateValues{
Outputs: map[string]*tfjson.StateOutput{},
},
},
},
fs: afero.NewMemMapFs(),
wantErr: true,
},
"uid has wrong type": {
pathBase: "terraform",
provider: cloudprovider.QEMU,
vars: qemuVars,
tf: &stubTerraform{
showState: &tfjson.State{
Values: &tfjson.StateValues{
Outputs: map[string]*tfjson.StateOutput{"uid": {Value: 42}},
},
},
},
fs: afero.NewMemMapFs(),
wantErr: true,
},
} }
for name, tc := range testCases { for name, tc := range testCases {
@ -315,15 +346,16 @@ func TestCreateCluster(t *testing.T) {
path := path.Join(tc.pathBase, strings.ToLower(tc.provider.String())) path := path.Join(tc.pathBase, strings.ToLower(tc.provider.String()))
require.NoError(c.PrepareWorkspace(path, tc.vars)) require.NoError(c.PrepareWorkspace(path, tc.vars))
ip, initSecret, err := c.CreateCluster(context.Background()) tfOutput, err := c.CreateCluster(context.Background())
if tc.wantErr { if tc.wantErr {
assert.Error(err) assert.Error(err)
return return
} }
assert.NoError(err) assert.NoError(err)
assert.Equal("192.0.2.100", ip) assert.Equal("192.0.2.100", tfOutput.IP)
assert.Equal("initSecret", initSecret) assert.Equal("initSecret", tfOutput.Secret)
assert.Equal("12345abc", tfOutput.UID)
}) })
} }
} }

View file

@ -9,12 +9,13 @@ gh workflow run e2e-test-manual.yml \
--ref feat/e2e_pipeline \ # On your specific branch! --ref feat/e2e_pipeline \ # On your specific branch!
-F cloudProvider=gcp \ # With your ... -F cloudProvider=gcp \ # With your ...
-F controlNodesCount=1 -F workerNodesCount=2 \ # ... settings -F controlNodesCount=1 -F workerNodesCount=2 \ # ... settings
-F machineType=n2d-standard-4 -F machineType=n2d-standard-4 \
-F test=nop
``` ```
### E2E Test Suites ### E2E Test Suites
Here are some examples for test suits you might want to run. Values for `sonobuoyTestSuiteCmd`: Here are some examples for test suites you might want to run. Values for `sonobuoyTestSuiteCmd`:
* `--mode quick` * `--mode quick`
* Runs a set of tests that are known to be quick to execute! (<1 min) * Runs a set of tests that are known to be quick to execute! (<1 min)