ci: add e2e test for constellation recover (#845)

* AB#2256 Add recover e2e test

* AB#2256 move test & fix minor objections

* AB#2256 fix path

* AB#2256 rename hacky filename
This commit is contained in:
Moritz Sanft 2023-01-19 10:41:07 +01:00 committed by GitHub
parent 2cee7cb454
commit ae2db08f3a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 202 additions and 32 deletions

View File

@@ -61,6 +61,9 @@ outputs:
kubeconfig:
description: "The kubeconfig for the cluster."
value: ${{ steps.constellation-init.outputs.KUBECONFIG }}
masterSecret:
description: "The master-secret for the cluster."
value: ${{ steps.constellation-init.outputs.MASTERSECRET }}
runs:
using: "composite"
@@ -178,6 +181,7 @@ runs:
run: |
constellation init
echo "KUBECONFIG=$(pwd)/constellation-admin.conf" >> $GITHUB_OUTPUT
echo "MASTERSECRET=$(pwd)/constellation-mastersecret.json" >> $GITHUB_OUTPUT
- name: Wait for nodes to join and become ready
shell: bash

73
.github/actions/e2e_recover/action.yml vendored Normal file
View File

@@ -0,0 +1,73 @@
# Composite action: recover a Constellation cluster whose control plane
# has become unavailable, by rebooting all nodes and running
# `constellation recover` until every control-plane node is recovered.
name: Constellation recover
description: "Recover a Constellation cluster with an unavailable control plane."

inputs:
  controlNodesCount:
    description: "The amount of control plane nodes in the cluster."
    required: true
  kubeconfig:
    description: "The kubeconfig for the cluster."
    required: true
  masterSecret:
    description: "The master-secret for the cluster."
    required: true
  cloudProvider:
    description: "Which cloud provider to use."
    required: true
  gcpProject:
    description: "The GCP project Constellation is deployed in."
    required: false
  resourceGroup:
    description: "The Azure resource group Constellation is deployed in."
    required: false

runs:
  using: "composite"
  steps:
    - name: Restart worker node
      shell: bash
      run: |
        # Reboot one arbitrary worker via a privileged debug pod chrooted
        # into the host, then wait for it to leave and rejoin Ready state.
        WORKER_NODE=$(kubectl get nodes --selector='!node-role.kubernetes.io/control-plane' -o json | jq '.items[0].metadata.name' -r)
        kubectl debug node/$WORKER_NODE --image=ubuntu -- bash -c "echo reboot > reboot.sh && chroot /host < reboot.sh"
        kubectl wait --for=condition=Ready=false --timeout=10m node/$WORKER_NODE
        kubectl wait --for=condition=Ready=true --timeout=10m --all nodes
      env:
        KUBECONFIG: ${{ inputs.kubeconfig }}
    - name: Restart all control plane nodes
      shell: bash
      run: |
        # Reboot every control-plane node; this renders the control plane
        # unavailable, so no `kubectl wait` is possible here.
        CONTROL_PLANE_NODES=$(kubectl get nodes --selector='node-role.kubernetes.io/control-plane' -o json | jq '.items[].metadata.name' -r)
        for CONTROL_PLANE_NODE in ${CONTROL_PLANE_NODES}; do
          kubectl debug node/$CONTROL_PLANE_NODE --image=ubuntu -- bash -c "echo reboot > reboot.sh && chroot /host < reboot.sh"
        done
      env:
        KUBECONFIG: ${{ inputs.kubeconfig }}
    - name: Constellation recover
      shell: bash
      run: |
        # Retry `constellation recover` until all control-plane nodes have
        # received a recovery key, or until the timeout elapses.
        timeout=600
        start_time=$(date +%s)
        recovered=0
        while true; do
          output=$(constellation recover --master-secret=${{ inputs.masterSecret }})
          if echo "$output" | grep -q "Pushed recovery key."; then
            echo "$output"
            # Count how many nodes were recovered in this invocation.
            i=$(echo "$output" | grep -o "Pushed recovery key." | wc -l | sed 's/ //g')
            recovered=$((recovered+i))
            if [[ $recovered -eq ${{ inputs.controlNodesCount }} ]]; then
              # was `exit 0`, which made the readiness wait below unreachable
              break
            fi
          fi
          current_time=$(date +%s)
          if ((current_time - start_time > timeout)); then
            echo "Control plane recovery timed out after $timeout seconds."
            exit 1
          fi
          echo "Did not recover all nodes yet, retrying in 5 seconds [$recovered/${{ inputs.controlNodesCount }}]"
          sleep 5
        done
        kubectl wait --for=condition=Ready --timeout=10m --all nodes
      env:
        KUBECONFIG: ${{ inputs.kubeconfig }}

View File

@@ -57,7 +57,7 @@ inputs:
description: "The resource group to use"
required: false
test:
description: "The test to run. Can currently be one of [sonobuoy full, sonobuoy quick, autoscaling, lb, k-bench, verify, nop]."
description: "The test to run. Can currently be one of [sonobuoy full, sonobuoy quick, autoscaling, lb, k-bench, verify, recover, nop]."
required: true
sonobuoyTestSuiteCmd:
description: "The sonobuoy test suite to run."
@@ -72,7 +72,7 @@ runs:
using: "composite"
steps:
- name: Check input
if: (!contains(fromJson('["sonobuoy full", "sonobuoy quick", "autoscaling", "k-bench", "verify", "lb", "nop"]'), inputs.test))
if: (!contains(fromJson('["sonobuoy full", "sonobuoy quick", "autoscaling", "k-bench", "verify", "lb", "recover", "nop"]'), inputs.test))
shell: bash
run: |
echo "Invalid input for test field: ${{ inputs.test }}"
@@ -212,3 +212,14 @@ runs:
with:
cloudProvider: ${{ inputs.cloudProvider }}
osImage: ${{ inputs.osImage }}
- name: Run recover test
if: inputs.test == 'recover'
uses: ./.github/actions/e2e_recover
with:
controlNodesCount: ${{ inputs.controlNodesCount }}
cloudProvider: ${{ inputs.cloudProvider }}
gcpProject: ${{ inputs.gcpProject }}
kubeconfig: ${{ steps.constellation-create.outputs.kubeconfig }}
masterSecret: ${{ steps.constellation-create.outputs.masterSecret }}
azureResourceGroup: ${{ inputs.azureResourceGroup }}

View File

@@ -37,6 +37,7 @@ on:
- "lb"
- "k-bench"
- "verify"
- "recover"
- "nop"
required: true
kubernetesVersion:

View File

@@ -45,7 +45,7 @@ jobs:
fail-fast: false
max-parallel: 5
matrix:
test: ["sonobuoy full", "autoscaling", "k-bench", "lb", "verify"]
test: ["sonobuoy full", "autoscaling", "k-bench", "lb", "verify", "recover"]
provider: ["gcp", "azure", "aws"]
version: ["1.23", "1.24", "1.25", "1.26"]
exclude:
@@ -56,6 +56,13 @@ jobs:
version: "1.24"
- test: "verify"
version: "1.25"
# Recover test runs only on latest version.
- test: "recover"
version: "1.23"
- test: "recover"
version: "1.24"
- test: "recover"
version: "1.25"
# Autoscaling test runs only on latest version.
- test: "autoscaling"
version: "1.23"

View File

@@ -17,7 +17,7 @@ import (
type terraformClient interface {
PrepareWorkspace(path string, input terraform.Variables) error
CreateCluster(ctx context.Context) (string, string, error)
CreateCluster(ctx context.Context) (terraform.CreateOutput, error)
CreateIAMConfig(ctx context.Context, provider cloudprovider.Provider) (terraform.IAMOutput, error)
DestroyCluster(ctx context.Context) error
CleanUpWorkspace() error

View File

@@ -29,6 +29,7 @@ type stubTerraformClient struct {
ip string
initSecret string
iamOutput terraform.IAMOutput
uid string
cleanUpWorkspaceCalled bool
removeInstallerCalled bool
destroyClusterCalled bool
@@ -39,8 +40,12 @@ type stubTerraformClient struct {
iamOutputErr error
}
func (c *stubTerraformClient) CreateCluster(ctx context.Context) (string, string, error) {
return c.ip, c.initSecret, c.createClusterErr
func (c *stubTerraformClient) CreateCluster(ctx context.Context) (terraform.CreateOutput, error) {
return terraform.CreateOutput{
IP: c.ip,
Secret: c.initSecret,
UID: c.uid,
}, c.createClusterErr
}
func (c *stubTerraformClient) CreateIAMConfig(ctx context.Context, provider cloudprovider.Provider) (terraform.IAMOutput, error) {

View File

@@ -123,15 +123,16 @@ func (c *Creator) createAWS(ctx context.Context, cl terraformClient, config *con
}
defer rollbackOnError(context.Background(), c.out, &retErr, &rollbackerTerraform{client: cl})
ip, initSecret, err := cl.CreateCluster(ctx)
tfOutput, err := cl.CreateCluster(ctx)
if err != nil {
return clusterid.File{}, err
}
return clusterid.File{
CloudProvider: cloudprovider.AWS,
InitSecret: []byte(initSecret),
IP: ip,
InitSecret: []byte(tfOutput.Secret),
IP: tfOutput.IP,
UID: tfOutput.UID,
}, nil
}
@@ -160,15 +161,16 @@ func (c *Creator) createGCP(ctx context.Context, cl terraformClient, config *con
}
defer rollbackOnError(context.Background(), c.out, &retErr, &rollbackerTerraform{client: cl})
ip, initSecret, err := cl.CreateCluster(ctx)
tfOutput, err := cl.CreateCluster(ctx)
if err != nil {
return clusterid.File{}, err
}
return clusterid.File{
CloudProvider: cloudprovider.GCP,
InitSecret: []byte(initSecret),
IP: ip,
InitSecret: []byte(tfOutput.Secret),
IP: tfOutput.IP,
UID: tfOutput.UID,
}, nil
}
@@ -200,15 +202,16 @@ func (c *Creator) createAzure(ctx context.Context, cl terraformClient, config *c
}
defer rollbackOnError(context.Background(), c.out, &retErr, &rollbackerTerraform{client: cl})
ip, initSecret, err := cl.CreateCluster(ctx)
tfOutput, err := cl.CreateCluster(ctx)
if err != nil {
return clusterid.File{}, err
}
return clusterid.File{
CloudProvider: cloudprovider.Azure,
IP: ip,
InitSecret: []byte(initSecret),
IP: tfOutput.IP,
InitSecret: []byte(tfOutput.Secret),
UID: tfOutput.UID,
}, nil
}
@@ -313,14 +316,15 @@ func (c *Creator) createQEMU(ctx context.Context, cl terraformClient, lv libvirt
// Allow rollback of QEMU Terraform workspace from this point on
qemuRollbacker.createdWorkspace = true
ip, initSecret, err := cl.CreateCluster(ctx)
tfOutput, err := cl.CreateCluster(ctx)
if err != nil {
return clusterid.File{}, err
}
return clusterid.File{
CloudProvider: cloudprovider.QEMU,
InitSecret: []byte(initSecret),
IP: ip,
InitSecret: []byte(tfOutput.Secret),
IP: tfOutput.IP,
UID: tfOutput.UID,
}, nil
}

View File

@@ -74,39 +74,59 @@ func (c *Client) PrepareWorkspace(path string, vars Variables) error {
}
// CreateCluster creates a Constellation cluster using Terraform.
func (c *Client) CreateCluster(ctx context.Context) (string, string, error) {
func (c *Client) CreateCluster(ctx context.Context) (CreateOutput, error) {
if err := c.tf.Init(ctx); err != nil {
return "", "", err
return CreateOutput{}, err
}
if err := c.tf.Apply(ctx); err != nil {
return "", "", err
return CreateOutput{}, err
}
tfState, err := c.tf.Show(ctx)
if err != nil {
return "", "", err
return CreateOutput{}, err
}
ipOutput, ok := tfState.Values.Outputs["ip"]
if !ok {
return "", "", errors.New("no IP output found")
return CreateOutput{}, errors.New("no IP output found")
}
ip, ok := ipOutput.Value.(string)
if !ok {
return "", "", errors.New("invalid type in IP output: not a string")
return CreateOutput{}, errors.New("invalid type in IP output: not a string")
}
secretOutput, ok := tfState.Values.Outputs["initSecret"]
if !ok {
return "", "", errors.New("no initSecret output found")
return CreateOutput{}, errors.New("no initSecret output found")
}
secret, ok := secretOutput.Value.(string)
if !ok {
return "", "", errors.New("invalid type in initSecret output: not a string")
return CreateOutput{}, errors.New("invalid type in initSecret output: not a string")
}
return ip, secret, nil
uidOutput, ok := tfState.Values.Outputs["uid"]
if !ok {
return CreateOutput{}, errors.New("no uid output found")
}
uid, ok := uidOutput.Value.(string)
if !ok {
return CreateOutput{}, errors.New("invalid type in uid output: not a string")
}
return CreateOutput{
IP: ip,
Secret: secret,
UID: uid,
}, nil
}
// CreateOutput contains the Terraform output values of a cluster creation.
type CreateOutput struct {
IP string
Secret string
UID string
}
// IAMOutput contains the output information of the Terraform IAM operations.

View File

@@ -2,6 +2,10 @@ output "ip" {
value = aws_eip.lb.public_ip
}
output "uid" {
value = local.uid
}
output "initSecret" {
value = random_password.initSecret.result
sensitive = true

View File

@@ -2,6 +2,10 @@ output "ip" {
value = azurerm_public_ip.loadbalancer_ip.ip_address
}
output "uid" {
value = local.uid
}
output "initSecret" {
value = random_password.initSecret.result
sensitive = true

View File

@@ -2,6 +2,10 @@ output "ip" {
value = google_compute_global_address.loadbalancer_ip.address
}
output "uid" {
value = local.uid
}
output "initSecret" {
value = random_password.initSecret.result
sensitive = true

View File

@@ -214,6 +214,9 @@ func TestCreateCluster(t *testing.T) {
"initSecret": {
Value: "initSecret",
},
"uid": {
Value: "12345abc",
},
},
},
}
@@ -300,6 +303,34 @@ func TestCreateCluster(t *testing.T) {
fs: afero.NewMemMapFs(),
wantErr: true,
},
"no uid": {
pathBase: "terraform",
provider: cloudprovider.QEMU,
vars: qemuVars,
tf: &stubTerraform{
showState: &tfjson.State{
Values: &tfjson.StateValues{
Outputs: map[string]*tfjson.StateOutput{},
},
},
},
fs: afero.NewMemMapFs(),
wantErr: true,
},
"uid has wrong type": {
pathBase: "terraform",
provider: cloudprovider.QEMU,
vars: qemuVars,
tf: &stubTerraform{
showState: &tfjson.State{
Values: &tfjson.StateValues{
Outputs: map[string]*tfjson.StateOutput{"uid": {Value: 42}},
},
},
},
fs: afero.NewMemMapFs(),
wantErr: true,
},
}
for name, tc := range testCases {
@@ -315,15 +346,16 @@ func TestCreateCluster(t *testing.T) {
path := path.Join(tc.pathBase, strings.ToLower(tc.provider.String()))
require.NoError(c.PrepareWorkspace(path, tc.vars))
ip, initSecret, err := c.CreateCluster(context.Background())
tfOutput, err := c.CreateCluster(context.Background())
if tc.wantErr {
assert.Error(err)
return
}
assert.NoError(err)
assert.Equal("192.0.2.100", ip)
assert.Equal("initSecret", initSecret)
assert.Equal("192.0.2.100", tfOutput.IP)
assert.Equal("initSecret", tfOutput.Secret)
assert.Equal("12345abc", tfOutput.UID)
})
}
}

View File

@@ -9,12 +9,13 @@ gh workflow run e2e-test-manual.yml \
--ref feat/e2e_pipeline \ # On your specific branch!
-F cloudProvider=gcp \ # With your ...
-F controlNodesCount=1 -F workerNodesCount=2 \ # ... settings
-F machineType=n2d-standard-4
-F machineType=n2d-standard-4 \
-F test=nop
```
### E2E Test Suites
Here are some examples for test suits you might want to run. Values for `sonobuoyTestSuiteCmd`:
Here are some examples for test suites you might want to run. Values for `sonobuoyTestSuiteCmd`:
* `--mode quick`
* Runs a set of tests that are known to be quick to execute! (<1 min)