# constellation/.github/actions/constellation_create/action.yml
name: Constellation create
description: Create a new Constellation cluster using the latest OS image.
inputs:
# TODO(elchead): remove once 2.10 is released
workerNodesCount:
description: "Number of worker nodes to spawn."
required: true
controlNodesCount:
description: "Number of control-plane nodes to spawn."
required: true
cloudProvider:
description: "Either 'gcp', 'aws' or 'azure'."
required: true
machineType:
description: "Machine type of VM to spawn."
required: false
cliVersion:
description: "Version of the CLI"
required: true
osImage:
description: "OS image to use."
required: true
isDebugImage:
description: "Is OS img a debug img?"
required: true
kubernetesVersion:
description: "Kubernetes version to create the cluster from."
required: false
artifactNameSuffix:
description: "Suffix for artifact naming."
required: true
fetchMeasurements:
default: "false"
description: "Update measurements via the 'constellation config fetch-measurements' command."
azureSNPEnforcementPolicy:
required: false
description: "Azure SNP enforcement policy."
outputs:
kubeconfig:
description: "The kubeconfig for the cluster."
value: ${{ steps.constellation-init.outputs.KUBECONFIG }}
osImageUsed:
description: "The OS image used in the cluster."
value: ${{ steps.setImage.outputs.image }}
runs:
using: "composite"
steps:
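    # The following steps patch constellation-conf.yaml (created earlier in the workflow) in place with yq before the cluster is created.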
- name: Set constellation name
shell: bash
run: |
yq eval -i "(.name) = \"e2e-test\"" constellation-conf.yaml
- name: Set Azure SNP enforcement policy
if: inputs.azureSNPEnforcementPolicy != ''
shell: bash
run: |
if [[ ${{ inputs.cloudProvider }} != 'azure' ]]; then
echo "SNP enforcement policy is only supported for Azure"
exit 1
fi
yq eval -i "(.attestation.azureSEVSNP.firmwareSignerConfig.enforcementPolicy) \
= \"${{ inputs.azureSNPEnforcementPolicy }}\"" constellation-conf.yaml
- name: Set image
id: setImage
shell: bash
env:
imageInput: ${{ inputs.osImage }}
run: |
if [[ -z "${imageInput}" ]]; then
echo "No image specified. Using default image from config."
image=$(yq eval ".image" constellation-conf.yaml)
echo "image=${image}" | tee -a "$GITHUB_OUTPUT"
exit 0
fi
yq eval -i "(.image) = \"${imageInput}\"" constellation-conf.yaml
echo "image=${imageInput}" | tee -a "$GITHUB_OUTPUT"
- name: Update measurements for non-stable images
      if: inputs.fetchMeasurements == 'true'
shell: bash
run: |
constellation config fetch-measurements --debug --insecure
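    # A machineType of 'default' (or an empty input) keeps the instance type already set in the config.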
- name: Set instanceType
if: inputs.machineType && inputs.machineType != 'default'
shell: bash
run: |
yq eval -i "(.nodeGroups[] | .instanceType) = \"${{ inputs.machineType }}\"" constellation-conf.yaml
- name: Set node count
# TODO(katexochen): remove if statement once 2.10 is released.
if: inputs.cliVersion != 'v2.9.0' && inputs.cliVersion != 'v2.9.1'
shell: bash
run: |
yq eval -i "(.nodeGroups[] | select(.role == \"control-plane\") | .initialCount) = ${{ inputs.controlNodesCount }}" constellation-conf.yaml
yq eval -i "(.nodeGroups[] | select(.role == \"worker\") | .initialCount) = ${{ inputs.workerNodesCount }}" constellation-conf.yaml
- name: Enable debugCluster flag
if: inputs.isDebugImage == 'true'
shell: bash
run: |
yq eval -i '(.debugCluster) = true' constellation-conf.yaml
    # Uses the --force flag since the CLI currently has no pre-release versions and always reports the latest released version.
    # Many of our pipelines, however, run on prerelease images, so the image in use is newer than the CLI's version,
    # which makes the CLI's version validation fail.
- name: Constellation create
shell: bash
run: |
echo "Creating cluster using config:"
cat constellation-conf.yaml
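        # Map license.confidential.cloud to localhost; presumably this keeps e2e runs from calling the real license server.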
sudo sh -c 'echo "127.0.0.1 license.confidential.cloud" >> /etc/hosts' || true
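        # Not every CLI version knows the --tf-log flag, so probe the help output before passing it.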
output=$(constellation --help)
if [[ $output == *"tf-log"* ]]; then
TFFLAG="--tf-log=DEBUG"
fi
constellation create -y --force --debug ${TFFLAG:-} -c ${{ inputs.controlNodesCount }} -w ${{ inputs.workerNodesCount }}
# TODO(elchead): remove -c and -w once 2.10 is released, such that a fromVersion upgrade E2E no longer requires these flags
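    # cdbg deploy pushes the locally built bootstrapper and upgrade-agent binaries onto the debug-image nodes;
    # the logcollect.* info fields tag the collected logs with metadata about this workflow run.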
- name: Cdbg deploy
if: inputs.isDebugImage == 'true'
shell: bash
run: |
echo "::group::cdbg deploy"
chmod +x $GITHUB_WORKSPACE/build/cdbg
cdbg deploy \
--bootstrapper "${{ github.workspace }}/build/bootstrapper" \
--upgrade-agent "${{ github.workspace }}/build/upgrade-agent" \
--info logcollect=true \
--info logcollect.github.actor="${{ github.triggering_actor }}" \
--info logcollect.github.workflow="${{ github.workflow }}" \
--info logcollect.github.run-id="${{ github.run_id }}" \
--info logcollect.github.run-attempt="${{ github.run_attempt }}" \
--info logcollect.github.ref-name="${{ github.ref_name }}" \
--info logcollect.github.sha="${{ github.sha }}" \
--info logcollect.github.runner-os="${{ runner.os }}" \
--verbosity=-1 \
--force
echo "::endgroup::"
- name: Constellation init
id: constellation-init
shell: bash
run: |
constellation init --force --debug
echo "KUBECONFIG=$(pwd)/constellation-admin.conf" | tee -a $GITHUB_OUTPUT
- name: Wait for nodes to join and become ready
shell: bash
env:
KUBECONFIG: "${{ steps.constellation-init.outputs.KUBECONFIG }}"
JOINTIMEOUT: "1200" # 20 minutes timeout for all nodes to join
run: |
echo "::group::Wait for nodes"
NODES_COUNT=$((${{ inputs.controlNodesCount }} + ${{ inputs.workerNodesCount }}))
JOINWAIT=0
until [[ "$(kubectl get nodes -o json | jq '.items | length')" == "${NODES_COUNT}" ]] || [[ $JOINWAIT -gt $JOINTIMEOUT ]];
do
echo "$(kubectl get nodes -o json | jq '.items | length')/"${NODES_COUNT}" nodes have joined.. waiting.."
JOINWAIT=$((JOINWAIT+30))
sleep 30
done
if [[ $JOINWAIT -gt $JOINTIMEOUT ]]; then
echo "Timed out waiting for nodes to join"
exit 1
fi
echo "$(kubectl get nodes -o json | jq '.items | length')/"${NODES_COUNT}" nodes have joined"
if ! kubectl wait --for=condition=ready --all nodes --timeout=20m; then
kubectl get pods -n kube-system
kubectl get events -n kube-system
echo "::error::kubectl wait timed out before all nodes became ready"
echo "::endgroup::"
exit 1
fi
echo "::endgroup::"
- name: Download boot logs
if: always()
continue-on-error: true
shell: bash
env:
CSP: ${{ inputs.cloudProvider }}
run: |
echo "::group::Download boot logs"
CONSTELL_UID=$(yq '.uid' constellation-id.json)
case $CSP in
azure)
AZURE_RESOURCE_GROUP=$(yq eval ".provider.azure.resourceGroup" constellation-conf.yaml)
./.github/actions/constellation_create/az-logs.sh ${AZURE_RESOURCE_GROUP}
;;
gcp)
GCP_ZONE=$(yq eval ".provider.gcp.zone" constellation-conf.yaml)
./.github/actions/constellation_create/gcp-logs.sh ${GCP_ZONE} ${CONSTELL_UID}
;;
aws)
./.github/actions/constellation_create/aws-logs.sh us-east-2 ${CONSTELL_UID}
;;
esac
echo "::endgroup::"
- name: Upload boot logs
if: always() && !env.ACT
continue-on-error: true
uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce # v3.1.2
with:
name: serial-logs-${{ inputs.artifactNameSuffix }}
path: |
*.log
!terraform.log