ci: collect cluster metrics to OpenSearch (#2347)

* add Metricbeat deployment to debugd

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* set metricbeat debugd image version

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* fix k8s deployment

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* use 2 separate deployments

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* only deploy via k8s in non-debug-images

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* add missing tilde

* remove k8s metrics

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* unify flag

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* add cloud metadata processor to filebeat

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* ci: fix debugd logcollection (#2355)

* add missing keyvault access role

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* bump logstash image version

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* bump filebeat / metricbeat image version

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* log used image version

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* use debugging image versions

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* increase wait timeout for image upload

* add cloud metadata processor to filebeat

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* fix template locations in container

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* fix image version typo

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* add filebeat / metricbeat users

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* remove user additions

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* update workflow step name

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* only mount config files

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* document potential rc

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* fix IAM permissions in workflow

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* fix AWS permissions

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* tidy

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* add missing workflow input

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* rename action

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* pin image versions

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* remove unnecessary workflow inputs

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

---------

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* add refStream input

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* remove inputs.yml dep

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* increase system metric period

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* fix linkchecker

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

---------

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>
This commit is contained in:
Moritz Sanft 2023-09-27 16:17:31 +02:00 committed by GitHub
parent c7b728f202
commit f4b2d02194
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
27 changed files with 699 additions and 149 deletions

100
.github/actions/cdbg_deploy/action.yml vendored Normal file
View File

@ -0,0 +1,100 @@
name: Cdbg deploy
description: Deploy the Constellation Bootstrapper to the cluster via the debugd.
inputs:
test:
description: "The e2e test payload."
required: true
azureClusterCreateCredentials:
description: "Azure credentials authorized to create a Constellation cluster."
required: true
azureIAMCreateCredentials:
description: "Azure credentials authorized to create an IAM configuration."
required: true
cloudProvider:
description: "The cloud provider to use."
required: true
kubernetesVersion:
description: "Kubernetes version to create the cluster from."
required: true
refStream:
description: "The refStream of the image the test runs on."
required: true
runs:
using: "composite"
steps:
- name: Login to Azure (IAM service principal)
if: inputs.cloudProvider == 'azure'
uses: ./.github/actions/login_azure
with:
azure_credentials: ${{ inputs.azureIAMCreateCredentials }}
- name: Add Azure Keyvault access role
if: inputs.cloudProvider == 'azure'
shell: bash
run: |
UAMI=$(yq eval ".provider.azure.userAssignedIdentity | upcase" constellation-conf.yaml)
PRINCIPAL_ID=$(az identity list | yq ".[] | select(.id | test(\"(?i)$UAMI\"; \"g\")) | .principalId")
az role assignment create --role "Key Vault Secrets User" \
--assignee "$PRINCIPAL_ID" \
--scope /subscriptions/0d202bbb-4fa7-4af8-8125-58c269a05435/resourceGroups/e2e-test-creds/providers/Microsoft.KeyVault/vaults/opensearch-creds
- name: Login to Azure (Cluster service principal)
if: inputs.cloudProvider == 'azure'
uses: ./.github/actions/login_azure
with:
azure_credentials: ${{ inputs.azureClusterCreateCredentials }}
- name: Login to AWS (IAM service principal)
if: inputs.cloudProvider == 'aws'
uses: aws-actions/configure-aws-credentials@5fd3084fc36e372ff1fff382a39b10d03659f355 # v2.2.0
with:
role-to-assume: arn:aws:iam::795746500882:role/GithubActionsE2EIAM
aws-region: eu-central-1
# extend token expiry to 6 hours to ensure constellation can terminate
role-duration-seconds: 21600
- name: Add AWS Secrets Manager access role
if: inputs.cloudProvider == 'aws'
shell: bash
run: |
INSTANCE_PROFILE=$(yq eval ".provider.aws.iamProfileControlPlane" constellation-conf.yaml)
ROLE_NAME=$(aws iam get-instance-profile --instance-profile-name "$INSTANCE_PROFILE" | yq ".InstanceProfile.Roles[0].RoleName")
aws iam attach-role-policy \
--role-name "$ROLE_NAME" \
--policy-arn arn:aws:iam::795746500882:policy/GitHubActionsOSCredAccess
- name: Login to AWS (Cluster service principal)
if: inputs.cloudProvider == 'aws'
uses: aws-actions/configure-aws-credentials@5fd3084fc36e372ff1fff382a39b10d03659f355 # v2.2.0
with:
role-to-assume: arn:aws:iam::795746500882:role/GithubActionsE2ECluster
aws-region: eu-central-1
# extend token expiry to 6 hours to ensure constellation can terminate
role-duration-seconds: 21600
- name: Cdbg deploy
shell: bash
run: |
echo "::group::cdbg deploy"
chmod +x $GITHUB_WORKSPACE/build/cdbg
cdbg deploy \
--bootstrapper "${{ github.workspace }}/build/bootstrapper" \
--upgrade-agent "${{ github.workspace }}/build/upgrade-agent" \
--info logcollect=true \
--info logcollect.github.actor="${{ github.triggering_actor }}" \
--info logcollect.github.workflow="${{ github.workflow }}" \
--info logcollect.github.run-id="${{ github.run_id }}" \
--info logcollect.github.run-attempt="${{ github.run_attempt }}" \
--info logcollect.github.ref-name="${{ github.ref_name }}" \
--info logcollect.github.sha="${{ github.sha }}" \
--info logcollect.github.runner-os="${{ runner.os }}" \
--info logcollect.github.e2e-test-payload="${{ inputs.test }}" \
--info logcollect.github.is-debug-cluster=false \
--info logcollect.github.ref-stream="${{ inputs.refStream }}" \
--info logcollect.github.kubernetes-version="${{ inputs.kubernetesVersion }}" \
--info logcollect.deployment-type="debugd" \
--verbosity=-1 \
--force
echo "::endgroup::"

View File

@ -38,6 +38,15 @@ inputs:
test:
description: "The e2e test payload."
required: true
azureClusterCreateCredentials:
description: "Azure credentials authorized to create a Constellation cluster."
required: true
azureIAMCreateCredentials:
description: "Azure credentials authorized to create an IAM configuration."
required: true
refStream:
description: "Reference and stream of the image in use"
required: false
outputs:
kubeconfig:
@ -119,29 +128,14 @@ runs:
- name: Cdbg deploy
if: inputs.isDebugImage == 'true'
shell: bash
run: |
echo "::group::cdbg deploy"
chmod +x $GITHUB_WORKSPACE/build/cdbg
cdbg deploy \
--bootstrapper "${{ github.workspace }}/build/bootstrapper" \
--upgrade-agent "${{ github.workspace }}/build/upgrade-agent" \
--info logcollect=true \
--info logcollect.github.actor="${{ github.triggering_actor }}" \
--info logcollect.github.workflow="${{ github.workflow }}" \
--info logcollect.github.run-id="${{ github.run_id }}" \
--info logcollect.github.run-attempt="${{ github.run_attempt }}" \
--info logcollect.github.ref-name="${{ github.ref_name }}" \
--info logcollect.github.sha="${{ github.sha }}" \
--info logcollect.github.runner-os="${{ runner.os }}" \
--info logcollect.github.e2e-test-payload="${{ inputs.test }}" \
--info logcollect.github.is-debug-cluster=false \
--info logcollect.github.ref-stream="${{ inputs.refStream }}" \
--info logcollect.github.kubernetes-version="${{ inputs.kubernetesVersion }}" \
--info logcollect.deployment-type="debugd" \
--verbosity=-1 \
--force
echo "::endgroup::"
uses: ./.github/actions/cdbg_deploy
with:
cloudProvider: ${{ inputs.cloudProvider }}
test: ${{ inputs.test }}
azureClusterCreateCredentials: ${{ inputs.azureClusterCreateCredentials }}
azureIAMCreateCredentials: ${{ inputs.azureIAMCreateCredentials }}
refStream: ${{ inputs.refStream }}
kubernetesVersion: ${{ inputs.kubernetesVersion }}
- name: Constellation init
id: constellation-init

View File

@ -50,7 +50,7 @@ runs:
--fields github.sha="${{ github.sha }}" \
--fields github.runner-os="${{ runner.os }}" \
--fields github.e2e-test-payload="${{ inputs.test }}" \
--fields github.isDebugImage="${{ inputs.isDebugImage }}" \
--fields github.is-debug-cluster="${{ inputs.isDebugImage }}" \
--fields github.e2e-test-provider="${{ inputs.provider }}" \
--fields github.ref-stream="${{ inputs.refStream }}" \
--fields github.kubernetes-version="${{ inputs.kubernetesVersion }}" \
@ -86,3 +86,17 @@ runs:
helm repo update
helm install filebeat elastic/filebeat \
--wait --timeout=1200s --values values.yml
- name: Deploy Metricbeat
id: deploy-metricbeat
shell: bash
working-directory: ./metricbeat
env:
KUBECONFIG: ${{ inputs.kubeconfig }}
run: |
helm repo add elastic https://helm.elastic.co
helm repo update
helm install metricbeat-k8s elastic/metricbeat \
--wait --timeout=1200s --values values-control-plane.yml
helm install metricbeat-system elastic/metricbeat \
--wait --timeout=1200s --values values-all-nodes.yml

View File

@ -249,12 +249,14 @@ runs:
fetchMeasurements: ${{ inputs.fetchMeasurements }}
cliVersion: ${{ inputs.cliVersion }}
azureSNPEnforcementPolicy: ${{ inputs.azureSNPEnforcementPolicy }}
azureIAMCreateCredentials: ${{ inputs.azureIAMCreateCredentials }}
azureClusterCreateCredentials: ${{ inputs.azureClusterCreateCredentials }}
kubernetesVersion: ${{ inputs.kubernetesVersion }}
refStream: ${{ inputs.refStream }}
- name: Deploy logcollection
- name: Deploy log- and metrics-collection (Kubernetes)
id: deploy-logcollection
# TODO(msanft):temporarily deploy in debug clusters too to resolve "missing logs"-bug
# see https://dev.azure.com/Edgeless/Edgeless/_workitems/edit/3227
# if: inputs.isDebugImage == 'false'
if: inputs.isDebugImage == 'false'
uses: ./.github/actions/deploy_logcollection
with:
kubeconfig: ${{ steps.constellation-create.outputs.kubeconfig }}

View File

@ -37,3 +37,10 @@ jobs:
name: filebeat-debugd
dockerfile: debugd/filebeat/Dockerfile
githubToken: ${{ secrets.GITHUB_TOKEN }}
- name: Build and upload Metricbeat container image
uses: ./.github/actions/build_micro_service
with:
name: metricbeat-debugd
dockerfile: debugd/metricbeat/Dockerfile
githubToken: ${{ secrets.GITHUB_TOKEN }}

View File

@ -3,10 +3,7 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library")
go_library(
name = "filebeat",
srcs = ["assets.go"],
embedsrcs = [
"templates/filebeat.yml",
"inputs.yml",
],
embedsrcs = ["templates/filebeat.yml"],
importpath = "github.com/edgelesssys/constellation/v2/debugd/filebeat",
visibility = ["//visibility:public"],
)

View File

@ -2,7 +2,6 @@ FROM fedora:38@sha256:6fc00f83a1b6526b1c6562e30f552d109ba8e269259c6742a26efab1b7
RUN dnf install -y https://artifacts.elastic.co/downloads/beats/filebeat/filebeat-8.6.2-x86_64.rpm
COPY debugd/filebeat/inputs.yml /usr/share/filebeat/inputs.yml
COPY debugd/filebeat/templates/ /usr/share/filebeat/templates/
COPY debugd/filebeat/templates/ /usr/share/constellogs/templates/
ENTRYPOINT ["/usr/share/filebeat/bin/filebeat", "-e", "--path.home", "/usr/share/filebeat", "--path.data", "/usr/share/filebeat/data"]

View File

@ -10,6 +10,5 @@ import "embed"
// Assets are the exported Filebeat template files.
//
//go:embed *.yml
//go:embed templates/*
var Assets embed.FS

View File

@ -1,9 +0,0 @@
- type: journald
enabled: true
id: journald
- type: filestream
enabled: true
id: container
paths:
- /var/log/pods/*/*/*.log

View File

@ -9,12 +9,15 @@ logging:
metrics.enabled: false
level: warning
filebeat.config:
inputs:
filebeat.inputs:
- type: journald
enabled: true
path: /usr/share/filebeat/inputs.yml
# reload.enabled: true
# reload.period: 10s
id: journald
- type: filestream
enabled: true
id: container
paths:
- /var/log/pods/*/*/*.log
timestamp.precision: nanosecond
@ -27,3 +30,6 @@ processors:
field: "log.file.path"
target_prefix: "kubernetes"
ignore_failure: true
{{ if .AddCloudMetadata }}
- add_cloud_metadata: ~
{{ end }}

View File

@ -32,7 +32,7 @@ const (
// NewStartTrigger returns a trigger func that can be registered with an infos instance.
// The trigger is called when infos changes to received state and starts a log collection pod
// with filebeat and logstash in case the flags are set.
// with filebeat, metricbeat and logstash in case the flags are set.
//
// This requires podman to be installed.
func NewStartTrigger(ctx context.Context, wg *sync.WaitGroup, provider cloudprovider.Provider,
@ -74,7 +74,7 @@ func NewStartTrigger(ctx context.Context, wg *sync.WaitGroup, provider cloudprov
return
}
logger.Infof("Getting logstash pipeline template")
logger.Infof("Getting logstash pipeline template from image %s", versions.LogstashImage)
tmpl, err := getTemplate(ctx, logger, versions.LogstashImage, "/run/logstash/templates/pipeline.conf", "/run/logstash")
if err != nil {
logger.Errorf("Getting logstash pipeline template: %v", err)
@ -97,28 +97,46 @@ func NewStartTrigger(ctx context.Context, wg *sync.WaitGroup, provider cloudprov
InfoMap: infoMapM,
Credentials: creds,
}
if err := writeTemplate("/run/filebeat/filebeat.yml", tmpl, pipelineConf); err != nil {
logger.Errorf("Writing logstash pipeline: %v", err)
if err := writeTemplate("/run/logstash/pipeline/pipeline.conf", tmpl, pipelineConf); err != nil {
logger.Errorf("Writing logstash config: %v", err)
return
}
logger.Infof("Getting logstash config template")
logger.Infof("Getting filebeat config template from image %s", versions.FilebeatImage)
tmpl, err = getTemplate(ctx, logger, versions.FilebeatImage, "/run/filebeat/templates/filebeat.yml", "/run/filebeat")
if err != nil {
logger.Errorf("Getting filebeat config template: %v", err)
return
}
filebeatConf := filebeatConfInput{
LogstashHost: "localhost:5044",
LogstashHost: "localhost:5044",
AddCloudMetadata: true,
}
if err := writeTemplate("/run/logstash/pipeline/pipeline.conf", tmpl, filebeatConf); err != nil {
logger.Errorf("Writing filebeat config: %v", err)
if err := writeTemplate("/run/filebeat/filebeat.yml", tmpl, filebeatConf); err != nil {
logger.Errorf("Writing filebeat pipeline: %v", err)
return
}
logger.Infof("Getting metricbeat config template from image %s", versions.MetricbeatImage)
tmpl, err = getTemplate(ctx, logger, versions.MetricbeatImage, "/run/metricbeat/templates/metricbeat.yml", "/run/metricbeat")
if err != nil {
logger.Errorf("Getting metricbeat config template: %v", err)
return
}
metricbeatConf := metricbeatConfInput{
LogstashHost: "localhost:5044",
Port: 5066,
CollectSystemMetrics: true,
AddCloudMetadata: true,
}
if err := writeTemplate("/run/metricbeat/metricbeat.yml", tmpl, metricbeatConf); err != nil {
logger.Errorf("Writing metricbeat pipeline: %v", err)
return
}
logger.Infof("Starting log collection pod")
if err := startPod(ctx, logger); err != nil {
logger.Errorf("Starting filebeat: %v", err)
logger.Errorf("Starting log collection: %v", err)
}
}()
}
@ -170,7 +188,7 @@ func getTemplate(ctx context.Context, logger *logger.Logger, image, templateDir,
}
func startPod(ctx context.Context, logger *logger.Logger) error {
// create a shared pod for filebeat and logstash
// create a shared pod for filebeat, metricbeat and logstash
createPodArgs := []string{
"pod",
"create",
@ -215,7 +233,7 @@ func startPod(ctx context.Context, logger *logger.Logger) error {
"--volume=/run/systemd:/run/systemd:ro",
"--volume=/run/systemd/journal/socket:/run/systemd/journal/socket:rw",
"--volume=/run/state/var/log:/var/log:ro",
"--volume=/run/filebeat:/usr/share/filebeat/:ro",
"--volume=/run/filebeat/filebeat.yml:/usr/share/filebeat/filebeat.yml:ro",
versions.FilebeatImage,
}
runFilebeatCmd := exec.CommandContext(ctx, "podman", runFilebeatArgs...)
@ -226,6 +244,28 @@ func startPod(ctx context.Context, logger *logger.Logger) error {
return fmt.Errorf("failed to run filebeat: %w", err)
}
// start metricbeat container
metricbeatLog := newCmdLogger(logger.Named("metricbeat"))
runMetricbeatArgs := []string{
"run",
"--rm",
"--name=metricbeat",
"--pod=logcollection",
"--privileged",
"--log-driver=none",
"--volume=/proc:/hostfs/proc:ro",
"--volume=/sys/fs/cgroup:/hostfs/sys/fs/cgroup:ro",
"--volume=/run/metricbeat/metricbeat.yml:/usr/share/metricbeat/metricbeat.yml:ro",
versions.MetricbeatImage,
}
runMetricbeatCmd := exec.CommandContext(ctx, "podman", runMetricbeatArgs...)
logger.Infof("Run metricbeat command: %v", runMetricbeatCmd.String())
runMetricbeatCmd.Stdout = metricbeatLog
runMetricbeatCmd.Stderr = metricbeatLog
if err := runMetricbeatCmd.Start(); err != nil {
return fmt.Errorf("failed to run metricbeat: %w", err)
}
return nil
}
@ -238,7 +278,16 @@ type logstashConfInput struct {
}
type filebeatConfInput struct {
LogstashHost string
LogstashHost string
AddCloudMetadata bool
}
type metricbeatConfInput struct {
Port int
LogstashHost string
CollectEtcdMetrics bool
CollectSystemMetrics bool
AddCloudMetadata bool
}
func writeTemplate(path string, templ *template.Template, in any) error {

View File

@ -1,66 +0,0 @@
input {
beats {
host => "0.0.0.0"
port => 5044
}
}
filter {
mutate {
# Remove some fields that are not needed.
remove_field => [
"[agent]",
"[journald]",
"[syslog]",
"[systemd][invocation_id]",
"[event][original]",
"[log][offset]",
"[log][syslog]"
]
# Tag with the provided metadata.
add_field => {
{{ range $key, $value := .InfoMap }}
"[metadata][{{ $key }}]" => "{{ $value }}"
{{ end }}
}
}
# Parse structured logs for following systemd units.
if [systemd][unit] in ["bootstrapper.service", "constellation-bootstrapper.service"] {
# skip_on_invalid_json below does not skip the whole filter, so let's use a cheap workaround here.
# See:
# https://discuss.elastic.co/t/skip-on-invalid-json-skipping-all-filters/215195
# https://discuss.elastic.co/t/looking-for-a-way-to-detect-json/102263
if [message] =~ "\A\{.+\}\z" {
json {
source => "message"
target => "logs"
skip_on_invalid_json => true
}
mutate {
replace => {
"message" => "%{[logs][msg]}"
}
remove_field => [
"[logs][msg]",
"[logs][ts]"
]
}
de_dot {
fields => ["[logs][peer.address]"]
}
}
}
}
output {
opensearch {
hosts => "{{ .Host }}"
index => "{{ .IndexPrefix }}-%{+YYYY.MM.dd}"
user => "{{ .Credentials.Username }}"
password => "{{ .Credentials.Password }}"
ssl => true
ssl_certificate_verification => true
}
}

View File

@ -57,7 +57,7 @@ filter {
output {
opensearch {
hosts => "{{ .Host }}"
index => "systemd-logs-%{+YYYY.MM.dd}"
index => "{{ .IndexPrefix }}-%{+YYYY.MM.dd}"
user => "{{ .Credentials.Username }}"
password => "{{ .Credentials.Password }}"
ssl => true

View File

@ -0,0 +1,9 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library")
go_library(
name = "metricbeat",
srcs = ["assets.go"],
embedsrcs = ["templates/metricbeat.yml"],
importpath = "github.com/edgelesssys/constellation/v2/debugd/metricbeat",
visibility = ["//visibility:public"],
)

View File

@ -0,0 +1,15 @@
FROM fedora:38@sha256:61f921e0c7b51e162e6f94b14ef4e6b0d38eac5987286fe4f52a2c1158cc2399 AS release
RUN dnf install -y https://artifacts.elastic.co/downloads/beats/metricbeat/metricbeat-8.9.2-x86_64.rpm
COPY debugd/metricbeat/templates/ /usr/share/constellogs/templates/
# -e enables logging to stderr
# -E http.enabled=true enables http endpoint as seen in https://github.com/elastic/helm-charts/blob/main/metricbeat/templates/daemonset.yaml
# --path.home and --path.data set the paths to the metricbeat binary and data directory
ENTRYPOINT [ "/usr/share/metricbeat/bin/metricbeat", \
"-e", \
"-E", "http.enabled=true", \
"--path.home", "/usr/share/metricbeat", \
"--path.data", "/usr/share/metricbeat/data" \
]

View File

@ -0,0 +1,14 @@
/*
Copyright (c) Edgeless Systems GmbH
SPDX-License-Identifier: AGPL-3.0-only
*/
package metricbeat
import "embed"
// Assets are the exported Metricbeat template files.
//
//go:embed templates/*
var Assets embed.FS

View File

@ -0,0 +1,63 @@
http.port: {{ .Port }}
output.logstash:
hosts: ["{{ .LogstashHost }}"]
output.console:
enabled: false
logging:
to_files: false
metrics.enabled: false
level: warning
timestamp.precision: nanosecond
# https://www.elastic.co/guide/en/beats/metricbeat/current/configuration-path.html#_system_hostfs
system.hostfs: "/hostfs"
metricbeat.modules:
{{ if .CollectSystemMetrics }}
- module: system
enabled: true
metricsets:
- cpu # CPU usage
- load # CPU load averages
- memory # Memory usage
- network # Network IO
- process # Per process metrics
- process_summary # Process summary
#- uptime # System Uptime
#- socket_summary # Socket summary
#- core # Per CPU core usage
- diskio # Disk IO
- filesystem # File system usage for each mountpoint
- fsstat # File system summary metrics
#- raid # Raid
#- socket # Sockets and connection info (linux only)
#- service # systemd service information
cpu.metrics: ["percentages","normalized_percentages"]
period: 10s
processes: ['.*']
# To monitor host metrics from within a container. As per:
# https://www.elastic.co/guide/en/beats/metricbeat/current/metricbeat-module-system.html#_example_configuration_59
hostfs: "/hostfs"
{{ end }}
{{ if .CollectEtcdMetrics }}
- module: etcd
enabled: true
metricsets: ["metrics"]
period: 30s
hosts: ["https://localhost:2379"]
ssl:
certificate_authorities: ["/etc/kubernetes/pki/etcd/ca.crt"]
certificate: "/etc/kubernetes/pki/etcd/peer.crt"
key: "/etc/kubernetes/pki/etcd/peer.key"
verification_mode: "full"
{{ end }}
processors:
{{ if .AddCloudMetadata }}
- add_cloud_metadata: ~
{{ end }}

View File

@ -8,6 +8,14 @@ The logcollection functionality can be deployed to both [debug](./debug-cluster.
In debug clusters, logcollection functionality should be deployed automatically through the debug daemon `debugd`, which runs *before* the bootstrapper
and can therefore, contrary to non-debug clusters, also collect logs of the bootstrapper.
> [!WARNING]
> If logs from a E2E test run for a debug-cluster with a bootstrapping-failure are missing in OpenSearch, this might be caused by a race condition
> between the termination of the cluster and the start-up of the logcollection containers in the debugd.
> If the failure can be reproduced manually, it is best to do so and observe the serial console of the bootstrapping node with the following command until the logcollection containers have started.
> ```bash
> journalctl _SYSTEMD_UNIT=debugd.service | grep logcollect
> ```
## Deployment in Non-Debug Clusters
In non-debug clusters, logcollection functionality needs to be explicitly deployed as a Kubernetes Deployment through Helm. To do that, a few steps need to be followed:
@ -29,28 +37,39 @@ In non-debug clusters, logcollection functionality needs to be explicitly deploy
One can add additional key-value pairs to the configuration by appending `--info key=value` to the command.
These key-value pairs will be attached to the log entries and can be used to filter them in OpenSearch.
For example, it might be helpful to add a `test=<xyz>` tag to be able to filter out logs from a specific test run.
2. Add the Elastic Helm repository
```bash
helm repo add elastic https://helm.elastic.co
helm repo update
```
2. Deploy Logstash
```bash
cd logstash
make add
make install
helm install logstash elastic/logstash \
--wait --timeout=1200s --values values.yml
cd ..
```
This will add the required Logstash Helm charts and deploy them to your cluster.
3. Deploy Filebeat
2. Deploy Beats
```bash
cd metricbeat
helm install metricbeat-k8s elastic/metricbeat \
--wait --timeout=1200s --values values-control-plane.yml
helm install metricbeat-system elastic/metricbeat \
--wait --timeout=1200s --values values-all-nodes.yml
cd ..
cd filebeat
make add
make install
helm install filebeat elastic/filebeat \
--wait --timeout=1200s --values values.yml
cd ..
```
This will add the required Filebeat Helm charts and deploy them to your cluster.
This will add the required Filebeat and Metricbeat Helm charts and deploy them to your cluster.
To remove Logstash or Filebeat, `cd` into the corresponding directory and run `make remove`.
To remove Logstash or one of the beats, `cd` into the corresponding directory and run `helm uninstall {logstash,filebeat,metricbeat}`.
## Inspecting Logs in OpenSearch

View File

@ -2,7 +2,7 @@
[Asciinema](https://github.com/asciinema/asciinema) is used to automatically generate
terminal session recordings for our documentation. To fully automate this we use scripts
that utilize [expect](https://linux.die.net/man/1/expect) to interface with different
that utilize [expect](https://manpages.debian.org/testing/expect/expect.1.en.html) to interface with different
CLI tools, and run them inside a [container](docker/Dockerfile).
## Usage

View File

@ -63,6 +63,13 @@ func runTemplate(cmd *cobra.Command, _ []string) error {
return fmt.Errorf("prepare filebeat: %w", err)
}
metricbeatPreparer := internal.NewMetricbeatPreparer(
flags.port,
)
if err := metricbeatPreparer.Prepare(flags.dir); err != nil {
return fmt.Errorf("prepare metricbeat: %w", err)
}
return nil
}

View File

@ -5,17 +5,21 @@ go_library(
srcs = [
"filebeat.go",
"logstash.go",
"metricbeat.go",
"prepare.go",
],
embedsrcs = [
"templates/filebeat/values.yml",
"templates/logstash/values.yml",
"templates/metricbeat/values-all-nodes.yml",
"templates/metricbeat/values-control-plane.yml",
],
importpath = "github.com/edgelesssys/constellation/v2/hack/logcollector/internal",
visibility = ["//hack/logcollector:__subpackages__"],
deps = [
"//debugd/filebeat",
"//debugd/logstash",
"//debugd/metricbeat",
"//internal/file",
"@com_github_spf13_afero//:afero",
"@in_gopkg_yaml_v3//:yaml_v3",

View File

@ -41,17 +41,13 @@ func NewFilebeatPreparer(port int) *FilebeatPreparer {
// Prepare prepares the Filebeat Helm chart by templating the filebeat.yml file and placing it in the specified directory.
func (p *FilebeatPreparer) Prepare(dir string) error {
templatedFilebeatYaml, err := p.template(filebeatAssets, "templates/filebeat.yml", FilebeatTemplateData{
LogstashHost: fmt.Sprintf("logstash-logstash:%d", p.port),
LogstashHost: fmt.Sprintf("logstash-logstash:%d", p.port),
AddCloudMetadata: true,
})
if err != nil {
return fmt.Errorf("template filebeat.yml: %w", err)
}
inputsYaml, err := filebeatAssets.ReadFile("inputs.yml")
if err != nil {
return fmt.Errorf("read log4j2.properties: %w", err)
}
rawHelmValues, err := filebeatHelmAssets.ReadFile("templates/filebeat/values.yml")
if err != nil {
return fmt.Errorf("read values.yml: %w", err)
@ -63,8 +59,6 @@ func (p *FilebeatPreparer) Prepare(dir string) error {
}
helmValuesYaml.Daemonset.FilebeatConfig.FilebeatYml = templatedFilebeatYaml.String()
helmValuesYaml.Daemonset.FilebeatConfig.InputsYml = string(inputsYaml)
helmValues, err := yaml.Marshal(helmValuesYaml)
if err != nil {
return fmt.Errorf("marshal values.yml: %w", err)
@ -79,7 +73,8 @@ func (p *FilebeatPreparer) Prepare(dir string) error {
// FilebeatTemplateData is template data.
type FilebeatTemplateData struct {
LogstashHost string
LogstashHost string
AddCloudMetadata bool
}
// FilebeatHelmValues represents the Helm values.yml.
@ -90,7 +85,6 @@ type FilebeatHelmValues struct {
Enabled bool `yaml:"enabled"`
FilebeatConfig struct {
FilebeatYml string `yaml:"filebeat.yml"`
InputsYml string `yaml:"inputs.yml"`
} `yaml:"filebeatConfig"`
ExtraEnvs []interface{} `yaml:"extraEnvs"`
SecretMounts []interface{} `yaml:"secretMounts"`

View File

@ -0,0 +1,164 @@
/*
Copyright (c) Edgeless Systems GmbH
SPDX-License-Identifier: AGPL-3.0-only
*/
package internal
import (
"embed"
"fmt"
"path/filepath"
"github.com/edgelesssys/constellation/v2/debugd/metricbeat"
"github.com/edgelesssys/constellation/v2/internal/file"
"github.com/spf13/afero"
"gopkg.in/yaml.v3"
)
var (
//go:embed templates/metricbeat/*
metricbeatHelmAssets embed.FS
metricbeatAssets = metricbeat.Assets
)
// MetricbeatPreparer prepares the Metricbeat Helm chart.
type MetricbeatPreparer struct {
// fh writes the rendered Helm values files to disk.
fh file.Handler
// port is the Logstash port that the generated Metricbeat config ships to.
port int
// templatePreparer provides the template() helper used by Prepare.
templatePreparer
}
// NewMetricbeatPreparer returns a new MetricbeatPreparer that renders against
// the real filesystem and targets Logstash on the given port.
func NewMetricbeatPreparer(port int) *MetricbeatPreparer {
return &MetricbeatPreparer{
fh: file.NewHandler(afero.NewOsFs()),
port: port,
}
}
// Prepare prepares the Metricbeat Helm chart by rendering the metricbeat.yml
// template twice — once for the all-nodes deployment (system metrics, port 5066)
// and once for the control-plane deployment (etcd metrics, port 5067) — and
// writing the resulting Helm values files into the "metricbeat" subdirectory
// of the specified directory.
func (p *MetricbeatPreparer) Prepare(dir string) error {
	// Render the config for the DaemonSet that runs on every node.
	templatedSystemMetricbeatYaml, err := p.template(metricbeatAssets, "templates/metricbeat.yml", MetricbeatTemplateData{
		LogstashHost:         fmt.Sprintf("logstash-logstash:%d", p.port),
		Port:                 5066,
		CollectSystemMetrics: true,
		AddCloudMetadata:     true,
	})
	if err != nil {
		return fmt.Errorf("template system metricbeat.yml: %w", err)
	}

	// Render the config for the DaemonSet that runs on control-plane nodes only.
	// A distinct port avoids clashing with the all-nodes instance's HTTP endpoint.
	templatedK8sMetricbeatYaml, err := p.template(metricbeatAssets, "templates/metricbeat.yml", MetricbeatTemplateData{
		LogstashHost:       fmt.Sprintf("logstash-logstash:%d", p.port),
		Port:               5067,
		CollectEtcdMetrics: true,
		AddCloudMetadata:   true,
	})
	if err != nil {
		return fmt.Errorf("template k8s metricbeat.yml: %w", err)
	}

	rawAllNodesHelmValues, err := metricbeatHelmAssets.ReadFile("templates/metricbeat/values-all-nodes.yml")
	if err != nil {
		return fmt.Errorf("read values-all-nodes.yml: %w", err)
	}

	rawControlPlaneHelmValues, err := metricbeatHelmAssets.ReadFile("templates/metricbeat/values-control-plane.yml")
	if err != nil {
		return fmt.Errorf("read values-control-plane.yml: %w", err)
	}

	allNodesHelmValuesYaml := &MetricbeatHelmValues{}
	if err := yaml.Unmarshal(rawAllNodesHelmValues, allNodesHelmValuesYaml); err != nil {
		return fmt.Errorf("unmarshal values-all-nodes.yml: %w", err)
	}

	controlPlaneHelmValuesYaml := &MetricbeatHelmValues{}
	if err := yaml.Unmarshal(rawControlPlaneHelmValues, controlPlaneHelmValuesYaml); err != nil {
		return fmt.Errorf("unmarshal values-control-plane.yml: %w", err)
	}

	// Inject the rendered metricbeat.yml configs into the respective values files.
	allNodesHelmValuesYaml.Daemonset.MetricbeatConfig.MetricbeatYml = templatedSystemMetricbeatYaml.String()
	controlPlaneHelmValuesYaml.Daemonset.MetricbeatConfig.MetricbeatYml = templatedK8sMetricbeatYaml.String()

	allNodesHelmValues, err := yaml.Marshal(allNodesHelmValuesYaml)
	if err != nil {
		// Fixed typo in error message: was "values-all-nodes.ym".
		return fmt.Errorf("marshal values-all-nodes.yml: %w", err)
	}

	controlPlaneHelmValues, err := yaml.Marshal(controlPlaneHelmValuesYaml)
	if err != nil {
		return fmt.Errorf("marshal values-control-plane.yml: %w", err)
	}

	if err = p.fh.Write(filepath.Join(dir, "metricbeat", "values-all-nodes.yml"), allNodesHelmValues, file.OptMkdirAll); err != nil {
		return fmt.Errorf("write values-all-nodes.yml: %w", err)
	}

	if err = p.fh.Write(filepath.Join(dir, "metricbeat", "values-control-plane.yml"), controlPlaneHelmValues, file.OptMkdirAll); err != nil {
		return fmt.Errorf("write values-control-plane.yml: %w", err)
	}

	return nil
}
// MetricbeatTemplateData is the data rendered into the metricbeat.yml
// configuration template.
type MetricbeatTemplateData struct {
	// LogstashHost is the host:port address of the Logstash endpoint
	// the rendered Metricbeat configuration ships metrics to.
	LogstashHost string
	// Port is a port number rendered into the template
	// (set to 5067 for the k8s configuration above).
	Port int
	// CollectEtcdMetrics toggles etcd metrics collection in the template.
	CollectEtcdMetrics bool
	// CollectSystemMetrics toggles host/system metrics collection in the template.
	CollectSystemMetrics bool
	// CollectK8sMetrics toggles Kubernetes metrics collection in the template.
	CollectK8sMetrics bool
	// AddK8sMetadata toggles the add_kubernetes_metadata processor in the template.
	AddK8sMetadata bool
	// AddCloudMetadata toggles the add_cloud_metadata processor in the template.
	AddCloudMetadata bool
}
// MetricbeatHelmValues represents the metricbeat Helm chart's values.yml.
type MetricbeatHelmValues struct {
	// Image is the container image repository.
	Image string `yaml:"image"`
	// ImageTag is the image tag (may include a pinned digest).
	ImageTag string `yaml:"imageTag"`
	// KubeStateMetrics toggles deployment of kube-state-metrics via the chart.
	KubeStateMetrics struct {
		Enabled bool `yaml:"enabled"`
	} `yaml:"kube_state_metrics"`
	// Deployment toggles the chart's Deployment variant (disabled here;
	// only the DaemonSet variant is used).
	Deployment struct {
		Enabled bool `yaml:"enabled"`
	} `yaml:"deployment"`
	// Daemonset configures the chart's DaemonSet variant.
	Daemonset Daemonset `yaml:"daemonset"`
	// ClusterRoleRules are the RBAC rules granted to Metricbeat's ClusterRole.
	ClusterRoleRules []struct {
		APIGroups []string `yaml:"apiGroups,omitempty"`
		Resources []string `yaml:"resources,omitempty"`
		Verbs []string `yaml:"verbs"`
		NonResourceURLs []string `yaml:"nonResourceURLs,omitempty"`
	} `yaml:"clusterRoleRules"`
}
// Daemonset represents the nested daemonset for the Helm values.yml.
type Daemonset struct {
	// Enabled toggles deployment of the DaemonSet.
	Enabled bool `yaml:"enabled"`
	// HostNetworking maps to the chart's hostNetworking value.
	HostNetworking bool `yaml:"hostNetworking"`
	// MetricbeatConfig holds the metricbeat.yml passed to the chart.
	// The value is filled in programmatically after unmarshalling the
	// values file (see the templating code above).
	MetricbeatConfig struct {
		MetricbeatYml string `yaml:"metricbeat.yml"`
	} `yaml:"metricbeatConfig"`
	// ExtraEnvs are additional environment variables for the pods.
	ExtraEnvs []any `yaml:"extraEnvs"`
	// SecretMounts are additional secret volume mounts for the pods.
	SecretMounts []any `yaml:"secretMounts"`
	// NodeSelector restricts which nodes the DaemonSet schedules on
	// (e.g. control-plane nodes only).
	NodeSelector any `yaml:"nodeSelector"`
	// Tolerations allow scheduling onto tainted nodes
	// (e.g. the control-plane taint).
	Tolerations []struct {
		Key string `yaml:"key"`
		Operator string `yaml:"operator"`
		Effect string `yaml:"effect"`
	} `yaml:"tolerations"`
	// SecurityContext is the container security context.
	SecurityContext struct {
		Privileged bool `yaml:"privileged"`
		RunAsUser int `yaml:"runAsUser"`
	} `yaml:"securityContext"`
	// ExtraVolumeMounts are additional volume mounts for the containers
	// (e.g. /run/systemd and the etcd PKI directory).
	ExtraVolumeMounts []struct {
		Name string `yaml:"name"`
		MountPath string `yaml:"mountPath"`
		ReadOnly bool `yaml:"readOnly"`
	} `yaml:"extraVolumeMounts"`
	// ExtraVolumes are the host-path volumes backing ExtraVolumeMounts.
	ExtraVolumes []struct {
		Name string `yaml:"name"`
		HostPath struct {
			Path string `yaml:"path"`
			Type string `yaml:"type"`
		} `yaml:"hostPath"`
	} `yaml:"extraVolumes"`
}

View File

@ -0,0 +1,79 @@
# Helm Values for the DaemonSet that is deployed on all nodes in the cluster and
# collects system metrics.
image: ghcr.io/edgelesssys/beats/metricbeat-oss
imageTag: 8.11.0@sha256:c2488378e794467f2a7214a56da0de017db1f2c28198721f12d74ad9cc263d08
kube_state_metrics:
  enabled: false
deployment:
  enabled: false
daemonset:
  enabled: true
  metricbeatConfig:
    # Filled in programmatically with the templated metricbeat.yml.
    metricbeat.yml: ""
  hostNetworking: true
  extraEnvs: []
  secretMounts: []
  # Also run on (tainted) control-plane nodes, since system metrics are
  # collected on every node.
  tolerations:
    - key: node-role.kubernetes.io/control-plane
      operator: Exists
      effect: NoSchedule
  securityContext:
    privileged: true
    runAsUser: 0
  # Mount systemd / D-Bus sockets read-only so Metricbeat can query
  # host-level (systemd) metrics.
  extraVolumeMounts:
    - name: runsystemd
      mountPath: /run/systemd
      readOnly: true
    - name: varrundbus
      mountPath: /var/run/dbus
      readOnly: true
  extraVolumes:
    - name: runsystemd
      hostPath:
        path: /run/systemd
        type: ""
    - name: varrundbus
      hostPath:
        path: /var/run/dbus
        type: ""
clusterRoleRules:
  - apiGroups: [""]
    resources:
      - nodes
      - namespaces
      - events
      - pods
      - services
    verbs: ["get", "list", "watch"]
  - apiGroups: ["extensions"]
    resources:
      - replicasets
    verbs: ["get", "list", "watch"]
  - apiGroups: ["apps"]
    resources:
      - statefulsets
      - deployments
      - replicasets
    verbs: ["get", "list", "watch"]
  - apiGroups: [""]
    resources:
      - nodes/stats
    verbs: ["get"]
  - apiGroups: ["batch"]
    resources:
      - jobs
    verbs: ["get", "list", "watch"]
  - nonResourceURLs:
      - "/metrics"
    verbs:
      - get

View File

@ -0,0 +1,88 @@
# Helm Values for the DaemonSet that is deployed on all control plane nodes in the cluster and
# collects Kubernetes and etcd metrics.
image: ghcr.io/edgelesssys/beats/metricbeat-oss
imageTag: 8.11.0@sha256:c2488378e794467f2a7214a56da0de017db1f2c28198721f12d74ad9cc263d08
kube_state_metrics:
  enabled: false
deployment:
  enabled: false
daemonset:
  enabled: true
  metricbeatConfig:
    # Filled in programmatically with the templated metricbeat.yml.
    metricbeat.yml: ""
  hostNetworking: true
  extraEnvs: []
  secretMounts: []
  # Pin this DaemonSet to control-plane nodes and tolerate their taint.
  nodeSelector:
    node-role.kubernetes.io/control-plane: ""
  tolerations:
    - key: node-role.kubernetes.io/control-plane
      operator: Exists
      effect: NoSchedule
  securityContext:
    privileged: true
    runAsUser: 0
  # Mount systemd / D-Bus sockets and the etcd PKI directory read-only so
  # Metricbeat can scrape etcd over TLS.
  extraVolumeMounts:
    - name: runsystemd
      mountPath: /run/systemd
      readOnly: true
    - name: varrundbus
      mountPath: /var/run/dbus
      readOnly: true
    - name: etcdcerts
      mountPath: /etc/kubernetes/pki/etcd
      readOnly: true
  extraVolumes:
    - name: runsystemd
      hostPath:
        path: /run/systemd
        type: ""
    - name: varrundbus
      hostPath:
        path: /var/run/dbus
        type: ""
    - name: etcdcerts
      hostPath:
        path: /etc/kubernetes/pki/etcd
        type: ""
clusterRoleRules:
  - apiGroups: [""]
    resources:
      - nodes
      - namespaces
      - events
      - pods
      - services
    verbs: ["get", "list", "watch"]
  - apiGroups: ["extensions"]
    resources:
      - replicasets
    verbs: ["get", "list", "watch"]
  - apiGroups: ["apps"]
    resources:
      - statefulsets
      - deployments
      - replicasets
    verbs: ["get", "list", "watch"]
  - apiGroups: [""]
    resources:
      - nodes/stats
    verbs: ["get"]
  - apiGroups: ["batch"]
    resources:
      - jobs
    verbs: ["get", "list", "watch"]
  - nonResourceURLs:
      - "/metrics"
    verbs:
      - get

View File

@ -588,7 +588,7 @@ func toPtr[T any](v T) *T {
const (
waitInterval = 15 * time.Second
maxWait = 15 * time.Minute
maxWait = 30 * time.Minute
timestampFormat = "20060102150405"
)

View File

@ -170,9 +170,11 @@ const (
// NodeMaintenanceOperatorImage is the image for the node maintenance operator.
NodeMaintenanceOperatorImage = "quay.io/medik8s/node-maintenance-operator:v0.15.0@sha256:8cb8dad93283268282c30e75c68f4bd76b28def4b68b563d2f9db9c74225d634" // renovate:container
// LogstashImage is the container image of logstash, used for log collection by debugd.
LogstashImage = "ghcr.io/edgelesssys/constellation/filebeat-debugd:v2.11.0-pre.0.20230821060133-60bf770e62bc@sha256:89ea1925345922a5471f26de6bc2344a83a76f2f908a6f048230699f8b810114" // renovate:container
LogstashImage = "ghcr.io/edgelesssys/constellation/logstash-debugd:v2.12.0-pre.0.20230922130827-dbc6d7cec47b@sha256:d2258bd6f02394b33cca26b4565a8e1f44b29d85d0dec76027bac6afb7da2bee" // renovate:container
// FilebeatImage is the container image of filebeat, used for log collection by debugd.
FilebeatImage = "ghcr.io/edgelesssys/constellation/filebeat-debugd:v2.11.0-pre.0.20230821060133-60bf770e62bc@sha256:89ea1925345922a5471f26de6bc2344a83a76f2f908a6f048230699f8b810114" // renovate:container
FilebeatImage = "ghcr.io/edgelesssys/constellation/filebeat-debugd:v2.12.0-pre.0.20230922130827-dbc6d7cec47b@sha256:460a5e754438d97ece2e1672ea469055f2bdfdd99290b6c727c493d030d0c382" // renovate:container
// MetricbeatImage is the container image of metricbeat, used for metrics collection by debugd.
MetricbeatImage = "ghcr.io/edgelesssys/constellation/metricbeat-debugd:v2.12.0-pre.0.20230922130827-dbc6d7cec47b@sha256:63ba8b5aa06b8186e9b6d1f37967363c2807aef05d998a5db70df08ee6734259" // renovate:container
// currently supported versions.
//nolint:revive