ci: collect cluster metrics to OpenSearch (#2347)

* add Metricbeat deployment to debugd

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* set metricbeat debugd image version

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* fix k8s deployment

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* use 2 separate deployments

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* only deploy via k8s in non-debug-images

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* add missing tilde

* remove k8s metrics

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* unify flag

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* add cloud metadata processor to filebeat

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* ci: fix debugd logcollection (#2355)

* add missing keyvault access role

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* bump logstash image version

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* bump filebeat / metricbeat image version

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* log used image version

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* use debugging image versions

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* increase wait timeout for image upload

* add cloud metadata processor to filebeat

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* fix template locations in container

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* fix image version typo

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* add filebeat / metricbeat users

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* remove user additions

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* update workflow step name

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* only mount config files

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* document potential rc

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* fix IAM permissions in workflow

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* fix AWS permissions

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* tidy

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* add missing workflow input

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* rename action

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* pin image versions

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* remove unnecessary workflow inputs

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

---------

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* add refStream input

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* remove inputs.yml dep

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* increase system metric period

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

* fix linkchecker

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>

---------

Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com>
This commit is contained in:
Moritz Sanft 2023-09-27 16:17:31 +02:00 committed by GitHub
parent c7b728f202
commit f4b2d02194
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
27 changed files with 699 additions and 149 deletions

View file

@ -3,10 +3,7 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library")
go_library(
name = "filebeat",
srcs = ["assets.go"],
embedsrcs = [
"templates/filebeat.yml",
"inputs.yml",
],
embedsrcs = ["templates/filebeat.yml"],
importpath = "github.com/edgelesssys/constellation/v2/debugd/filebeat",
visibility = ["//visibility:public"],
)

View file

@ -2,7 +2,6 @@ FROM fedora:38@sha256:6fc00f83a1b6526b1c6562e30f552d109ba8e269259c6742a26efab1b7
RUN dnf install -y https://artifacts.elastic.co/downloads/beats/filebeat/filebeat-8.6.2-x86_64.rpm
COPY debugd/filebeat/inputs.yml /usr/share/filebeat/inputs.yml
COPY debugd/filebeat/templates/ /usr/share/filebeat/templates/
COPY debugd/filebeat/templates/ /usr/share/constellogs/templates/
ENTRYPOINT ["/usr/share/filebeat/bin/filebeat", "-e", "--path.home", "/usr/share/filebeat", "--path.data", "/usr/share/filebeat/data"]

View file

@ -10,6 +10,5 @@ import "embed"
// Assets are the exported Filebeat template files.
//
//go:embed *.yml
//go:embed templates/*
var Assets embed.FS

View file

@ -1,9 +0,0 @@
- type: journald
enabled: true
id: journald
- type: filestream
enabled: true
id: container
paths:
- /var/log/pods/*/*/*.log

View file

@ -9,12 +9,15 @@ logging:
metrics.enabled: false
level: warning
filebeat.config:
inputs:
filebeat.inputs:
- type: journald
enabled: true
path: /usr/share/filebeat/inputs.yml
# reload.enabled: true
# reload.period: 10s
id: journald
- type: filestream
enabled: true
id: container
paths:
- /var/log/pods/*/*/*.log
timestamp.precision: nanosecond
@ -27,3 +30,6 @@ processors:
field: "log.file.path"
target_prefix: "kubernetes"
ignore_failure: true
{{ if .AddCloudMetadata }}
- add_cloud_metadata: ~
{{ end }}

View file

@ -32,7 +32,7 @@ const (
// NewStartTrigger returns a trigger func that can be registered with an infos instance.
// The trigger is called when infos changes to received state and starts a log collection pod
// with filebeat and logstash in case the flags are set.
// with filebeat, metricbeat and logstash in case the flags are set.
//
// This requires podman to be installed.
func NewStartTrigger(ctx context.Context, wg *sync.WaitGroup, provider cloudprovider.Provider,
@ -74,7 +74,7 @@ func NewStartTrigger(ctx context.Context, wg *sync.WaitGroup, provider cloudprov
return
}
logger.Infof("Getting logstash pipeline template")
logger.Infof("Getting logstash pipeline template from image %s", versions.LogstashImage)
tmpl, err := getTemplate(ctx, logger, versions.LogstashImage, "/run/logstash/templates/pipeline.conf", "/run/logstash")
if err != nil {
logger.Errorf("Getting logstash pipeline template: %v", err)
@ -97,28 +97,46 @@ func NewStartTrigger(ctx context.Context, wg *sync.WaitGroup, provider cloudprov
InfoMap: infoMapM,
Credentials: creds,
}
if err := writeTemplate("/run/filebeat/filebeat.yml", tmpl, pipelineConf); err != nil {
logger.Errorf("Writing logstash pipeline: %v", err)
if err := writeTemplate("/run/logstash/pipeline/pipeline.conf", tmpl, pipelineConf); err != nil {
logger.Errorf("Writing logstash config: %v", err)
return
}
logger.Infof("Getting logstash config template")
logger.Infof("Getting filebeat config template from image %s", versions.FilebeatImage)
tmpl, err = getTemplate(ctx, logger, versions.FilebeatImage, "/run/filebeat/templates/filebeat.yml", "/run/filebeat")
if err != nil {
logger.Errorf("Getting filebeat config template: %v", err)
return
}
filebeatConf := filebeatConfInput{
LogstashHost: "localhost:5044",
LogstashHost: "localhost:5044",
AddCloudMetadata: true,
}
if err := writeTemplate("/run/logstash/pipeline/pipeline.conf", tmpl, filebeatConf); err != nil {
logger.Errorf("Writing filebeat config: %v", err)
if err := writeTemplate("/run/filebeat/filebeat.yml", tmpl, filebeatConf); err != nil {
logger.Errorf("Writing filebeat pipeline: %v", err)
return
}
logger.Infof("Getting metricbeat config template from image %s", versions.MetricbeatImage)
tmpl, err = getTemplate(ctx, logger, versions.MetricbeatImage, "/run/metricbeat/templates/metricbeat.yml", "/run/metricbeat")
if err != nil {
logger.Errorf("Getting metricbeat config template: %v", err)
return
}
metricbeatConf := metricbeatConfInput{
LogstashHost: "localhost:5044",
Port: 5066,
CollectSystemMetrics: true,
AddCloudMetadata: true,
}
if err := writeTemplate("/run/metricbeat/metricbeat.yml", tmpl, metricbeatConf); err != nil {
logger.Errorf("Writing metricbeat pipeline: %v", err)
return
}
logger.Infof("Starting log collection pod")
if err := startPod(ctx, logger); err != nil {
logger.Errorf("Starting filebeat: %v", err)
logger.Errorf("Starting log collection: %v", err)
}
}()
}
@ -170,7 +188,7 @@ func getTemplate(ctx context.Context, logger *logger.Logger, image, templateDir,
}
func startPod(ctx context.Context, logger *logger.Logger) error {
// create a shared pod for filebeat and logstash
// create a shared pod for filebeat, metricbeat and logstash
createPodArgs := []string{
"pod",
"create",
@ -215,7 +233,7 @@ func startPod(ctx context.Context, logger *logger.Logger) error {
"--volume=/run/systemd:/run/systemd:ro",
"--volume=/run/systemd/journal/socket:/run/systemd/journal/socket:rw",
"--volume=/run/state/var/log:/var/log:ro",
"--volume=/run/filebeat:/usr/share/filebeat/:ro",
"--volume=/run/filebeat/filebeat.yml:/usr/share/filebeat/filebeat.yml:ro",
versions.FilebeatImage,
}
runFilebeatCmd := exec.CommandContext(ctx, "podman", runFilebeatArgs...)
@ -226,6 +244,28 @@ func startPod(ctx context.Context, logger *logger.Logger) error {
return fmt.Errorf("failed to run filebeat: %w", err)
}
// start metricbeat container
metricbeatLog := newCmdLogger(logger.Named("metricbeat"))
runMetricbeatArgs := []string{
"run",
"--rm",
"--name=metricbeat",
"--pod=logcollection",
"--privileged",
"--log-driver=none",
"--volume=/proc:/hostfs/proc:ro",
"--volume=/sys/fs/cgroup:/hostfs/sys/fs/cgroup:ro",
"--volume=/run/metricbeat/metricbeat.yml:/usr/share/metricbeat/metricbeat.yml:ro",
versions.MetricbeatImage,
}
runMetricbeatCmd := exec.CommandContext(ctx, "podman", runMetricbeatArgs...)
logger.Infof("Run metricbeat command: %v", runMetricbeatCmd.String())
runMetricbeatCmd.Stdout = metricbeatLog
runMetricbeatCmd.Stderr = metricbeatLog
if err := runMetricbeatCmd.Start(); err != nil {
return fmt.Errorf("failed to run metricbeat: %w", err)
}
return nil
}
@ -238,7 +278,16 @@ type logstashConfInput struct {
}
type filebeatConfInput struct {
LogstashHost string
LogstashHost string
AddCloudMetadata bool
}
// metricbeatConfInput holds the values that are rendered into the Metricbeat
// configuration template (metricbeat.yml).
type metricbeatConfInput struct {
	// Port is rendered as the value of the http.port setting.
	Port int
	// LogstashHost is rendered as the single entry of output.logstash.hosts,
	// e.g. "localhost:5044".
	LogstashHost string
	// CollectEtcdMetrics includes the etcd module section in the rendered config.
	CollectEtcdMetrics bool
	// CollectSystemMetrics includes the system module section in the rendered config.
	CollectSystemMetrics bool
	// AddCloudMetadata includes the add_cloud_metadata processor in the rendered config.
	AddCloudMetadata bool
}
func writeTemplate(path string, templ *template.Template, in any) error {

View file

@ -1,66 +0,0 @@
input {
beats {
host => "0.0.0.0"
port => 5044
}
}
filter {
mutate {
# Remove some fields that are not needed.
remove_field => [
"[agent]",
"[journald]",
"[syslog]",
"[systemd][invocation_id]",
"[event][original]",
"[log][offset]",
"[log][syslog]"
]
# Tag with the provided metadata.
add_field => {
{{ range $key, $value := .InfoMap }}
"[metadata][{{ $key }}]" => "{{ $value }}"
{{ end }}
}
}
# Parse structured logs for following systemd units.
if [systemd][unit] in ["bootstrapper.service", "constellation-bootstrapper.service"] {
# skip_on_invalid_json below does not skip the whole filter, so let's use a cheap workaround here.
# See:
# https://discuss.elastic.co/t/skip-on-invalid-json-skipping-all-filters/215195
# https://discuss.elastic.co/t/looking-for-a-way-to-detect-json/102263
if [message] =~ "\A\{.+\}\z" {
json {
source => "message"
target => "logs"
skip_on_invalid_json => true
}
mutate {
replace => {
"message" => "%{[logs][msg]}"
}
remove_field => [
"[logs][msg]",
"[logs][ts]"
]
}
de_dot {
fields => ["[logs][peer.address]"]
}
}
}
}
output {
opensearch {
hosts => "{{ .Host }}"
index => "{{ .IndexPrefix }}-%{+YYYY.MM.dd}"
user => "{{ .Credentials.Username }}"
password => "{{ .Credentials.Password }}"
ssl => true
ssl_certificate_verification => true
}
}

View file

@ -57,7 +57,7 @@ filter {
output {
opensearch {
hosts => "{{ .Host }}"
index => "systemd-logs-%{+YYYY.MM.dd}"
index => "{{ .IndexPrefix }}-%{+YYYY.MM.dd}"
user => "{{ .Credentials.Username }}"
password => "{{ .Credentials.Password }}"
ssl => true

View file

@ -0,0 +1,9 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library")
go_library(
name = "metricbeat",
srcs = ["assets.go"],
embedsrcs = ["templates/metricbeat.yml"],
importpath = "github.com/edgelesssys/constellation/v2/debugd/metricbeat",
visibility = ["//visibility:public"],
)

View file

@ -0,0 +1,15 @@
FROM fedora:38@sha256:61f921e0c7b51e162e6f94b14ef4e6b0d38eac5987286fe4f52a2c1158cc2399 AS release
RUN dnf install -y https://artifacts.elastic.co/downloads/beats/metricbeat/metricbeat-8.9.2-x86_64.rpm
COPY debugd/metricbeat/templates/ /usr/share/constellogs/templates/
# -e enables logging to stderr
# -E http.enabled=true enables http endpoint as seen in https://github.com/elastic/helm-charts/blob/main/metricbeat/templates/daemonset.yaml
# --path.home and --path.data set Metricbeat's home (installation) directory and its data directory
ENTRYPOINT [ "/usr/share/metricbeat/bin/metricbeat", \
"-e", \
"-E", "http.enabled=true", \
"--path.home", "/usr/share/metricbeat", \
"--path.data", "/usr/share/metricbeat/data" \
]

View file

@ -0,0 +1,14 @@
/*
Copyright (c) Edgeless Systems GmbH
SPDX-License-Identifier: AGPL-3.0-only
*/
package metricbeat
import "embed"
// Assets are the exported Metricbeat template files.
//
//go:embed templates/*
var Assets embed.FS

View file

@ -0,0 +1,63 @@
http.port: {{ .Port }}
output.logstash:
hosts: ["{{ .LogstashHost }}"]
output.console:
enabled: false
logging:
to_files: false
metrics.enabled: false
level: warning
timestamp.precision: nanosecond
# https://www.elastic.co/guide/en/beats/metricbeat/current/configuration-path.html#_system_hostfs
system.hostfs: "/hostfs"
metricbeat.modules:
{{ if .CollectSystemMetrics }}
- module: system
enabled: true
metricsets:
- cpu # CPU usage
- load # CPU load averages
- memory # Memory usage
- network # Network IO
- process # Per process metrics
- process_summary # Process summary
#- uptime # System Uptime
#- socket_summary # Socket summary
#- core # Per CPU core usage
- diskio # Disk IO
- filesystem # File system usage for each mountpoint
- fsstat # File system summary metrics
#- raid # Raid
#- socket # Sockets and connection info (linux only)
#- service # systemd service information
cpu.metrics: ["percentages","normalized_percentages"]
period: 10s
processes: ['.*']
# To monitor host metrics from within a container. As per:
# https://www.elastic.co/guide/en/beats/metricbeat/current/metricbeat-module-system.html#_example_configuration_59
hostfs: "/hostfs"
{{ end }}
{{ if .CollectEtcdMetrics }}
- module: etcd
enabled: true
metricsets: ["metrics"]
period: 30s
hosts: ["https://localhost:2379"]
ssl:
certificate_authorities: ["/etc/kubernetes/pki/etcd/ca.crt"]
certificate: "/etc/kubernetes/pki/etcd/peer.crt"
key: "/etc/kubernetes/pki/etcd/peer.key"
verification_mode: "full"
{{ end }}
processors:
{{ if .AddCloudMetadata }}
- add_cloud_metadata: ~
{{ end }}