mirror of
https://github.com/edgelesssys/constellation.git
synced 2025-01-11 15:39:33 -05:00
debugd: return systemd logs if restarting a unit fails (#3159)
* Wait until logcollection containers are ready * Add systemd unit journal entries to error message --------- Signed-off-by: Daniel Weiße <dw@edgeless.systems>
This commit is contained in:
parent
08770a69c4
commit
c682558940
@ -61,6 +61,7 @@ type SystemdUnit struct {
|
|||||||
type ServiceManager struct {
|
type ServiceManager struct {
|
||||||
log *slog.Logger
|
log *slog.Logger
|
||||||
dbus dbusClient
|
dbus dbusClient
|
||||||
|
journal journalReader
|
||||||
fs afero.Fs
|
fs afero.Fs
|
||||||
systemdUnitFilewriteLock sync.Mutex
|
systemdUnitFilewriteLock sync.Mutex
|
||||||
}
|
}
|
||||||
@ -71,6 +72,7 @@ func NewServiceManager(log *slog.Logger) *ServiceManager {
|
|||||||
return &ServiceManager{
|
return &ServiceManager{
|
||||||
log: log,
|
log: log,
|
||||||
dbus: &dbusWrapper{},
|
dbus: &dbusWrapper{},
|
||||||
|
journal: &journalctlWrapper{},
|
||||||
fs: fs,
|
fs: fs,
|
||||||
systemdUnitFilewriteLock: sync.Mutex{},
|
systemdUnitFilewriteLock: sync.Mutex{},
|
||||||
}
|
}
|
||||||
@ -99,6 +101,11 @@ type dbusConn interface {
|
|||||||
Close()
|
Close()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type journalReader interface {
|
||||||
|
// ReadJournal reads the journal for a specific unit.
|
||||||
|
readJournal(unit string) string
|
||||||
|
}
|
||||||
|
|
||||||
// SystemdAction will perform a systemd action on a service unit (start, stop, restart, reload).
|
// SystemdAction will perform a systemd action on a service unit (start, stop, restart, reload).
|
||||||
func (s *ServiceManager) SystemdAction(ctx context.Context, request ServiceManagerRequest) error {
|
func (s *ServiceManager) SystemdAction(ctx context.Context, request ServiceManagerRequest) error {
|
||||||
log := s.log.With(slog.String("unit", request.Unit), slog.String("action", request.Action.String()))
|
log := s.log.With(slog.String("unit", request.Unit), slog.String("action", request.Action.String()))
|
||||||
@ -139,7 +146,8 @@ func (s *ServiceManager) SystemdAction(ctx context.Context, request ServiceManag
|
|||||||
return nil
|
return nil
|
||||||
|
|
||||||
default:
|
default:
|
||||||
return fmt.Errorf("performing action %q on systemd unit %q failed: expected %q but received %q", request.Action.String(), request.Unit, "done", result)
|
serviceJournal := s.journal.readJournal(request.Unit)
|
||||||
|
return fmt.Errorf("performing action %q on systemd unit %q failed: expected %q but received %q. systemd unit journal entries: %s", request.Action.String(), request.Unit, "done", result, serviceJournal)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -104,6 +104,7 @@ func TestSystemdAction(t *testing.T) {
|
|||||||
manager := ServiceManager{
|
manager := ServiceManager{
|
||||||
log: logger.NewTest(t),
|
log: logger.NewTest(t),
|
||||||
dbus: &tc.dbus,
|
dbus: &tc.dbus,
|
||||||
|
journal: &stubJournalReader{},
|
||||||
fs: fs,
|
fs: fs,
|
||||||
systemdUnitFilewriteLock: sync.Mutex{},
|
systemdUnitFilewriteLock: sync.Mutex{},
|
||||||
}
|
}
|
||||||
@ -183,6 +184,7 @@ func TestWriteSystemdUnitFile(t *testing.T) {
|
|||||||
manager := ServiceManager{
|
manager := ServiceManager{
|
||||||
log: logger.NewTest(t),
|
log: logger.NewTest(t),
|
||||||
dbus: &tc.dbus,
|
dbus: &tc.dbus,
|
||||||
|
journal: &stubJournalReader{},
|
||||||
fs: fs,
|
fs: fs,
|
||||||
systemdUnitFilewriteLock: sync.Mutex{},
|
systemdUnitFilewriteLock: sync.Mutex{},
|
||||||
}
|
}
|
||||||
@ -296,6 +298,7 @@ func TestOverrideServiceUnitExecStart(t *testing.T) {
|
|||||||
manager := ServiceManager{
|
manager := ServiceManager{
|
||||||
log: logger.NewTest(t),
|
log: logger.NewTest(t),
|
||||||
dbus: &tc.dbus,
|
dbus: &tc.dbus,
|
||||||
|
journal: &stubJournalReader{},
|
||||||
fs: fs,
|
fs: fs,
|
||||||
systemdUnitFilewriteLock: sync.Mutex{},
|
systemdUnitFilewriteLock: sync.Mutex{},
|
||||||
}
|
}
|
||||||
@ -367,3 +370,9 @@ func (c *fakeDbusConn) ReloadContext(_ context.Context) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (c *fakeDbusConn) Close() {}
|
func (c *fakeDbusConn) Close() {}
|
||||||
|
|
||||||
|
type stubJournalReader struct{}
|
||||||
|
|
||||||
|
func (s *stubJournalReader) readJournal(_ string) string {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
@ -8,6 +8,7 @@ package deploy
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"os/exec"
|
||||||
|
|
||||||
"github.com/coreos/go-systemd/v22/dbus"
|
"github.com/coreos/go-systemd/v22/dbus"
|
||||||
)
|
)
|
||||||
@ -48,3 +49,85 @@ func (c *dbusConnWrapper) ReloadContext(ctx context.Context) error {
|
|||||||
func (c *dbusConnWrapper) Close() {
|
func (c *dbusConnWrapper) Close() {
|
||||||
c.conn.Close()
|
c.conn.Close()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type journalctlWrapper struct{}
|
||||||
|
|
||||||
|
func (j *journalctlWrapper) readJournal(unit string) string {
|
||||||
|
out, _ := exec.CommandContext(context.Background(), "journalctl", "-u", unit, "--no-pager").CombinedOutput()
|
||||||
|
return string(out)
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
// Preferably, we would use the systemd journal API directly.
|
||||||
|
// However, this requires linking against systemd libraries, so we go with the easier journalctl command for now.
|
||||||
|
|
||||||
|
type sdJournalWrapper struct{}
|
||||||
|
|
||||||
|
// readJournal reads the journal for a specific unit.
|
||||||
|
func (s *sdJournalWrapper) readJournal(unit string) string {
|
||||||
|
journal, err := sdjournal.NewJournal()
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("opening journal: %s", err)
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
defer journal.Close()
|
||||||
|
|
||||||
|
// Filter the journal for the specified unit
|
||||||
|
filters := []string{
|
||||||
|
fmt.Sprintf("_SYSTEMD_UNIT=%s", unit),
|
||||||
|
fmt.Sprintf("UNIT=%s", unit),
|
||||||
|
fmt.Sprintf("OBJECT_SYSTEMD_UNIT=%s", unit),
|
||||||
|
fmt.Sprintf("_SYSTEMD_SLICE=%s", unit),
|
||||||
|
fmt.Sprintf("_SYSTEMD_USER_UNIT=%s", unit),
|
||||||
|
fmt.Sprintf("USER_UNIT=%s", unit),
|
||||||
|
fmt.Sprintf("COREDUMP_USER_UNIT=%s", unit),
|
||||||
|
fmt.Sprintf("OBJECT_SYSTEMD_USER_UNIT=%s", unit),
|
||||||
|
fmt.Sprintf("_SYSTEMD_USER_SLICE=%s", unit),
|
||||||
|
}
|
||||||
|
for _, filter := range filters {
|
||||||
|
if err := journal.AddMatch(filter); err != nil {
|
||||||
|
log.Printf("applying filter %q: %s", filter, err)
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
if err := journal.AddDisjunction(); err != nil {
|
||||||
|
log.Printf("adding disjunction to journal filter: %s", err)
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Seek to the beginning of the journal
|
||||||
|
if err := journal.SeekHead(); err != nil {
|
||||||
|
log.Printf("seeking journal tail: %s", err)
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// Iterate over the journal entries
|
||||||
|
var previousCursor string
|
||||||
|
journalLog := &strings.Builder{}
|
||||||
|
for {
|
||||||
|
if _, err := journal.Next(); err != nil {
|
||||||
|
log.Printf("getting next entry in journal: %s", err)
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
entry, err := journal.GetEntry()
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("getting journal entry: %s", err)
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// Abort if we reached the end of the journal, i.e. the cursor didn't change
|
||||||
|
if entry.Cursor == previousCursor {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
previousCursor = entry.Cursor
|
||||||
|
|
||||||
|
if _, err := journalLog.WriteString(entry.Fields[sdjournal.SD_JOURNAL_FIELD_MESSAGE] + "\n"); err != nil {
|
||||||
|
log.Printf("copying journal entry to buffer: %s", err)
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return strings.TrimSpace(journalLog.String())
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
@ -200,6 +200,10 @@ func startPod(ctx context.Context, logger *slog.Logger) error {
|
|||||||
if err := runLogstashCmd.Start(); err != nil {
|
if err := runLogstashCmd.Start(); err != nil {
|
||||||
return fmt.Errorf("failed to start logstash: %w", err)
|
return fmt.Errorf("failed to start logstash: %w", err)
|
||||||
}
|
}
|
||||||
|
if out, err := exec.CommandContext(ctx, "podman", "wait", "logstash", "--condition=running", "--interval=15s").CombinedOutput(); err != nil {
|
||||||
|
logger.Error("Logstash container failed to reach healthy status", "err", err, "output", out)
|
||||||
|
return fmt.Errorf("waiting for logstash container to reach healthy status: %w; output: %s", err, out)
|
||||||
|
}
|
||||||
|
|
||||||
// start filebeat container
|
// start filebeat container
|
||||||
filebeatLog := newCmdLogger(logger.WithGroup("filebeat"))
|
filebeatLog := newCmdLogger(logger.WithGroup("filebeat"))
|
||||||
@ -225,6 +229,10 @@ func startPod(ctx context.Context, logger *slog.Logger) error {
|
|||||||
if err := runFilebeatCmd.Start(); err != nil {
|
if err := runFilebeatCmd.Start(); err != nil {
|
||||||
return fmt.Errorf("failed to run filebeat: %w", err)
|
return fmt.Errorf("failed to run filebeat: %w", err)
|
||||||
}
|
}
|
||||||
|
if out, err := exec.CommandContext(ctx, "podman", "wait", "filebeat", "--condition=running", "--interval=15s").CombinedOutput(); err != nil {
|
||||||
|
logger.Error("Filebeat container failed to reach healthy status", "err", err, "output", out)
|
||||||
|
return fmt.Errorf("waiting for filebeat container to reach healthy status: %w; output: %s", err, out)
|
||||||
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user