mirror of
https://github.com/edgelesssys/constellation.git
synced 2024-10-01 01:36:09 -04:00
cli: report log collection failure to user (#2354)
* Report log collection failure to user
* Try collecting logs for more error cases

---------

Signed-off-by: Daniel Weiße <dw@edgeless.systems>
This commit is contained in:
parent
d0e3e494ba
commit
fa4da88375
@ -235,8 +235,12 @@ func (i *initCmd) initialize(
|
|||||||
if errors.As(err, &nonRetriable) {
|
if errors.As(err, &nonRetriable) {
|
||||||
cmd.PrintErrln("Cluster initialization failed. This error is not recoverable.")
|
cmd.PrintErrln("Cluster initialization failed. This error is not recoverable.")
|
||||||
cmd.PrintErrln("Terminate your cluster and try again.")
|
cmd.PrintErrln("Terminate your cluster and try again.")
|
||||||
|
if nonRetriable.logCollectionErr != nil {
|
||||||
|
cmd.PrintErrf("Failed to collect logs from bootstrapper: %s\n", nonRetriable.logCollectionErr)
|
||||||
|
} else {
|
||||||
cmd.PrintErrf("Fetched bootstrapper logs are stored in %q\n", i.pf.PrefixPrintablePath(constants.ErrorLog))
|
cmd.PrintErrf("Fetched bootstrapper logs are stored in %q\n", i.pf.PrefixPrintablePath(constants.ErrorLog))
|
||||||
}
|
}
|
||||||
|
}
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
i.log.Debugf("Initialization request succeeded")
|
i.log.Debugf("Initialization request succeeded")
|
||||||
@ -330,7 +334,10 @@ func (d *initDoer) Do(ctx context.Context) error {
|
|||||||
// connectedOnce is set in handleGRPCStateChanges when a connection was established in one retry attempt.
|
// connectedOnce is set in handleGRPCStateChanges when a connection was established in one retry attempt.
|
||||||
// This should cancel any other retry attempts when the connection is lost since the bootstrapper likely won't accept any new attempts anymore.
|
// This should cancel any other retry attempts when the connection is lost since the bootstrapper likely won't accept any new attempts anymore.
|
||||||
if d.connectedOnce {
|
if d.connectedOnce {
|
||||||
return &nonRetriableError{errors.New("init already connected to the remote server in a previous attempt - resumption is not supported")}
|
return &nonRetriableError{
|
||||||
|
logCollectionErr: errors.New("init already connected to the remote server in a previous attempt - resumption is not supported"),
|
||||||
|
err: errors.New("init already connected to the remote server in a previous attempt - resumption is not supported"),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
conn, err := d.dialer.Dial(ctx, d.endpoint)
|
conn, err := d.dialer.Dial(ctx, d.endpoint)
|
||||||
@ -351,31 +358,58 @@ func (d *initDoer) Do(ctx context.Context) error {
|
|||||||
d.log.Debugf("Created protoClient")
|
d.log.Debugf("Created protoClient")
|
||||||
resp, err := protoClient.Init(ctx, d.req)
|
resp, err := protoClient.Init(ctx, d.req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return &nonRetriableError{fmt.Errorf("init call: %w", err)}
|
return &nonRetriableError{
|
||||||
|
logCollectionErr: errors.New("rpc failed before first response was received - no logs available"),
|
||||||
|
err: fmt.Errorf("init call: %w", err),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
res, err := resp.Recv() // get first response, either success or failure
|
res, err := resp.Recv() // get first response, either success or failure
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if e := d.getLogs(resp); e != nil {
|
if e := d.getLogs(resp); e != nil {
|
||||||
d.log.Debugf("Failed to collect logs: %s", e)
|
d.log.Debugf("Failed to collect logs: %s", e)
|
||||||
|
return &nonRetriableError{
|
||||||
|
logCollectionErr: e,
|
||||||
|
err: err,
|
||||||
}
|
}
|
||||||
return &nonRetriableError{err}
|
}
|
||||||
|
return &nonRetriableError{err: err}
|
||||||
}
|
}
|
||||||
|
|
||||||
switch res.Kind.(type) {
|
switch res.Kind.(type) {
|
||||||
case *initproto.InitResponse_InitFailure:
|
case *initproto.InitResponse_InitFailure:
|
||||||
if e := d.getLogs(resp); e != nil {
|
if e := d.getLogs(resp); e != nil {
|
||||||
d.log.Debugf("Failed to get logs from cluster: %s", e)
|
d.log.Debugf("Failed to get logs from cluster: %s", e)
|
||||||
|
return &nonRetriableError{
|
||||||
|
logCollectionErr: e,
|
||||||
|
err: errors.New(res.GetInitFailure().GetError()),
|
||||||
}
|
}
|
||||||
return &nonRetriableError{errors.New(res.GetInitFailure().GetError())}
|
}
|
||||||
|
return &nonRetriableError{err: errors.New(res.GetInitFailure().GetError())}
|
||||||
case *initproto.InitResponse_InitSuccess:
|
case *initproto.InitResponse_InitSuccess:
|
||||||
d.resp = res.GetInitSuccess()
|
d.resp = res.GetInitSuccess()
|
||||||
case nil:
|
case nil:
|
||||||
d.log.Debugf("Cluster returned nil response type")
|
d.log.Debugf("Cluster returned nil response type")
|
||||||
return &nonRetriableError{errors.New("empty response from cluster")}
|
err = errors.New("empty response from cluster")
|
||||||
|
if e := d.getLogs(resp); e != nil {
|
||||||
|
d.log.Debugf("Failed to collect logs: %s", e)
|
||||||
|
return &nonRetriableError{
|
||||||
|
logCollectionErr: e,
|
||||||
|
err: err,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return &nonRetriableError{err: err}
|
||||||
default:
|
default:
|
||||||
d.log.Debugf("Cluster returned unknown response type")
|
d.log.Debugf("Cluster returned unknown response type")
|
||||||
return &nonRetriableError{errors.New("unknown response from cluster")}
|
err = errors.New("unknown response from cluster")
|
||||||
|
if e := d.getLogs(resp); e != nil {
|
||||||
|
d.log.Debugf("Failed to collect logs: %s", e)
|
||||||
|
return &nonRetriableError{
|
||||||
|
logCollectionErr: e,
|
||||||
|
err: err,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return &nonRetriableError{err: err}
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
@ -392,9 +426,18 @@ func (d *initDoer) getLogs(resp initproto.API_InitClient) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
switch res.Kind.(type) {
|
||||||
|
case *initproto.InitResponse_InitFailure:
|
||||||
|
return errors.New("trying to collect logs: received init failure response, expected log response")
|
||||||
|
case *initproto.InitResponse_InitSuccess:
|
||||||
|
return errors.New("trying to collect logs: received init success response, expected log response")
|
||||||
|
case nil:
|
||||||
|
return errors.New("trying to collect logs: received nil response, expected log response")
|
||||||
|
}
|
||||||
|
|
||||||
log := res.GetLog().GetLog()
|
log := res.GetLog().GetLog()
|
||||||
if log == nil {
|
if log == nil {
|
||||||
return errors.New("sent empty logs")
|
return errors.New("received empty logs")
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := d.fh.Write(constants.ErrorLog, log, file.OptAppend); err != nil {
|
if err := d.fh.Write(constants.ErrorLog, log, file.OptAppend); err != nil {
|
||||||
@ -609,6 +652,7 @@ type grpcDialer interface {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type nonRetriableError struct {
|
type nonRetriableError struct {
|
||||||
|
logCollectionErr error
|
||||||
err error
|
err error
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -90,22 +90,47 @@ func TestInitialize(t *testing.T) {
|
|||||||
idFile: &clusterid.File{IP: "192.0.2.1"},
|
idFile: &clusterid.File{IP: "192.0.2.1"},
|
||||||
configMutator: func(c *config.Config) { c.Provider.GCP.ServiceAccountKeyPath = serviceAccPath },
|
configMutator: func(c *config.Config) { c.Provider.GCP.ServiceAccountKeyPath = serviceAccPath },
|
||||||
serviceAccKey: gcpServiceAccKey,
|
serviceAccKey: gcpServiceAccKey,
|
||||||
initServerAPI: &stubInitServer{res: &initproto.InitResponse{Kind: &initproto.InitResponse_InitSuccess{InitSuccess: testInitResp}}},
|
initServerAPI: &stubInitServer{res: []*initproto.InitResponse{{Kind: &initproto.InitResponse_InitSuccess{InitSuccess: testInitResp}}}},
|
||||||
},
|
},
|
||||||
"initialize some azure instances": {
|
"initialize some azure instances": {
|
||||||
provider: cloudprovider.Azure,
|
provider: cloudprovider.Azure,
|
||||||
idFile: &clusterid.File{IP: "192.0.2.1"},
|
idFile: &clusterid.File{IP: "192.0.2.1"},
|
||||||
initServerAPI: &stubInitServer{res: &initproto.InitResponse{Kind: &initproto.InitResponse_InitSuccess{InitSuccess: testInitResp}}},
|
initServerAPI: &stubInitServer{res: []*initproto.InitResponse{{Kind: &initproto.InitResponse_InitSuccess{InitSuccess: testInitResp}}}},
|
||||||
},
|
},
|
||||||
"initialize some qemu instances": {
|
"initialize some qemu instances": {
|
||||||
provider: cloudprovider.QEMU,
|
provider: cloudprovider.QEMU,
|
||||||
idFile: &clusterid.File{IP: "192.0.2.1"},
|
idFile: &clusterid.File{IP: "192.0.2.1"},
|
||||||
initServerAPI: &stubInitServer{res: &initproto.InitResponse{Kind: &initproto.InitResponse_InitSuccess{InitSuccess: testInitResp}}},
|
initServerAPI: &stubInitServer{res: []*initproto.InitResponse{{Kind: &initproto.InitResponse_InitSuccess{InitSuccess: testInitResp}}}},
|
||||||
},
|
},
|
||||||
"non retriable error": {
|
"non retriable error": {
|
||||||
provider: cloudprovider.QEMU,
|
provider: cloudprovider.QEMU,
|
||||||
idFile: &clusterid.File{IP: "192.0.2.1"},
|
idFile: &clusterid.File{IP: "192.0.2.1"},
|
||||||
initServerAPI: &stubInitServer{initErr: &nonRetriableError{assert.AnError}},
|
initServerAPI: &stubInitServer{initErr: &nonRetriableError{err: assert.AnError}},
|
||||||
|
retriable: false,
|
||||||
|
masterSecretShouldExist: true,
|
||||||
|
wantErr: true,
|
||||||
|
},
|
||||||
|
"non retriable error with failed log collection": {
|
||||||
|
provider: cloudprovider.QEMU,
|
||||||
|
idFile: &clusterid.File{IP: "192.0.2.1"},
|
||||||
|
initServerAPI: &stubInitServer{
|
||||||
|
res: []*initproto.InitResponse{
|
||||||
|
{
|
||||||
|
Kind: &initproto.InitResponse_InitFailure{
|
||||||
|
InitFailure: &initproto.InitFailureResponse{
|
||||||
|
Error: "error",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Kind: &initproto.InitResponse_InitFailure{
|
||||||
|
InitFailure: &initproto.InitFailureResponse{
|
||||||
|
Error: "error",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
retriable: false,
|
retriable: false,
|
||||||
masterSecretShouldExist: true,
|
masterSecretShouldExist: true,
|
||||||
wantErr: true,
|
wantErr: true,
|
||||||
@ -132,7 +157,7 @@ func TestInitialize(t *testing.T) {
|
|||||||
"k8s version without v works": {
|
"k8s version without v works": {
|
||||||
provider: cloudprovider.Azure,
|
provider: cloudprovider.Azure,
|
||||||
idFile: &clusterid.File{IP: "192.0.2.1"},
|
idFile: &clusterid.File{IP: "192.0.2.1"},
|
||||||
initServerAPI: &stubInitServer{res: &initproto.InitResponse{Kind: &initproto.InitResponse_InitSuccess{InitSuccess: testInitResp}}},
|
initServerAPI: &stubInitServer{res: []*initproto.InitResponse{{Kind: &initproto.InitResponse_InitSuccess{InitSuccess: testInitResp}}}},
|
||||||
configMutator: func(c *config.Config) {
|
configMutator: func(c *config.Config) {
|
||||||
res, err := versions.NewValidK8sVersion(strings.TrimPrefix(string(versions.Default), "v"), true)
|
res, err := versions.NewValidK8sVersion(strings.TrimPrefix(string(versions.Default), "v"), true)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
@ -142,7 +167,7 @@ func TestInitialize(t *testing.T) {
|
|||||||
"outdated k8s patch version doesn't work": {
|
"outdated k8s patch version doesn't work": {
|
||||||
provider: cloudprovider.Azure,
|
provider: cloudprovider.Azure,
|
||||||
idFile: &clusterid.File{IP: "192.0.2.1"},
|
idFile: &clusterid.File{IP: "192.0.2.1"},
|
||||||
initServerAPI: &stubInitServer{res: &initproto.InitResponse{Kind: &initproto.InitResponse_InitSuccess{InitSuccess: testInitResp}}},
|
initServerAPI: &stubInitServer{res: []*initproto.InitResponse{{Kind: &initproto.InitResponse_InitSuccess{InitSuccess: testInitResp}}}},
|
||||||
configMutator: func(c *config.Config) {
|
configMutator: func(c *config.Config) {
|
||||||
v, err := semver.New(versions.SupportedK8sVersions()[0])
|
v, err := semver.New(versions.SupportedK8sVersions()[0])
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
@ -458,7 +483,8 @@ func TestAttestation(t *testing.T) {
|
|||||||
assert := assert.New(t)
|
assert := assert.New(t)
|
||||||
require := require.New(t)
|
require := require.New(t)
|
||||||
|
|
||||||
initServerAPI := &stubInitServer{res: &initproto.InitResponse{
|
initServerAPI := &stubInitServer{res: []*initproto.InitResponse{
|
||||||
|
{
|
||||||
Kind: &initproto.InitResponse_InitSuccess{
|
Kind: &initproto.InitResponse_InitSuccess{
|
||||||
InitSuccess: &initproto.InitSuccessResponse{
|
InitSuccess: &initproto.InitSuccessResponse{
|
||||||
Kubeconfig: []byte("kubeconfig"),
|
Kubeconfig: []byte("kubeconfig"),
|
||||||
@ -466,6 +492,7 @@ func TestAttestation(t *testing.T) {
|
|||||||
ClusterId: []byte("clusterID"),
|
ClusterId: []byte("clusterID"),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
},
|
||||||
}}
|
}}
|
||||||
existingIDFile := &clusterid.File{IP: "192.0.2.4", CloudProvider: cloudprovider.QEMU}
|
existingIDFile := &clusterid.File{IP: "192.0.2.4", CloudProvider: cloudprovider.QEMU}
|
||||||
|
|
||||||
@ -577,14 +604,16 @@ func (i *testIssuer) Issue(_ context.Context, userData []byte, _ []byte) ([]byte
|
|||||||
}
|
}
|
||||||
|
|
||||||
type stubInitServer struct {
|
type stubInitServer struct {
|
||||||
res *initproto.InitResponse
|
res []*initproto.InitResponse
|
||||||
initErr error
|
initErr error
|
||||||
|
|
||||||
initproto.UnimplementedAPIServer
|
initproto.UnimplementedAPIServer
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *stubInitServer) Init(_ *initproto.InitRequest, stream initproto.API_InitServer) error {
|
func (s *stubInitServer) Init(_ *initproto.InitRequest, stream initproto.API_InitServer) error {
|
||||||
_ = stream.Send(s.res)
|
for _, r := range s.res {
|
||||||
|
_ = stream.Send(r)
|
||||||
|
}
|
||||||
return s.initErr
|
return s.initErr
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user