AB#2181: retry k8s downloads (#286)

Generalize retrier:
* Generalize Do to use a supplied 'retriable' function
* Make clock an optional argument in NewIntervalRetrier
* Move grpc/retrier to internal package
* Update existing unit tests to not use retry feature

Add retryDownloadToTempDir:
* Wrap downloadToTempDir with retrier.
* Retry if TCP connection is reset.
* Abort by canceling the context.
* Use a mock server in the unit test that serves responses
depending on the state received through a state channel.

Co-authored-by: katexochen <49727155+katexochen@users.noreply.github.com>
This commit is contained in:
Otto Bittner 2022-07-21 15:20:12 +02:00 committed by GitHub
parent 741384158a
commit c743398a23
7 changed files with 338 additions and 146 deletions

61
internal/retry/retry.go Normal file
View file

@ -0,0 +1,61 @@
package retry
import (
"context"
"time"
"k8s.io/utils/clock"
)
// IntervalRetrier retries a call with an interval. The call is defined by the doer field.
type IntervalRetrier struct {
// interval is the period of the ticker that Do waits on between attempts.
interval time.Duration
// doer is the operation that Do invokes on every attempt.
doer Doer
// clock creates the ticker used by Do; it can be replaced by a fake clock in tests.
clock clock.WithTicker
// retriable reports whether an error returned by doer should lead to another attempt.
retriable func(error) bool
}
// NewIntervalRetrier constructs an IntervalRetrier that calls doer, waits for
// interval between attempts, and consults retriable to decide whether a failed
// attempt is repeated.
//
// The retrier uses the real clock unless optClock is given, in which case the
// first supplied clock is used instead (intended for tests).
func NewIntervalRetrier(doer Doer, interval time.Duration, retriable func(error) bool, optClock ...clock.WithTicker) *IntervalRetrier {
	retrier := &IntervalRetrier{
		interval:  interval,
		doer:      doer,
		clock:     clock.RealClock{},
		retriable: retriable,
	}
	if len(optClock) != 0 {
		// Only the first optional clock is honored; any further ones are ignored.
		retrier.clock = optClock[0]
	}
	return retrier
}
// Do calls the doer until it succeeds, fails with an error that is not
// retriable, or ctx is done.
//
// It returns nil on success, the doer's error if that error is not retriable,
// and ctx.Err() if the context ends while waiting for the next attempt.
func (r *IntervalRetrier) Do(ctx context.Context) error {
	tick := r.clock.NewTicker(r.interval)
	defer tick.Stop()

	for {
		attemptErr := r.doer.Do(ctx)
		if attemptErr == nil || !r.retriable(attemptErr) {
			// Success (nil) and permanent failures both end the loop here.
			return attemptErr
		}

		// Retriable failure: wait for the next tick unless the context ends first.
		select {
		case <-tick.C():
		case <-ctx.Done():
			return ctx.Err()
		}
	}
}
// Doer is the operation that an IntervalRetrier invokes on each attempt.
type Doer interface {
// Do performs an operation.
//
// It should return an error that can be checked for retriability.
Do(ctx context.Context) error
}

View file

@ -0,0 +1,98 @@
package retry
import (
"context"
"errors"
"testing"
"time"
"github.com/stretchr/testify/assert"
testclock "k8s.io/utils/clock/testing"
)
// TestDo drives IntervalRetrier.Do with a stub doer and a fake clock.
//
// Each entry of a test case's errors slice is handed to one doer attempt; the
// fake clock is then stepped by the retry interval so that a retriable error is
// followed by the next attempt.
func TestDo(t *testing.T) {
	testCases := map[string]struct {
		cancel  bool
		errors  []error
		wantErr error
	}{
		"no error": {
			errors: []error{
				nil,
			},
		},
		"permanent error": {
			errors: []error{
				errors.New("error"),
			},
			wantErr: errors.New("error"),
		},
		"service unavailable then success": {
			errors: []error{
				errors.New("retry me"),
				nil,
			},
		},
		"service unavailable then permanent error": {
			errors: []error{
				errors.New("retry me"),
				errors.New("error"),
			},
			wantErr: errors.New("error"),
		},
		"cancellation works": {
			cancel: true,
			errors: []error{
				errors.New("retry me"),
			},
			wantErr: context.Canceled,
		},
	}

	for name, tc := range testCases {
		t.Run(name, func(t *testing.T) {
			assert := assert.New(t)

			doer := newStubDoer()
			clock := testclock.NewFakeClock(time.Now())
			retrier := IntervalRetrier{
				// Use an explicit positive interval instead of the zero value,
				// mirroring real usage (time.NewTicker rejects non-positive periods).
				interval:  time.Second,
				doer:      doer,
				clock:     clock,
				retriable: isRetriable,
			}
			retrierResult := make(chan error, 1)
			ctx, cancel := context.WithCancel(context.Background())
			defer cancel()

			go func() { retrierResult <- retrier.Do(ctx) }()

			if tc.cancel {
				cancel()
			}

			for _, err := range tc.errors {
				doer.errC <- err
				if tc.cancel {
					// Do not advance the clock after cancellation. Otherwise both
					// the tick and ctx.Done() may be ready when the retrier
					// selects; if the tick wins, the retrier calls the doer
					// again and blocks forever on errC, hanging the test.
					continue
				}
				clock.Step(retrier.interval)
			}

			assert.Equal(tc.wantErr, <-retrierResult)
		})
	}
}
// stubDoer is a test Doer whose results are fed in through a channel.
type stubDoer struct {
	// errC delivers the result of each Do call; every call consumes one value.
	errC chan error
}

// newStubDoer returns a stubDoer with an unbuffered result channel.
func newStubDoer() *stubDoer {
	stub := new(stubDoer)
	stub.errC = make(chan error)
	return stub
}

// Do blocks until a value is sent on errC and returns that value.
// The context is ignored.
func (d *stubDoer) Do(_ context.Context) error {
	result := <-d.errC
	return result
}
// isRetriable reports whether err carries exactly the message "retry me",
// which the tests use to mark an error as retriable.
func isRetriable(err error) bool {
	msg := err.Error()
	return msg == "retry me"
}