cloud: fix discovery of GCP nodes across multiple zones (#1943)

This commit is contained in:
Malte Poll 2023-06-20 12:02:31 +02:00 committed by GitHub
parent de2c21b555
commit 0b262a08bc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 418 additions and 8 deletions

View File

@ -23,6 +23,8 @@ import (
"path"
"regexp"
"strings"
"sync"
"time"
compute "cloud.google.com/go/compute/apiv1"
"cloud.google.com/go/compute/apiv1/computepb"
@ -38,6 +40,8 @@ import (
const (
// tagUsage is a label key used to indicate the use of the resource.
tagUsage = "constellation-use"
// maxCacheAgeInvariantResource is the maximum age of cached metadata for invariant resources.
maxCacheAgeInvariantResource = 24 * time.Hour // 1 day
)
var (
@ -52,8 +56,18 @@ type Cloud struct {
imds imdsAPI
instanceAPI instanceAPI
subnetAPI subnetAPI
zoneAPI zoneAPI
closers []func() error
// cached metadata
cacheMux sync.Mutex
regionCache string
regionCacheTime time.Time
zoneCache map[string]struct {
zones []string
zoneCacheTime time.Time
}
}
// New creates and initializes Cloud.
@ -85,12 +99,19 @@ func New(ctx context.Context) (cloud *Cloud, err error) {
}
closers = append(closers, subnetAPI.Close)
zoneAPI, err := compute.NewZonesRESTClient(ctx)
if err != nil {
return nil, err
}
closers = append(closers, zoneAPI.Close)
return &Cloud{
imds: imds.NewClient(nil),
instanceAPI: &instanceClient{insAPI},
globalForwardingRulesAPI: &globalForwardingRulesClient{globalForwardingRulesAPI},
regionalForwardingRulesAPI: &regionalForwardingRulesClient{regionalForwardingRulesAPI},
subnetAPI: subnetAPI,
zoneAPI: &zoneClient{zoneAPI},
closers: closers,
}, nil
}
@ -192,6 +213,7 @@ func (c *Cloud) getRegionalForwardingRule(ctx context.Context, project, uid, reg
}
// List retrieves all instances belonging to the current constellation.
// On GCP, this is done by listing all instances in a region by requesting all instances in each zone.
func (c *Cloud) List(ctx context.Context) ([]metadata.InstanceMetadata, error) {
project, zone, instanceName, err := c.retrieveInstanceInfo()
if err != nil {
@ -202,8 +224,32 @@ func (c *Cloud) List(ctx context.Context) ([]metadata.InstanceMetadata, error) {
return nil, err
}
region, err := c.region()
if err != nil {
return nil, fmt.Errorf("getting region: %w", err)
}
zones, err := c.zones(ctx, project, region)
if err != nil {
return nil, fmt.Errorf("getting zones: %w", err)
}
var instances []metadata.InstanceMetadata
for _, zone := range zones {
zoneInstances, err := c.listInZone(ctx, project, zone, uid)
if err != nil {
return nil, fmt.Errorf("listing instances in zone %s: %w", zone, err)
}
instances = append(instances, zoneInstances...)
}
return instances, nil
}
// listInZone retrieves all instances belonging to the current constellation in a given zone.
func (c *Cloud) listInZone(ctx context.Context, project, zone, uid string) ([]metadata.InstanceMetadata, error) {
var instances []metadata.InstanceMetadata
var resp *computepb.Instance
var err error
iter := c.instanceAPI.List(ctx, &computepb.ListInstancesRequest{
Filter: proto.String(fmt.Sprintf("labels.%s:%s", cloud.TagUID, uid)),
Project: project,
@ -399,6 +445,70 @@ func (c *Cloud) initSecretHash(ctx context.Context, project, zone, instanceName
return "", errors.New("retrieving compute instance: received instance with no init secret hash label")
}
// region retrieves the region that this instance is located in.
func (c *Cloud) region() (string, error) {
c.cacheMux.Lock()
defer c.cacheMux.Unlock()
// try to retrieve from cache first
if c.regionCache != "" &&
time.Since(c.regionCacheTime) < maxCacheAgeInvariantResource {
return c.regionCache, nil
}
zone, err := c.imds.Zone()
if err != nil {
return "", fmt.Errorf("retrieving zone from imds: %w", err)
}
region, err := regionFromZone(zone)
if err != nil {
return "", fmt.Errorf("retrieving region from zone: %w", err)
}
c.regionCache = region
c.regionCacheTime = time.Now()
return region, nil
}
// zones retrieves all zones that are within a region.
func (c *Cloud) zones(ctx context.Context, project, region string) ([]string, error) {
c.cacheMux.Lock()
defer c.cacheMux.Unlock()
// try to retrieve from cache first
if cachedZones, ok := c.zoneCache[region]; ok &&
time.Since(cachedZones.zoneCacheTime) < maxCacheAgeInvariantResource {
return cachedZones.zones, nil
}
req := &computepb.ListZonesRequest{
Project: project,
Filter: proto.String(fmt.Sprintf("name = \"%s*\"", region)),
}
zonesIter := c.zoneAPI.List(ctx, req)
var zones []string
var resp *computepb.Zone
var err error
for resp, err = zonesIter.Next(); err == nil; resp, err = zonesIter.Next() {
if resp == nil || resp.Name == nil {
continue
}
zones = append(zones, *resp.Name)
}
if err != nil && err != iterator.Done {
return nil, fmt.Errorf("listing zones: %w", err)
}
if c.zoneCache == nil {
c.zoneCache = make(map[string]struct {
zones []string
zoneCacheTime time.Time
})
}
c.zoneCache[region] = struct {
zones []string
zoneCacheTime time.Time
}{
zones: zones,
zoneCacheTime: time.Now(),
}
return zones, nil
}
// convertToInstanceMetadata converts a *computepb.Instance to a metadata.InstanceMetadata.
func convertToInstanceMetadata(in *computepb.Instance, project string, zone string) (metadata.InstanceMetadata, error) {
if in.Name == nil {
@ -435,3 +545,11 @@ func convertToInstanceMetadata(in *computepb.Instance, project string, zone stri
AliasIPRanges: ips,
}, nil
}
func regionFromZone(zone string) (string, error) {
zoneParts := strings.Split(zone, "-")
if len(zoneParts) != 3 {
return "", fmt.Errorf("invalid zone format: %s", zone)
}
return fmt.Sprintf("%s-%s", zoneParts[0], zoneParts[1]), nil
}

View File

@ -506,6 +506,26 @@ func TestList(t *testing.T) {
},
},
}
otherZoneInstance := &computepb.Instance{
Name: proto.String("otherZoneInstance"),
Zone: proto.String("someZone-east1-a"),
Labels: map[string]string{
cloud.TagUID: "1234",
cloud.TagRole: role.ControlPlane.String(),
},
NetworkInterfaces: []*computepb.NetworkInterface{
{
Name: proto.String("nic0"),
NetworkIP: proto.String("192.0.2.1"),
AliasIpRanges: []*computepb.AliasIpRange{
{
IpCidrRange: proto.String("198.51.100.0/24"),
},
},
Subnetwork: proto.String("projects/someProject/regions/someRegion/subnetworks/someSubnetwork"),
},
},
}
goodSubnet := &computepb.Subnetwork{
SecondaryIpRanges: []*computepb.SubnetworkSecondaryRange{
{
@ -516,8 +536,9 @@ func TestList(t *testing.T) {
testCases := map[string]struct {
imds stubIMDS
instanceAPI stubInstanceAPI
instanceAPI instanceAPI
subnetAPI stubSubnetAPI
zoneAPI stubZoneAPI
wantErr bool
wantInstances []metadata.InstanceMetadata
}{
@ -527,7 +548,7 @@ func TestList(t *testing.T) {
zone: "someZone-west3-b",
instanceName: "someInstance",
},
instanceAPI: stubInstanceAPI{
instanceAPI: &stubInstanceAPI{
instance: goodInstance,
iterator: &stubInstanceIterator{
instances: []*computepb.Instance{
@ -538,6 +559,15 @@ func TestList(t *testing.T) {
subnetAPI: stubSubnetAPI{
subnet: goodSubnet,
},
zoneAPI: stubZoneAPI{
iterator: &stubZoneIterator{
zones: []*computepb.Zone{
{
Name: proto.String("someZone-west3-b"),
},
},
},
},
wantInstances: []metadata.InstanceMetadata{
{
Name: "someInstance",
@ -549,13 +579,64 @@ func TestList(t *testing.T) {
},
},
},
"multi-zone": {
imds: stubIMDS{
projectID: "someProject",
zone: "someZone-west3-b",
instanceName: "someInstance",
},
instanceAPI: &fakeInstanceAPI{
instance: goodInstance,
iterators: map[string]*stubInstanceIterator{
"someZone-east1-a": {
instances: []*computepb.Instance{
otherZoneInstance,
},
},
"someZone-west3-b": {
instances: []*computepb.Instance{
goodInstance,
},
},
},
},
subnetAPI: stubSubnetAPI{
subnet: goodSubnet,
},
zoneAPI: stubZoneAPI{
iterator: &stubZoneIterator{
zones: []*computepb.Zone{
{Name: proto.String("someZone-east1-a")},
{Name: proto.String("someZone-west3-b")},
},
},
},
wantInstances: []metadata.InstanceMetadata{
{
Name: "otherZoneInstance",
Role: role.ControlPlane,
ProviderID: "gce://someProject/someZone-east1-a/otherZoneInstance",
VPCIP: "192.0.2.1",
AliasIPRanges: []string{"198.51.100.0/24"},
SecondaryIPRange: "198.51.100.0/24",
},
{
Name: "someInstance",
Role: role.ControlPlane,
ProviderID: "gce://someProject/someZone-west3-b/someInstance",
VPCIP: "192.0.2.0",
AliasIPRanges: []string{"198.51.100.0/24"},
SecondaryIPRange: "198.51.100.0/24",
},
},
},
"list multiple instances": {
imds: stubIMDS{
projectID: "someProject",
zone: "someZone-west3-b",
instanceName: "someInstance",
},
instanceAPI: stubInstanceAPI{
instanceAPI: &stubInstanceAPI{
instance: goodInstance,
iterator: &stubInstanceIterator{
instances: []*computepb.Instance{
@ -586,6 +667,15 @@ func TestList(t *testing.T) {
subnetAPI: stubSubnetAPI{
subnet: goodSubnet,
},
zoneAPI: stubZoneAPI{
iterator: &stubZoneIterator{
zones: []*computepb.Zone{
{
Name: proto.String("someZone-west3-b"),
},
},
},
},
wantInstances: []metadata.InstanceMetadata{
{
Name: "someInstance",
@ -609,7 +699,7 @@ func TestList(t *testing.T) {
imds: stubIMDS{
projectIDErr: someErr,
},
instanceAPI: stubInstanceAPI{
instanceAPI: &stubInstanceAPI{
instance: goodInstance,
iterator: &stubInstanceIterator{
instances: []*computepb.Instance{
@ -628,7 +718,7 @@ func TestList(t *testing.T) {
zone: "someZone-west3-b",
instanceName: "someInstance",
},
instanceAPI: stubInstanceAPI{
instanceAPI: &stubInstanceAPI{
instance: goodInstance,
iterator: &stubInstanceIterator{
err: someErr,
@ -637,6 +727,15 @@ func TestList(t *testing.T) {
subnetAPI: stubSubnetAPI{
subnet: goodSubnet,
},
zoneAPI: stubZoneAPI{
iterator: &stubZoneIterator{
zones: []*computepb.Zone{
{
Name: proto.String("someZone-west3-b"),
},
},
},
},
wantErr: true,
},
"get instance error": {
@ -645,7 +744,7 @@ func TestList(t *testing.T) {
zone: "someZone-west3-b",
instanceName: "someInstance",
},
instanceAPI: stubInstanceAPI{
instanceAPI: &stubInstanceAPI{
instanceErr: someErr,
iterator: &stubInstanceIterator{
instances: []*computepb.Instance{
@ -656,6 +755,15 @@ func TestList(t *testing.T) {
subnetAPI: stubSubnetAPI{
subnet: goodSubnet,
},
zoneAPI: stubZoneAPI{
iterator: &stubZoneIterator{
zones: []*computepb.Zone{
{
Name: proto.String("someZone-west3-b"),
},
},
},
},
wantErr: true,
},
"get subnet error": {
@ -664,7 +772,7 @@ func TestList(t *testing.T) {
zone: "someZone-west3-b",
instanceName: "someInstance",
},
instanceAPI: stubInstanceAPI{
instanceAPI: &stubInstanceAPI{
instance: goodInstance,
iterator: &stubInstanceIterator{
instances: []*computepb.Instance{
@ -675,6 +783,15 @@ func TestList(t *testing.T) {
subnetAPI: stubSubnetAPI{
subnetErr: someErr,
},
zoneAPI: stubZoneAPI{
iterator: &stubZoneIterator{
zones: []*computepb.Zone{
{
Name: proto.String("someZone-west3-b"),
},
},
},
},
wantErr: true,
},
}
@ -686,8 +803,9 @@ func TestList(t *testing.T) {
cloud := &Cloud{
imds: &tc.imds,
instanceAPI: &tc.instanceAPI,
instanceAPI: tc.instanceAPI,
subnetAPI: &tc.subnetAPI,
zoneAPI: &tc.zoneAPI,
}
instances, err := cloud.List(context.Background())
@ -701,6 +819,112 @@ func TestList(t *testing.T) {
}
}
func TestRegion(t *testing.T) {
someErr := errors.New("failed")
testCases := map[string]struct {
imds stubIMDS
wantRegion string
wantErr bool
}{
"success": {
imds: stubIMDS{
projectID: "someProject",
zone: "someregion-west3-b",
instanceName: "someInstance",
},
wantRegion: "someregion-west3",
},
"get zone error": {
imds: stubIMDS{
zoneErr: someErr,
},
wantErr: true,
},
"invalid zone format": {
imds: stubIMDS{
projectID: "someProject",
zone: "invalid",
instanceName: "someInstance",
},
wantErr: true,
},
}
for name, tc := range testCases {
t.Run(name, func(t *testing.T) {
assert := assert.New(t)
cloud := &Cloud{
imds: &tc.imds,
}
assert.Empty(cloud.regionCache)
gotRegion, err := cloud.region()
if tc.wantErr {
assert.Error(err)
return
}
assert.NoError(err)
assert.Equal(tc.wantRegion, gotRegion)
assert.Equal(tc.wantRegion, cloud.regionCache)
})
}
}
func TestZones(t *testing.T) {
someErr := errors.New("failed")
testCases := map[string]struct {
zoneAPI stubZoneAPI
wantZones []string
wantErr bool
}{
"success": {
zoneAPI: stubZoneAPI{
&stubZoneIterator{
zones: []*computepb.Zone{
{Name: proto.String("someregion-west3-b")},
{}, // missing name (should be ignored)
nil, // nil (should be ignored)
},
},
},
wantZones: []string{"someregion-west3-b"},
},
"get zones error": {
zoneAPI: stubZoneAPI{
&stubZoneIterator{
err: someErr,
},
},
wantErr: true,
},
}
for name, tc := range testCases {
t.Run(name, func(t *testing.T) {
assert := assert.New(t)
cloud := &Cloud{
zoneAPI: &tc.zoneAPI,
}
assert.Empty(cloud.zoneCache)
gotZones, err := cloud.zones(context.Background(), "someProject", "someregion-west3")
if tc.wantErr {
assert.Error(err)
return
}
assert.NoError(err)
assert.Equal(tc.wantZones, gotZones)
assert.Equal(tc.wantZones, cloud.zoneCache["someregion-west3"].zones)
})
}
}
func TestRetrieveInstanceInfo(t *testing.T) {
someErr := errors.New("failed")
@ -1035,6 +1259,27 @@ func (s *stubInstanceAPI) List(
func (s *stubInstanceAPI) Close() error { return nil }
type fakeInstanceAPI struct {
instance *computepb.Instance
instanceErr error
// iterators is a map of zone to instance iterator.
iterators map[string]*stubInstanceIterator
}
func (s *fakeInstanceAPI) Get(
_ context.Context, _ *computepb.GetInstanceRequest, _ ...gax.CallOption,
) (*computepb.Instance, error) {
return s.instance, s.instanceErr
}
func (s *fakeInstanceAPI) List(
_ context.Context, req *computepb.ListInstancesRequest, _ ...gax.CallOption,
) instanceIterator {
return s.iterators[req.GetZone()]
}
func (s *fakeInstanceAPI) Close() error { return nil }
type stubInstanceIterator struct {
ctr int
instances []*computepb.Instance
@ -1063,3 +1308,30 @@ func (s *stubSubnetAPI) Get(
return s.subnet, s.subnetErr
}
func (s *stubSubnetAPI) Close() error { return nil }
type stubZoneAPI struct {
iterator *stubZoneIterator
}
func (s *stubZoneAPI) List(_ context.Context,
_ *computepb.ListZonesRequest, _ ...gax.CallOption,
) zoneIterator {
return s.iterator
}
type stubZoneIterator struct {
ctr int
zones []*computepb.Zone
err error
}
func (s *stubZoneIterator) Next() (*computepb.Zone, error) {
if s.err != nil {
return nil, s.err
}
if s.ctr >= len(s.zones) {
return nil, iterator.Done
}
s.ctr++
return s.zones[s.ctr-1], nil
}

View File

@ -40,3 +40,7 @@ type subnetAPI interface {
Get(ctx context.Context, req *computepb.GetSubnetworkRequest, opts ...gax.CallOption) (*computepb.Subnetwork, error)
Close() error
}
type zoneAPI interface {
List(ctx context.Context, req *computepb.ListZonesRequest, opts ...gax.CallOption) zoneIterator
}

View File

@ -22,6 +22,10 @@ type instanceIterator interface {
Next() (*computepb.Instance, error)
}
type zoneIterator interface {
Next() (*computepb.Zone, error)
}
type globalForwardingRulesClient struct {
*compute.GlobalForwardingRulesClient
}
@ -63,3 +67,15 @@ func (c *instanceClient) List(ctx context.Context, req *computepb.ListInstancesR
) instanceIterator {
return c.InstancesClient.List(ctx, req)
}
type zoneClient struct {
*compute.ZonesClient
}
func (c *zoneClient) Close() error {
return c.ZonesClient.Close()
}
func (c *zoneClient) List(ctx context.Context, req *computepb.ListZonesRequest, opts ...gax.CallOption) zoneIterator {
return c.ZonesClient.List(ctx, req, opts...)
}