Skip to content

Commit

Permalink
suppor deleteSnapshot retries
Browse files Browse the repository at this point in the history
  • Loading branch information
sam-heilbron committed Jul 11, 2023
1 parent 83eca56 commit 9b50a48
Show file tree
Hide file tree
Showing 6 changed files with 64 additions and 29 deletions.
7 changes: 7 additions & 0 deletions changelog/v1.15.0-beta18/snapshot-writer-delete-retry.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
changelog:
- type: NON_USER_FACING
issueLink: https://github.com/solo-io/solo-projects/issues/5162
resolvesIssue: false
description: >-
Support retries in the SnapshotWriter DeleteSnapshot operation. This mirrors the logic
already supported in the WriteSnapshot operation.
72 changes: 53 additions & 19 deletions test/helpers/snapshot_writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,12 @@ type SnapshotWriter interface {

type SnapshotWriterImpl struct {
ResourceClientSet

// retryOptions is the criteria for retrying a Snapshot Write or Delete operation.
// Due to the eventually consistent nature of Gloo Edge, when applying changes in bulk,
// parent resources may be rejected by the validation webhook, if the Gloo hasn't processed the child
// resources. A more thorough solution would be to support bulk applies of resources.
// In the interim however, we retry the operation
retryOptions []retry.Option

// writeNamespace is the namespace that the SnapshotWriter expects resources to be written to by Gloo
Expand All @@ -30,7 +36,7 @@ type SnapshotWriterImpl struct {
writeNamespace string
}

func NewSnapshotWriter(clientSet ResourceClientSet, retryOptions []retry.Option) *SnapshotWriterImpl {
func NewSnapshotWriter(clientSet ResourceClientSet) *SnapshotWriterImpl {
defaultRetryOptions := []retry.Option{
retry.Attempts(3),
retry.RetryIf(func(err error) bool {
Expand All @@ -43,7 +49,7 @@ func NewSnapshotWriter(clientSet ResourceClientSet, retryOptions []retry.Option)

return &SnapshotWriterImpl{
ResourceClientSet: clientSet,
retryOptions: append(defaultRetryOptions, retryOptions...),
retryOptions: defaultRetryOptions,
// By default, Gloo will write resources to the gloo-system namespace
// This can be overridden by setting the WithNamespace option on the SnapshotWriter
writeNamespace: defaults.GlooSystem,
Expand All @@ -57,7 +63,13 @@ func (s *SnapshotWriterImpl) WithWriteNamespace(writeNamespace string) *Snapshot
return s
}

// WriteSnapshot writes all resources in the ApiSnapshot to the cache
// WithRetryOptions appends the retryOptions that the SnapshotWriter relies on to the default retry options
func (s *SnapshotWriterImpl) WithRetryOptions(retryOptions []retry.Option) *SnapshotWriterImpl {
s.retryOptions = append(s.retryOptions, retryOptions...)
return s
}

// WriteSnapshot writes all resources in the ApiSnapshot to the cache, retrying the operation based on the retryOptions
func (s *SnapshotWriterImpl) WriteSnapshot(snapshot *gloosnapshot.ApiSnapshot, writeOptions clients.WriteOpts) error {
return retry.Do(func() error {
if writeOptions.Ctx.Err() != nil {
Expand All @@ -69,7 +81,7 @@ func (s *SnapshotWriterImpl) WriteSnapshot(snapshot *gloosnapshot.ApiSnapshot, w
}, s.retryOptions...)
}

// WriteSnapshot writes all resources in the ApiSnapshot to the cache
// doWriteSnapshot attempts to write all resources in the ApiSnapshot to the cache once, or returns an error
func (s *SnapshotWriterImpl) doWriteSnapshot(snapshot *gloosnapshot.ApiSnapshot, writeOptions clients.WriteOpts) error {
// We intentionally create child resources first to avoid having the validation webhook reject
// the parent resource
Expand Down Expand Up @@ -156,86 +168,98 @@ func (s *SnapshotWriterImpl) isContinuableWriteError(writeError error) bool {
return errors.IsExist(writeError)
}

// DeleteSnapshot deletes all resources in the ApiSnapshot from the cache
// DeleteSnapshot deletes all resources in the ApiSnapshot from the cache, retrying the operation based on the retryOptions
func (s *SnapshotWriterImpl) DeleteSnapshot(snapshot *gloosnapshot.ApiSnapshot, deleteOptions clients.DeleteOpts) error {
return retry.Do(func() error {
if deleteOptions.Ctx.Err() != nil {
// intentionally return early if context is already done
// this is a backoff loop; by the time we get here ctx may be done
return nil
}
return s.doDeleteSnapshot(snapshot, deleteOptions)
}, s.retryOptions...)
}

// doDeleteSnapshot attempts to delete all resources in the ApiSnapshot from the cache once, or returns an error
func (s *SnapshotWriterImpl) doDeleteSnapshot(snapshot *gloosnapshot.ApiSnapshot, deleteOptions clients.DeleteOpts) error {
// We intentionally delete resources in the reverse order that we create resources
// If we delete child resources first, the validation webhook may reject the change

for _, gw := range snapshot.Gateways {
gwNamespace, gwName := gw.GetMetadata().Ref().Strings()
if deleteErr := s.GatewayClient().Delete(gwNamespace, gwName, deleteOptions); deleteErr != nil {
if deleteErr := s.GatewayClient().Delete(gwNamespace, gwName, deleteOptions); !s.isContinuableDeleteError(deleteErr) {
return deleteErr
}
}
for _, hgw := range snapshot.HttpGateways {
hgwNamespace, hgwName := hgw.GetMetadata().Ref().Strings()
if deleteErr := s.HttpGatewayClient().Delete(hgwNamespace, hgwName, deleteOptions); deleteErr != nil {
if deleteErr := s.HttpGatewayClient().Delete(hgwNamespace, hgwName, deleteOptions); !s.isContinuableDeleteError(deleteErr) {
return deleteErr
}
}
for _, tgw := range snapshot.TcpGateways {
tgwNamespace, tgwName := tgw.GetMetadata().Ref().Strings()
if deleteErr := s.TcpGatewayClient().Delete(tgwNamespace, tgwName, deleteOptions); deleteErr != nil {
if deleteErr := s.TcpGatewayClient().Delete(tgwNamespace, tgwName, deleteOptions); !s.isContinuableDeleteError(deleteErr) {
return deleteErr
}
}
for _, vs := range snapshot.VirtualServices {
vsNamespace, vsName := vs.GetMetadata().Ref().Strings()
if deleteErr := s.VirtualServiceClient().Delete(vsNamespace, vsName, deleteOptions); deleteErr != nil {
if deleteErr := s.VirtualServiceClient().Delete(vsNamespace, vsName, deleteOptions); !s.isContinuableDeleteError(deleteErr) {
return deleteErr
}
}
for _, rt := range snapshot.RouteTables {
rtNamespace, rtName := rt.GetMetadata().Ref().Strings()
if deleteErr := s.RouteTableClient().Delete(rtNamespace, rtName, deleteOptions); deleteErr != nil {
if deleteErr := s.RouteTableClient().Delete(rtNamespace, rtName, deleteOptions); !s.isContinuableDeleteError(deleteErr) {
return deleteErr
}
}
for _, ac := range snapshot.AuthConfigs {
acNamespace, acName := ac.GetMetadata().Ref().Strings()
if deleteErr := s.AuthConfigClient().Delete(acNamespace, acName, deleteOptions); deleteErr != nil {
if deleteErr := s.AuthConfigClient().Delete(acNamespace, acName, deleteOptions); !s.isContinuableDeleteError(deleteErr) {
return deleteErr
}
}
for _, rlc := range snapshot.Ratelimitconfigs {
rlcNamespace, rlcName := rlc.GetMetadata().Ref().Strings()
if deleteErr := s.RateLimitConfigClient().Delete(rlcNamespace, rlcName, deleteOptions); deleteErr != nil {
if deleteErr := s.RateLimitConfigClient().Delete(rlcNamespace, rlcName, deleteOptions); !s.isContinuableDeleteError(deleteErr) {
return deleteErr
}
}
for _, vhOpt := range snapshot.VirtualHostOptions {
vhOptNamespace, vhOptName := vhOpt.GetMetadata().Ref().Strings()
if deleteErr := s.VirtualHostOptionClient().Delete(vhOptNamespace, vhOptName, deleteOptions); deleteErr != nil {
if deleteErr := s.VirtualHostOptionClient().Delete(vhOptNamespace, vhOptName, deleteOptions); !s.isContinuableDeleteError(deleteErr) {
return deleteErr
}
}
for _, rtOpt := range snapshot.RouteOptions {
rtOptNamespace, rtOptName := rtOpt.GetMetadata().Ref().Strings()
if deleteErr := s.RouteOptionClient().Delete(rtOptNamespace, rtOptName, deleteOptions); deleteErr != nil {
if deleteErr := s.RouteOptionClient().Delete(rtOptNamespace, rtOptName, deleteOptions); !s.isContinuableDeleteError(deleteErr) {
return deleteErr
}
}
for _, usGroup := range snapshot.UpstreamGroups {
usGroupNamespace, usGroupName := usGroup.GetMetadata().Ref().Strings()
if deleteErr := s.UpstreamGroupClient().Delete(usGroupNamespace, usGroupName, deleteOptions); deleteErr != nil {
if deleteErr := s.UpstreamGroupClient().Delete(usGroupNamespace, usGroupName, deleteOptions); !s.isContinuableDeleteError(deleteErr) {
return deleteErr
}
}
for _, us := range snapshot.Upstreams {
usNamespace, usName := us.GetMetadata().Ref().Strings()
if deleteErr := s.UpstreamClient().Delete(usNamespace, usName, deleteOptions); deleteErr != nil {
if deleteErr := s.UpstreamClient().Delete(usNamespace, usName, deleteOptions); !s.isContinuableDeleteError(deleteErr) {
return deleteErr
}
}
for _, secret := range snapshot.Secrets {
secretNamespace, secretName := secret.GetMetadata().Ref().Strings()
if deleteErr := s.SecretClient().Delete(secretNamespace, secretName, deleteOptions); deleteErr != nil {
if deleteErr := s.SecretClient().Delete(secretNamespace, secretName, deleteOptions); !s.isContinuableDeleteError(deleteErr) {
return deleteErr
}
}
for _, artifact := range snapshot.Artifacts {
artifactNamespace, artifactName := artifact.GetMetadata().Ref().Strings()
if deleteErr := s.ArtifactClient().Delete(artifactNamespace, artifactName, deleteOptions); deleteErr != nil {
if deleteErr := s.ArtifactClient().Delete(artifactNamespace, artifactName, deleteOptions); !s.isContinuableDeleteError(deleteErr) {
return deleteErr
}
}
Expand All @@ -251,10 +275,20 @@ func (s *SnapshotWriterImpl) DeleteSnapshot(snapshot *gloosnapshot.ApiSnapshot,
}
for _, proxy := range proxies {
proxyNamespace, proxyName := proxy.GetMetadata().Ref().Strings()
if deleteErr := s.ProxyClient().Delete(proxyNamespace, proxyName, deleteOptions); deleteErr != nil {
if deleteErr := s.ProxyClient().Delete(proxyNamespace, proxyName, deleteOptions); !s.isContinuableDeleteError(deleteErr) {
return deleteErr
}
}

return nil
}

func (s *SnapshotWriterImpl) isContinuableDeleteError(deleteError error) bool {
if deleteError == nil {
return true
}

// Since we delete resources in bulk, with retries, we may hit a case where a resource doesn't exist
// We can ignore that error and continue to try to delete other resources in the Snapshot
return errors.IsNotExist(deleteError)
}
4 changes: 1 addition & 3 deletions test/kube2e/gateway/gateway_suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@ import (

kubeutils2 "github.com/solo-io/gloo/test/testutils"

"github.com/avast/retry-go"

gatewaydefaults "github.com/solo-io/gloo/projects/gateway/pkg/defaults"

gloodefaults "github.com/solo-io/gloo/projects/gloo/pkg/defaults"
Expand Down Expand Up @@ -67,7 +65,7 @@ func StartTestHelper() {
resourceClientset, err = kube2e.NewDefaultKubeResourceClientSet(ctx)
Expect(err).NotTo(HaveOccurred(), "can create kube resource client set")

snapshotWriter = helpers.NewSnapshotWriter(resourceClientset, []retry.Option{}).WithWriteNamespace(testHelper.InstallNamespace)
snapshotWriter = helpers.NewSnapshotWriter(resourceClientset).WithWriteNamespace(testHelper.InstallNamespace)
}

func TearDownTestHelper() {
Expand Down
3 changes: 1 addition & 2 deletions test/kube2e/gloo/gloo_suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ import (

"github.com/solo-io/gloo/test/services"

"github.com/avast/retry-go"
"github.com/solo-io/gloo/projects/gloo/pkg/defaults"
"github.com/solo-io/gloo/test/helpers"
"github.com/solo-io/gloo/test/kube2e"
Expand Down Expand Up @@ -63,7 +62,7 @@ var _ = BeforeSuite(func() {
resourceClientset, err = kube2e.NewDefaultKubeResourceClientSet(ctx)
Expect(err).NotTo(HaveOccurred(), "can create kube resource client set")

snapshotWriter = helpers.NewSnapshotWriter(resourceClientset, []retry.Option{}).WithWriteNamespace(testHelper.InstallNamespace)
snapshotWriter = helpers.NewSnapshotWriter(resourceClientset).WithWriteNamespace(testHelper.InstallNamespace)

envoyFactory = envoy.NewFactory()

Expand Down
3 changes: 1 addition & 2 deletions test/kube2e/gloomtls/gloo_mtls_suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ import (
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

"github.com/avast/retry-go"
"github.com/solo-io/gloo/test/kube2e"

"github.com/solo-io/gloo/projects/gloo/pkg/defaults"
Expand Down Expand Up @@ -64,7 +63,7 @@ func StartTestHelper() {
resourceClientset, err = kube2e.NewDefaultKubeResourceClientSet(ctx)
Expect(err).NotTo(HaveOccurred(), "can create kube resource client set")

snapshotWriter = helpers.NewSnapshotWriter(resourceClientset, []retry.Option{}).WithWriteNamespace(testHelper.InstallNamespace)
snapshotWriter = helpers.NewSnapshotWriter(resourceClientset).WithWriteNamespace(testHelper.InstallNamespace)
}

func TearDownTestHelper() {
Expand Down
4 changes: 1 addition & 3 deletions test/kube2e/gloomtls/gloo_mtls_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,6 @@ import (
var _ = Describe("Kube2e: mTLS", func() {

var (
testRunnerVs *gatewayv1.VirtualService

glooResources *gloosnapshot.ApiSnapshot
)

Expand All @@ -42,7 +40,7 @@ var _ = Describe("Kube2e: mTLS", func() {
},
},
}
testRunnerVs = helpers.NewVirtualServiceBuilder().
testRunnerVs := helpers.NewVirtualServiceBuilder().
WithName(helper.TestrunnerName).
WithNamespace(testHelper.InstallNamespace).
WithDomain(helper.TestrunnerName).
Expand Down

0 comments on commit 9b50a48

Please sign in to comment.