Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added exponential backoff on reconciliation failure #1460

Merged
merged 2 commits into from
Oct 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 40 additions & 4 deletions api/v1alpha2/terraform_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,12 @@ package v1alpha2
import (
"bytes"
"fmt"
"math"
"net"
"strings"
"time"
"unicode/utf8"

"github.com/flux-iac/tofu-controller/api/planid"
"github.com/fluxcd/pkg/apis/meta"
sourcev1 "github.com/fluxcd/source-controller/api/v1"
corev1 "k8s.io/api/core/v1"
Expand All @@ -33,6 +33,8 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/serializer"

"github.com/flux-iac/tofu-controller/api/planid"
)

const (
Expand Down Expand Up @@ -145,6 +147,21 @@ type TerraformSpec struct {
// +optional
RetryInterval *metav1.Duration `json:"retryInterval,omitempty"`

// The strategy to use when retrying a previously failed reconciliation.
// The default strategy is StaticInterval and the retry interval is based on the RetryInterval value.
// The ExponentialBackoff strategy uses the formula: 2^reconciliationFailures * RetryInterval with a
// maximum requeue duration of MaxRetryInterval.
// +kubebuilder:validation:Enum=StaticInterval;ExponentialBackoff
// +kubebuilder:default:string=StaticInterval
// +optional
RetryStrategy RetryStrategyEnum `json:"retryStrategy,omitempty"`

// The maximum requeue duration after a previously failed reconciliation.
// Only applicable when RetryStrategy is set to ExponentialBackoff.
// The default value is 24 hours when not specified.
// +optional
MaxRetryInterval *metav1.Duration `json:"maxRetryInterval,omitempty"`

// Path to the directory containing Terraform (.tf) files.
// Defaults to 'None', which translates to the root path of the SourceRef.
// +optional
Expand Down Expand Up @@ -521,6 +538,13 @@ const (
ForceUnlockEnumNo ForceUnlockEnum = "no"
)

// RetryStrategyEnum names the strategy used to pick the delay before retrying
// a previously failed reconciliation (see TerraformSpec.RetryStrategy).
type RetryStrategyEnum string

const (
// StaticInterval is the default strategy: the retry interval is based on
// the RetryInterval value.
StaticInterval RetryStrategyEnum = "StaticInterval"
// ExponentialBackoff uses 2^reconciliationFailures * RetryInterval, with a
// maximum requeue duration of MaxRetryInterval.
ExponentialBackoff RetryStrategyEnum = "ExponentialBackoff"
)

const (
TerraformKind = "Terraform"
TerraformFinalizer = "finalizers.tf.contrib.fluxcd.io"
Expand Down Expand Up @@ -892,12 +916,24 @@ func (in Terraform) GetDependsOn() []meta.NamespacedObjectReference {

// GetRetryInterval returns how long to wait before retrying a previously
// failed reconciliation.
//
// The base interval is Spec.RetryInterval, or 15 seconds when it is not set.
// With the ExponentialBackoff strategy the base interval is multiplied by
// 2^Status.ReconciliationFailures and capped at Spec.MaxRetryInterval
// (24 hours when it is not set). Any other strategy, or a non-positive base
// interval, returns the base interval unchanged.
func (in Terraform) GetRetryInterval() time.Duration {
	// The default retry interval is 15 seconds.
	retryInterval := 15 * time.Second
	if in.Spec.RetryInterval != nil {
		retryInterval = in.Spec.RetryInterval.Duration
	}

	// Nothing to scale for the static strategy or a non-positive base.
	if in.Spec.RetryStrategy != ExponentialBackoff || retryInterval <= 0 {
		return retryInterval
	}

	// The default maximum retry interval is 24 hours.
	maxRetryInterval := 24 * time.Hour
	if in.Spec.MaxRetryInterval != nil {
		maxRetryInterval = in.Spec.MaxRetryInterval.Duration
	}

	// Scale in float64 and compare against the cap before converting back to
	// time.Duration. Doing the multiplication on int64 wraps around once
	// enough failures accumulate (30 with the 15s default), which would yield
	// a negative interval that slips under the cap. In float64 a huge failure
	// count simply saturates to +Inf, which is clamped to the cap below.
	// A negative failure count is treated as zero.
	exponent := math.Max(float64(in.Status.ReconciliationFailures), 0)
	backoff := float64(retryInterval) * math.Pow(2, exponent)
	if backoff >= float64(maxRetryInterval) {
		return maxRetryInterval
	}

	return time.Duration(backoff)
}

// GetStatusConditions returns a pointer to the Status.Conditions slice.
Expand Down
81 changes: 81 additions & 0 deletions api/v1alpha2/terraform_types_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
package v1alpha2

import (
"testing"
"time"

. "github.com/onsi/gomega"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func TestGetRetryInterval(t *testing.T) {
g := NewGomegaWithT(t)

tests := []struct {
name string
terraform Terraform
expectedRetryInterval time.Duration
}{
{
name: "default retry interval",
terraform: Terraform{
Spec: TerraformSpec{},
},
expectedRetryInterval: 15 * time.Second,
},
{
name: "custom retry interval",
terraform: Terraform{
Spec: TerraformSpec{
RetryInterval: &metav1.Duration{Duration: 30 * time.Second},
},
},
expectedRetryInterval: 30 * time.Second,
},
{
name: "exponential backoff with default retry interval",
terraform: Terraform{
Spec: TerraformSpec{
RetryStrategy: ExponentialBackoff,
},
Status: TerraformStatus{
ReconciliationFailures: 2,
},
},
expectedRetryInterval: 60 * time.Second,
},
{
name: "exponential backoff",
terraform: Terraform{
Spec: TerraformSpec{
RetryStrategy: ExponentialBackoff,
RetryInterval: &metav1.Duration{Duration: 60 * time.Second},
},
Status: TerraformStatus{
ReconciliationFailures: 4,
},
},
expectedRetryInterval: 960 * time.Second,
},
{
name: "exponential backoff with max retry interval",
terraform: Terraform{
Spec: TerraformSpec{
RetryStrategy: ExponentialBackoff,
RetryInterval: &metav1.Duration{Duration: 60 * time.Second},
MaxRetryInterval: &metav1.Duration{Duration: 45 * time.Second},
},
Status: TerraformStatus{
ReconciliationFailures: 4,
},
},
expectedRetryInterval: 45 * time.Second,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
g.Expect(tt.terraform.GetRetryInterval()).To(Equal(tt.expectedRetryInterval))
})
}
}
5 changes: 5 additions & 0 deletions api/v1alpha2/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 17 additions & 0 deletions charts/tofu-controller/crds/crds.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5536,6 +5536,12 @@ spec:
interval:
description: The interval at which to reconcile the Terraform.
type: string
maxRetryInterval:
description: |-
The maximum requeue duration after a previously failed reconciliation.
Only applicable when RetryStrategy is set to ExponentialBackoff.
The default value is 24 hours when not specified.
type: string
parallelism:
default: 0
description: Parallelism limits the number of concurrent operations
Expand Down Expand Up @@ -5587,6 +5593,17 @@ spec:
The interval at which to retry a previously failed reconciliation.
The default value is 15 seconds when not specified.
type: string
retryStrategy:
default: StaticInterval
description: |-
The strategy to use when retrying a previously failed reconciliation.
The default strategy is StaticInterval and the retry interval is based on the RetryInterval value.
The ExponentialBackoff strategy uses the formula: 2^reconciliationFailures * RetryInterval with a
maximum requeue duration of MaxRetryInterval.
enum:
- StaticInterval
- ExponentialBackoff
type: string
runnerPodTemplate:
properties:
metadata:
Expand Down
17 changes: 17 additions & 0 deletions config/crd/bases/infra.contrib.fluxcd.io_terraforms.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5536,6 +5536,12 @@ spec:
interval:
description: The interval at which to reconcile the Terraform.
type: string
maxRetryInterval:
description: |-
The maximum requeue duration after a previously failed reconciliation.
Only applicable when RetryStrategy is set to ExponentialBackoff.
The default value is 24 hours when not specified.
type: string
parallelism:
default: 0
description: Parallelism limits the number of concurrent operations
Expand Down Expand Up @@ -5587,6 +5593,17 @@ spec:
The interval at which to retry a previously failed reconciliation.
The default value is 15 seconds when not specified.
type: string
retryStrategy:
default: StaticInterval
description: |-
The strategy to use when retrying a previously failed reconciliation.
The default strategy is StaticInterval and the retry interval is based on the RetryInterval value.
The ExponentialBackoff strategy uses the formula: 2^reconciliationFailures * RetryInterval with a
maximum requeue duration of MaxRetryInterval.
enum:
- StaticInterval
- ExponentialBackoff
type: string
runnerPodTemplate:
properties:
metadata:
Expand Down
72 changes: 72 additions & 0 deletions docs/References/terraform.md
Original file line number Diff line number Diff line change
Expand Up @@ -794,6 +794,12 @@ string
</table>
</div>
</div>
<h3 id="infra.contrib.fluxcd.io/v1alpha2.RetryStrategyEnum">RetryStrategyEnum
(<code>string</code> alias)</h3>
<p>
(<em>Appears on:</em>
<a href="#infra.contrib.fluxcd.io/v1alpha2.TerraformSpec">TerraformSpec</a>)
</p>
<h3 id="infra.contrib.fluxcd.io/v1alpha2.RunnerPodMetadata">RunnerPodMetadata
</h3>
<p>
Expand Down Expand Up @@ -1568,6 +1574,39 @@ The default value is 15 when not specified.</p>
</tr>
<tr>
<td>
<code>retryStrategy</code><br>
<em>
<a href="#infra.contrib.fluxcd.io/v1alpha2.RetryStrategyEnum">
RetryStrategyEnum
</a>
</em>
</td>
<td>
<em>(Optional)</em>
<p>The strategy to use when retrying a previously failed reconciliation.
The default strategy is StaticInterval and the retry interval is based on the RetryInterval value.
The ExponentialBackoff strategy uses the formula: 2^reconciliationFailures * RetryInterval with a
maximum requeue duration of MaxRetryInterval.</p>
</td>
</tr>
<tr>
<td>
<code>maxRetryInterval</code><br>
<em>
<a href="https://godoc.org/k8s.io/apimachinery/pkg/apis/meta/v1#Duration">
Kubernetes meta/v1.Duration
</a>
</em>
</td>
<td>
<em>(Optional)</em>
<p>The maximum requeue duration after a previously failed reconciliation.
Only applicable when RetryStrategy is set to ExponentialBackoff.
The default value is 24 hours when not specified.</p>
</td>
</tr>
<tr>
<td>
<code>path</code><br>
<em>
string
Expand Down Expand Up @@ -2131,6 +2170,39 @@ The default value is 15 when not specified.</p>
</tr>
<tr>
<td>
<code>retryStrategy</code><br>
<em>
<a href="#infra.contrib.fluxcd.io/v1alpha2.RetryStrategyEnum">
RetryStrategyEnum
</a>
</em>
</td>
<td>
<em>(Optional)</em>
<p>The strategy to use when retrying a previously failed reconciliation.
The default strategy is StaticInterval and the retry interval is based on the RetryInterval value.
The ExponentialBackoff strategy uses the formula: 2^reconciliationFailures * RetryInterval with a
maximum requeue duration of MaxRetryInterval.</p>
</td>
</tr>
<tr>
<td>
<code>maxRetryInterval</code><br>
<em>
<a href="https://godoc.org/k8s.io/apimachinery/pkg/apis/meta/v1#Duration">
Kubernetes meta/v1.Duration
</a>
</em>
</td>
<td>
<em>(Optional)</em>
<p>The maximum requeue duration after a previously failed reconciliation.
Only applicable when RetryStrategy is set to ExponentialBackoff.
The default value is 24 hours when not specified.</p>
</td>
</tr>
<tr>
<td>
<code>path</code><br>
<em>
string
Expand Down