Skip to content

Commit

Permalink
fix: health check timeout for services/NLB (#2899)
Browse files Browse the repository at this point in the history
* fix: health check timeout for services/NLB

The annotation for health check timeout was not consumed at all for
services

* add feature gate ServiceHealthCheckTimeout

allows people to set old behavior

* feat: rename featuregate
  • Loading branch information
project0 authored Dec 2, 2022
1 parent b5e9427 commit 2e740a7
Show file tree
Hide file tree
Showing 5 changed files with 54 additions and 9 deletions.
5 changes: 3 additions & 2 deletions docs/deploy/configurations.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@ This document covers configuration of the AWS Load Balancer controller

!!!warning "limitation"
The v2.0.0+ version of AWSLoadBalancerController currently only support one controller deployment(with one or multiple replicas) per cluster.

The AWSLoadBalancerController assumes it's the solo owner of worker node security group rules with `elbv2.k8s.aws/targetGroupBinding=shared` description, running multiple controller deployment will cause these controllers compete with each other updating worker node security group rules.

We will remove this limitation in future versions: [tracking issue](https://github.com/kubernetes-sigs/aws-load-balancer-controller/issues/2185)

## AWS API Access
Expand Down Expand Up @@ -158,3 +158,4 @@ They are a set of kye=value pairs that describe AWS load balance controller feat
| EnableServiceController | string | true | Toggles support for `Service` type resources. |
| EnableIPTargetType | string | true | Used to toggle support for target-type `ip` across `Ingress` and `Service` type resources. |
| SubnetsClusterTagCheck | string | true | Enable or disable the check for `kubernetes.io/cluster/${cluster-name}` during subnet auto-discovery |
| NLBHealthCheckTimeout | string | true | Enable or disable the use of `service.beta.kubernetes.io/aws-load-balancer-healthcheck-timeout` for `Service` type resources (NLB) |
5 changes: 4 additions & 1 deletion pkg/config/feature_gates.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@ package config

import (
"fmt"
"github.com/spf13/pflag"
"strconv"
"strings"

"github.com/spf13/pflag"
)

type Feature string
Expand All @@ -17,6 +18,7 @@ const (
EnableServiceController Feature = "EnableServiceController"
EnableIPTargetType Feature = "EnableIPTargetType"
SubnetsClusterTagCheck Feature = "SubnetsClusterTagCheck"
NLBHealthCheckTimeout Feature = "NLBHealthCheckTimeout"
)

type FeatureGates interface {
Expand Down Expand Up @@ -51,6 +53,7 @@ func NewFeatureGates() FeatureGates {
EnableServiceController: true,
EnableIPTargetType: true,
SubnetsClusterTagCheck: true,
NLBHealthCheckTimeout: true,
},
}
}
Expand Down
23 changes: 20 additions & 3 deletions pkg/service/model_build_target_group.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"k8s.io/apimachinery/pkg/util/intstr"
elbv2api "sigs.k8s.io/aws-load-balancer-controller/apis/elbv2/v1beta1"
"sigs.k8s.io/aws-load-balancer-controller/pkg/annotations"
"sigs.k8s.io/aws-load-balancer-controller/pkg/config"
"sigs.k8s.io/aws-load-balancer-controller/pkg/k8s"
elbv2model "sigs.k8s.io/aws-load-balancer-controller/pkg/model/elbv2"
"sigs.k8s.io/aws-load-balancer-controller/pkg/networking"
Expand Down Expand Up @@ -113,6 +114,13 @@ func (t *defaultModelBuildTask) buildTargetGroupHealthCheckConfigDefault(ctx con
if err != nil {
return nil, err
}
var healthCheckTimeoutSeconds *int64
if t.featureGates.Enabled(config.NLBHealthCheckTimeout) {
healthCheckTimeoutSeconds, err = t.buildTargetGroupHealthCheckTimeoutSeconds(ctx, t.defaultHealthCheckTimeout)
if err != nil {
return nil, err
}
}
healthyThresholdCount, err := t.buildTargetGroupHealthCheckHealthyThresholdCount(ctx, t.defaultHealthCheckHealthyThreshold)
if err != nil {
return nil, err
Expand All @@ -126,6 +134,7 @@ func (t *defaultModelBuildTask) buildTargetGroupHealthCheckConfigDefault(ctx con
Protocol: &healthCheckProtocol,
Path: healthCheckPathPtr,
IntervalSeconds: &intervalSeconds,
TimeoutSeconds: healthCheckTimeoutSeconds,
HealthyThresholdCount: &healthyThresholdCount,
UnhealthyThresholdCount: &unhealthyThresholdCount,
}, nil
Expand All @@ -148,6 +157,13 @@ func (t *defaultModelBuildTask) buildTargetGroupHealthCheckConfigForInstanceMode
if err != nil {
return nil, err
}
var healthCheckTimeoutSeconds *int64
if t.featureGates.Enabled(config.NLBHealthCheckTimeout) {
healthCheckTimeoutSeconds, err = t.buildTargetGroupHealthCheckTimeoutSeconds(ctx, t.defaultHealthCheckTimeoutForInstanceModeLocal)
if err != nil {
return nil, err
}
}
healthyThresholdCount, err := t.buildTargetGroupHealthCheckHealthyThresholdCount(ctx, t.defaultHealthCheckHealthyThresholdForInstanceModeLocal)
if err != nil {
return nil, err
Expand All @@ -161,6 +177,7 @@ func (t *defaultModelBuildTask) buildTargetGroupHealthCheckConfigForInstanceMode
Protocol: &healthCheckProtocol,
Path: healthCheckPathPtr,
IntervalSeconds: &intervalSeconds,
TimeoutSeconds: healthCheckTimeoutSeconds,
HealthyThresholdCount: &healthyThresholdCount,
UnhealthyThresholdCount: &unhealthyThresholdCount,
}, nil
Expand Down Expand Up @@ -308,12 +325,12 @@ func (t *defaultModelBuildTask) buildTargetGroupHealthCheckIntervalSeconds(_ con
return intervalSeconds, nil
}

func (t *defaultModelBuildTask) buildTargetGroupHealthCheckTimeoutSeconds(_ context.Context, defaultHealthCheckTimeout int64) (int64, error) {
func (t *defaultModelBuildTask) buildTargetGroupHealthCheckTimeoutSeconds(_ context.Context, defaultHealthCheckTimeout int64) (*int64, error) {
timeoutSeconds := defaultHealthCheckTimeout
if _, err := t.annotationParser.ParseInt64Annotation(annotations.SvcLBSuffixHCTimeout, &timeoutSeconds, t.service.Annotations); err != nil {
return 0, err
return nil, err
}
return timeoutSeconds, nil
return &timeoutSeconds, nil
}

func (t *defaultModelBuildTask) buildTargetGroupHealthCheckHealthyThresholdCount(_ context.Context, defaultHealthCheckHealthyThreshold int64) (int64, error) {
Expand Down
8 changes: 8 additions & 0 deletions pkg/service/model_build_target_group_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"k8s.io/apimachinery/pkg/util/intstr"
elbv2api "sigs.k8s.io/aws-load-balancer-controller/apis/elbv2/v1beta1"
"sigs.k8s.io/aws-load-balancer-controller/pkg/annotations"
"sigs.k8s.io/aws-load-balancer-controller/pkg/config"
"sigs.k8s.io/aws-load-balancer-controller/pkg/model/elbv2"
)

Expand Down Expand Up @@ -183,6 +184,7 @@ func Test_defaultModelBuilderTask_buildTargetHealthCheck(t *testing.T) {
Port: &trafficPort,
Protocol: (*elbv2.Protocol)(aws.String(string(elbv2.ProtocolTCP))),
IntervalSeconds: aws.Int64(10),
TimeoutSeconds: aws.Int64(10),
HealthyThresholdCount: aws.Int64(3),
UnhealthyThresholdCount: aws.Int64(3),
},
Expand All @@ -209,6 +211,7 @@ func Test_defaultModelBuilderTask_buildTargetHealthCheck(t *testing.T) {
Protocol: (*elbv2.Protocol)(aws.String("HTTP")),
Path: aws.String("/healthz"),
IntervalSeconds: aws.Int64(10),
TimeoutSeconds: aws.Int64(30),
HealthyThresholdCount: aws.Int64(2),
UnhealthyThresholdCount: aws.Int64(2),
},
Expand All @@ -229,6 +232,7 @@ func Test_defaultModelBuilderTask_buildTargetHealthCheck(t *testing.T) {
Protocol: (*elbv2.Protocol)(aws.String("HTTP")),
Path: aws.String("/"),
IntervalSeconds: aws.Int64(10),
TimeoutSeconds: aws.Int64(10),
HealthyThresholdCount: aws.Int64(3),
UnhealthyThresholdCount: aws.Int64(3),
},
Expand Down Expand Up @@ -284,6 +288,7 @@ func Test_defaultModelBuilderTask_buildTargetHealthCheck(t *testing.T) {
Port: &trafficPort,
Protocol: (*elbv2.Protocol)(aws.String(string(elbv2.ProtocolTCP))),
IntervalSeconds: aws.Int64(10),
TimeoutSeconds: aws.Int64(10),
HealthyThresholdCount: aws.Int64(3),
UnhealthyThresholdCount: aws.Int64(3),
},
Expand All @@ -304,6 +309,7 @@ func Test_defaultModelBuilderTask_buildTargetHealthCheck(t *testing.T) {
Protocol: (*elbv2.Protocol)(aws.String(string(elbv2.ProtocolHTTP))),
Path: aws.String("/healthz"),
IntervalSeconds: aws.Int64(10),
TimeoutSeconds: aws.Int64(6),
HealthyThresholdCount: aws.Int64(2),
UnhealthyThresholdCount: aws.Int64(2),
},
Expand Down Expand Up @@ -333,6 +339,7 @@ func Test_defaultModelBuilderTask_buildTargetHealthCheck(t *testing.T) {
Port: &port8888,
Protocol: (*elbv2.Protocol)(aws.String(string(elbv2.ProtocolTCP))),
IntervalSeconds: aws.Int64(10),
TimeoutSeconds: aws.Int64(30),
HealthyThresholdCount: aws.Int64(5),
UnhealthyThresholdCount: aws.Int64(5),
},
Expand All @@ -345,6 +352,7 @@ func Test_defaultModelBuilderTask_buildTargetHealthCheck(t *testing.T) {
builder := &defaultModelBuildTask{
service: tt.svc,
annotationParser: parser,
featureGates: config.NewFeatureGates(),
defaultAccessLogsS3Bucket: "",
defaultAccessLogsS3Prefix: "",
defaultLoadBalancingCrossZoneEnabled: false,
Expand Down
22 changes: 19 additions & 3 deletions pkg/service/model_builder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@ func Test_defaultModelBuilderTask_Build(t *testing.T) {
"port":"traffic-port",
"protocol":"TCP",
"intervalSeconds":10,
"timeoutSeconds":10,
"healthyThresholdCount":3,
"unhealthyThresholdCount":3
},
Expand Down Expand Up @@ -334,6 +335,7 @@ func Test_defaultModelBuilderTask_Build(t *testing.T) {
"port":"traffic-port",
"protocol":"TCP",
"intervalSeconds":10,
"timeoutSeconds":10,
"healthyThresholdCount":3,
"unhealthyThresholdCount":3
},
Expand Down Expand Up @@ -518,6 +520,7 @@ func Test_defaultModelBuilderTask_Build(t *testing.T) {
"protocol":"HTTP",
"path":"/healthz",
"intervalSeconds":10,
"timeoutSeconds":30,
"healthyThresholdCount":2,
"unhealthyThresholdCount":2
},
Expand All @@ -541,6 +544,7 @@ func Test_defaultModelBuilderTask_Build(t *testing.T) {
"protocol":"HTTP",
"path":"/healthz",
"intervalSeconds":10,
"timeoutSeconds":30,
"healthyThresholdCount":2,
"unhealthyThresholdCount":2
},
Expand Down Expand Up @@ -852,6 +856,7 @@ func Test_defaultModelBuilderTask_Build(t *testing.T) {
"protocol":"HTTP",
"path":"/healthz",
"intervalSeconds":10,
"timeoutSeconds":30,
"healthyThresholdCount":2,
"unhealthyThresholdCount":2
},
Expand All @@ -875,6 +880,7 @@ func Test_defaultModelBuilderTask_Build(t *testing.T) {
"protocol":"HTTP",
"path":"/healthz",
"intervalSeconds":10,
"timeoutSeconds":30,
"healthyThresholdCount":2,
"unhealthyThresholdCount":2
},
Expand Down Expand Up @@ -1180,6 +1186,7 @@ func Test_defaultModelBuilderTask_Build(t *testing.T) {
"port": "traffic-port",
"protocol":"TCP",
"intervalSeconds":10,
"timeoutSeconds":10,
"healthyThresholdCount":3,
"unhealthyThresholdCount":3
},
Expand All @@ -1202,6 +1209,7 @@ func Test_defaultModelBuilderTask_Build(t *testing.T) {
"port":"traffic-port",
"protocol":"TCP",
"intervalSeconds":10,
"timeoutSeconds":10,
"healthyThresholdCount":3,
"unhealthyThresholdCount":3
},
Expand Down Expand Up @@ -1448,8 +1456,9 @@ func Test_defaultModelBuilderTask_Build(t *testing.T) {
"healthCheckConfig":{
"port": 29123,
"protocol":"HTTP",
"path":"/healthz",
"path":"/healthz",
"intervalSeconds":10,
"timeoutSeconds":6,
"healthyThresholdCount":2,
"unhealthyThresholdCount":2
},
Expand All @@ -1471,8 +1480,9 @@ func Test_defaultModelBuilderTask_Build(t *testing.T) {
"healthCheckConfig":{
"port": 29123,
"protocol":"HTTP",
"path":"/healthz",
"path":"/healthz",
"intervalSeconds":10,
"timeoutSeconds":6,
"healthyThresholdCount":2,
"unhealthyThresholdCount":2
},
Expand Down Expand Up @@ -1729,6 +1739,7 @@ func Test_defaultModelBuilderTask_Build(t *testing.T) {
"port":"traffic-port",
"protocol":"TCP",
"intervalSeconds":10,
"timeoutSeconds":10,
"healthyThresholdCount":3,
"unhealthyThresholdCount":3
},
Expand Down Expand Up @@ -1941,7 +1952,8 @@ func Test_defaultModelBuilderTask_Build(t *testing.T) {
"unhealthyThresholdCount": 3,
"protocol": "TCP",
"port": "traffic-port",
"intervalSeconds": 10
"intervalSeconds": 10,
"timeoutSeconds":10
},
"targetGroupAttributes": [
{
Expand Down Expand Up @@ -2210,6 +2222,7 @@ func Test_defaultModelBuilderTask_Build(t *testing.T) {
"port": "traffic-port",
"protocol": "TCP",
"intervalSeconds": 10,
"timeoutSeconds":10,
"healthyThresholdCount": 3,
"unhealthyThresholdCount": 3
},
Expand Down Expand Up @@ -2355,6 +2368,7 @@ func Test_defaultModelBuilderTask_Build(t *testing.T) {
"port": "traffic-port",
"protocol": "TCP",
"intervalSeconds": 10,
"timeoutSeconds":10,
"healthyThresholdCount": 3,
"unhealthyThresholdCount": 3
},
Expand Down Expand Up @@ -2551,6 +2565,7 @@ func Test_defaultModelBuilderTask_Build(t *testing.T) {
"port": "traffic-port",
"protocol": "TCP",
"intervalSeconds": 10,
"timeoutSeconds":10,
"healthyThresholdCount": 3,
"unhealthyThresholdCount": 3
},
Expand Down Expand Up @@ -2693,6 +2708,7 @@ func Test_defaultModelBuilderTask_Build(t *testing.T) {
"port":"traffic-port",
"protocol":"TCP",
"intervalSeconds":10,
"timeoutSeconds":10,
"healthyThresholdCount":3,
"unhealthyThresholdCount":3
},
Expand Down

0 comments on commit 2e740a7

Please sign in to comment.