diff --git a/pkg/providers/v1/aws.go b/pkg/providers/v1/aws.go index 8977e3bf7e..070b608641 100644 --- a/pkg/providers/v1/aws.go +++ b/pkg/providers/v1/aws.go @@ -2072,6 +2072,7 @@ func (c *Cloud) buildNLBHealthCheckConfiguration(svc *v1.Service) (healthCheckCo UnhealthyThreshold: 2, } } + if parseStringAnnotation(svc.Annotations, ServiceAnnotationLoadBalancerHealthCheckProtocol, &hc.Protocol) { hc.Protocol = strings.ToUpper(hc.Protocol) } @@ -2086,6 +2087,31 @@ func (c *Cloud) buildNLBHealthCheckConfiguration(svc *v1.Service) (healthCheckCo parseStringAnnotation(svc.Annotations, ServiceAnnotationLoadBalancerHealthCheckPort, &hc.Port) + switch c.cfg.Global.ClusterServiceLoadBalancerHealthProbeMode { + case config.ClusterServiceLoadBalancerHealthProbeModeShared: + // For a non-local service, we override the health check to use the kube-proxy port when no other overrides are provided. + // The kube-proxy port should be open on all nodes and allows the health check to check the nodes ability to proxy traffic. + // When the node is shutting down, the health check should fail before the node loses the ability to route traffic to the backend pod. + // This allows the load balancer to gracefully drain connections from the node. + if svc.Spec.ExternalTrafficPolicy != v1.ServiceExternalTrafficPolicyTypeLocal { + hc.Path = defaultKubeProxyHealthCheckPath + if c.cfg.Global.ClusterServiceSharedLoadBalancerHealthProbePath != "" { + hc.Path = c.cfg.Global.ClusterServiceSharedLoadBalancerHealthProbePath + } + + hc.Port = strconv.Itoa(int(defaultKubeProxyHealthCheckPort)) + if c.cfg.Global.ClusterServiceSharedLoadBalancerHealthProbePort != 0 { + hc.Port = strconv.Itoa(int(c.cfg.Global.ClusterServiceSharedLoadBalancerHealthProbePort)) + } + + hc.Protocol = elbv2.ProtocolEnumHttp + } + case config.ClusterServiceLoadBalancerHealthProbeModeServiceNodePort, "": + // Configuration is already up to date as this is the default case. 
+ default: + return healthCheckConfig{}, fmt.Errorf("Unsupported ClusterServiceLoadBalancerHealthProbeMode %v", c.cfg.Global.ClusterServiceLoadBalancerHealthProbeMode) + } + if _, err := parseInt64Annotation(svc.Annotations, ServiceAnnotationLoadBalancerHCInterval, &hc.Interval); err != nil { return healthCheckConfig{}, err } @@ -2484,6 +2510,9 @@ func (c *Cloud) EnsureLoadBalancer(ctx context.Context, clusterName string, apiS } } else { klog.V(4).Infof("service %v does not need custom health checks", apiService.Name) + var hcPath string + hcPort := tcpHealthCheckPort + annotationProtocol := strings.ToLower(annotations[ServiceAnnotationLoadBalancerBEProtocol]) var hcProtocol string if annotationProtocol == "https" || annotationProtocol == "ssl" { @@ -2491,8 +2520,23 @@ func (c *Cloud) EnsureLoadBalancer(ctx context.Context, clusterName string, apiS } else { hcProtocol = "TCP" } - // there must be no path on TCP health check - err = c.ensureLoadBalancerHealthCheck(loadBalancer, hcProtocol, tcpHealthCheckPort, "", annotations) + + if c.cfg.Global.ClusterServiceLoadBalancerHealthProbeMode == config.ClusterServiceLoadBalancerHealthProbeModeShared { + // Use the kube-proxy port as the health check port for non-local services. 
+ hcProtocol = "HTTP" + hcPath = defaultKubeProxyHealthCheckPath + hcPort = int32(defaultKubeProxyHealthCheckPort) + + if c.cfg.Global.ClusterServiceSharedLoadBalancerHealthProbePath != "" { + hcPath = c.cfg.Global.ClusterServiceSharedLoadBalancerHealthProbePath + } + + if c.cfg.Global.ClusterServiceSharedLoadBalancerHealthProbePort != 0 { + hcPort = c.cfg.Global.ClusterServiceSharedLoadBalancerHealthProbePort + } + } + + err = c.ensureLoadBalancerHealthCheck(loadBalancer, hcProtocol, hcPort, hcPath, annotations) if err != nil { return nil, err } diff --git a/pkg/providers/v1/aws_loadbalancer.go b/pkg/providers/v1/aws_loadbalancer.go index c39ea3de37..5e170f1280 100644 --- a/pkg/providers/v1/aws_loadbalancer.go +++ b/pkg/providers/v1/aws_loadbalancer.go @@ -68,6 +68,9 @@ var ( defaultHealthCheckPort = "traffic-port" defaultHealthCheckPath = "/" + defaultKubeProxyHealthCheckPort = 10256 + defaultKubeProxyHealthCheckPath = "/healthz" + // Defaults for ELB Target operations defaultRegisterTargetsChunkSize = 100 defaultDeregisterTargetsChunkSize = 100 diff --git a/pkg/providers/v1/aws_test.go b/pkg/providers/v1/aws_test.go index 577f5d72cf..ba16b32bf7 100644 --- a/pkg/providers/v1/aws_test.go +++ b/pkg/providers/v1/aws_test.go @@ -3180,11 +3180,12 @@ func TestCloud_sortELBSecurityGroupList(t *testing.T) { func TestCloud_buildNLBHealthCheckConfiguration(t *testing.T) { tests := []struct { - name string - annotations map[string]string - service *v1.Service - want healthCheckConfig - wantError bool + name string + annotations map[string]string + service *v1.Service + modifyConfig func(*config.CloudConfig) + want healthCheckConfig + wantError bool }{ { name: "default cluster", @@ -3216,6 +3217,110 @@ func TestCloud_buildNLBHealthCheckConfiguration(t *testing.T) { }, wantError: false, }, + { + name: "default cluster with shared health check", + annotations: map[string]string{}, + service: &v1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-svc", + UID: "UID", + }, + 
Spec: v1.ServiceSpec{ + Ports: []v1.ServicePort{ + { + Name: "http", + Protocol: v1.ProtocolTCP, + Port: 8080, + TargetPort: intstr.FromInt(8880), + NodePort: 32205, + }, + }, + }, + }, + modifyConfig: func(cfg *config.CloudConfig) { + cfg.Global.ClusterServiceLoadBalancerHealthProbeMode = config.ClusterServiceLoadBalancerHealthProbeModeShared + }, + want: healthCheckConfig{ + Port: "10256", + Protocol: elbv2.ProtocolEnumHttp, + Path: "/healthz", + Interval: 30, + Timeout: 10, + HealthyThreshold: 3, + UnhealthyThreshold: 3, + }, + wantError: false, + }, + { + name: "default cluster with shared health check and custom port", + annotations: map[string]string{}, + service: &v1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-svc", + UID: "UID", + }, + Spec: v1.ServiceSpec{ + Ports: []v1.ServicePort{ + { + Name: "http", + Protocol: v1.ProtocolTCP, + Port: 8080, + TargetPort: intstr.FromInt(8880), + NodePort: 32205, + }, + }, + }, + }, + modifyConfig: func(cfg *config.CloudConfig) { + cfg.Global.ClusterServiceLoadBalancerHealthProbeMode = config.ClusterServiceLoadBalancerHealthProbeModeShared + cfg.Global.ClusterServiceSharedLoadBalancerHealthProbePort = 8080 + }, + want: healthCheckConfig{ + Port: "8080", + Protocol: elbv2.ProtocolEnumHttp, + Path: "/healthz", + Interval: 30, + Timeout: 10, + HealthyThreshold: 3, + UnhealthyThreshold: 3, + }, + wantError: false, + }, + { + name: "default cluster with shared health check and custom path", + annotations: map[string]string{}, + service: &v1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-svc", + UID: "UID", + }, + Spec: v1.ServiceSpec{ + Ports: []v1.ServicePort{ + { + Name: "http", + Protocol: v1.ProtocolTCP, + Port: 8080, + TargetPort: intstr.FromInt(8880), + NodePort: 32205, + }, + }, + }, + }, + modifyConfig: func(cfg *config.CloudConfig) { + cfg.Global.ClusterServiceLoadBalancerHealthProbeMode = config.ClusterServiceLoadBalancerHealthProbeModeShared + 
cfg.Global.ClusterServiceSharedLoadBalancerHealthProbePath = "/custom-healthz" + }, + want: healthCheckConfig{ + Port: "10256", + Protocol: elbv2.ProtocolEnumHttp, + Path: "/custom-healthz", + Interval: 30, + Timeout: 10, + HealthyThreshold: 3, + UnhealthyThreshold: 3, + }, + wantError: false, + }, { name: "default local", annotations: map[string]string{}, @@ -3457,7 +3562,14 @@ func TestCloud_buildNLBHealthCheckConfiguration(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - c := &Cloud{} + c := &Cloud{ + cfg: &config.CloudConfig{}, + } + + if tt.modifyConfig != nil { + tt.modifyConfig(c.cfg) + } + hc, err := c.buildNLBHealthCheckConfiguration(tt.service) if !tt.wantError { assert.Equal(t, tt.want, hc) diff --git a/pkg/providers/v1/config/config.go b/pkg/providers/v1/config/config.go index ef6e371115..efae450ed8 100644 --- a/pkg/providers/v1/config/config.go +++ b/pkg/providers/v1/config/config.go @@ -2,14 +2,23 @@ package config import ( "fmt" - "github.com/aws/aws-sdk-go/aws/request" "strings" + "github.com/aws/aws-sdk-go/aws/request" + "github.com/aws/aws-sdk-go/aws/endpoints" "k8s.io/klog/v2" ) +const ( + // ClusterServiceLoadBalancerHealthProbeModeShared is the shared health probe mode for cluster service load balancer. + ClusterServiceLoadBalancerHealthProbeModeShared = "Shared" + + // ClusterServiceLoadBalancerHealthProbeModeServiceNodePort is the service node port health probe mode for cluster service load balancer. + ClusterServiceLoadBalancerHealthProbeModeServiceNodePort = "ServiceNodePort" +) + // CloudConfig wraps the settings for the AWS cloud provider. // NOTE: Cloud config files should follow the same Kubernetes deprecation policy as // flags or CLIs. Config fields should not change behavior in incompatible ways and @@ -62,6 +71,18 @@ type CloudConfig struct { // NodeIPFamilies determines which IP addresses are added to node objects and their ordering. 
+ NodeIPFamilies []string + + // ClusterServiceLoadBalancerHealthProbeMode determines the health probe mode for cluster service load balancer. + // Supported values are `Shared` and `ServiceNodePort`. + // `ServiceNodePort`: the health probe will be created against each port of each service by watching the backend application (default). + // `Shared`: all cluster services share one HTTP probe targeting the kube-proxy on the node (/healthz:10256). + ClusterServiceLoadBalancerHealthProbeMode string `json:"clusterServiceLoadBalancerHealthProbeMode,omitempty" yaml:"clusterServiceLoadBalancerHealthProbeMode,omitempty"` + + // ClusterServiceSharedLoadBalancerHealthProbePort defines the target port of the shared health probe. Defaults to 10256. + ClusterServiceSharedLoadBalancerHealthProbePort int32 `json:"clusterServiceSharedLoadBalancerHealthProbePort,omitempty" yaml:"clusterServiceSharedLoadBalancerHealthProbePort,omitempty"` + + // ClusterServiceSharedLoadBalancerHealthProbePath defines the target path of the shared health probe. Defaults to `/healthz`. + ClusterServiceSharedLoadBalancerHealthProbePath string `json:"clusterServiceSharedLoadBalancerHealthProbePath,omitempty" yaml:"clusterServiceSharedLoadBalancerHealthProbePath,omitempty"` } // [ServiceOverride "1"] // Service = s3