From 82f458f1f4cbb24c0ab491a2297e168e150796c5 Mon Sep 17 00:00:00 2001 From: Maciej Borsz Date: Wed, 24 Aug 2022 07:51:42 +0000 Subject: [PATCH] Implement CL2_RATE_LIMIT_POD_CREATION that drops artificial rate limiting --- .../common/wait_for_controlled_pods.go | 7 ++++++- clusterloader2/testing/load/config.yaml | 19 +++++++++++++++++++ .../load/modules/reconcile-objects.yaml | 9 +++++---- 3 files changed, 30 insertions(+), 5 deletions(-) diff --git a/clusterloader2/pkg/measurement/common/wait_for_controlled_pods.go b/clusterloader2/pkg/measurement/common/wait_for_controlled_pods.go index e1032645f..f196881c8 100644 --- a/clusterloader2/pkg/measurement/common/wait_for_controlled_pods.go +++ b/clusterloader2/pkg/measurement/common/wait_for_controlled_pods.go @@ -19,6 +19,7 @@ package common import ( "context" "fmt" + "os" "strings" "sync" "time" @@ -447,7 +448,11 @@ func (w *waitForControlledPodsRunningMeasurement) handleObjectLocked(oldObj, new } operationTimeout := w.operationTimeout - if isObjDeleted || isScalingDown { + // exactOperationTimeout controls whether we should skip multiplying by two operationTimeout on scale down/deletion. + // Defaults to false for backward compatibility. + // TODO(mborsz): Change default to true and remove. + _, exactOperationTimeout := os.LookupEnv("CL2_WAIT_FOR_CONTROLLED_PODS_USE_EXACT_OPERATION_TIMEOUT") + if !exactOperationTimeout && (isObjDeleted || isScalingDown) { // In case of deleting pods, twice as much time is required. // The pod deletion throughput equals half of the pod creation throughput. // NOTE: Starting from k8s 1.23 it's not true anymore, at least not in all cases. diff --git a/clusterloader2/testing/load/config.yaml b/clusterloader2/testing/load/config.yaml index f774686fb..60f6d07b5 100644 --- a/clusterloader2/testing/load/config.yaml +++ b/clusterloader2/testing/load/config.yaml @@ -12,6 +12,7 @@ {{$PODS_PER_NODE := DefaultParam .PODS_PER_NODE 30}} {{$LOAD_TEST_THROUGHPUT := DefaultParam .CL2_LOAD_TEST_THROUGHPUT 10}} {{$DELETE_TEST_THROUGHPUT := DefaultParam .CL2_DELETE_TEST_THROUGHPUT $LOAD_TEST_THROUGHPUT}} +{{$RATE_LIMIT_POD_CREATION := DefaultParam .CL2_RATE_LIMIT_POD_CREATION true}} {{$BIG_GROUP_SIZE := DefaultParam .BIG_GROUP_SIZE 250}} {{$MEDIUM_GROUP_SIZE := DefaultParam .MEDIUM_GROUP_SIZE 30}} {{$SMALL_GROUP_SIZE := DefaultParam .SMALL_GROUP_SIZE 5}} @@ -152,7 +153,13 @@ steps: params: actionName: "create" namespaces: {{$namespaces}} + {{if $RATE_LIMIT_POD_CREATION}} tuningSet: RandomizedSaturationTimeLimited + operationTimeout: 15m + {{else}} + tuningSet: Global100qps + operationTimeout: {{AddInt $saturationTime 900}}s + {{end}} testMaxReplicaFactor: {{$RANDOM_SCALE_FACTOR}} # We rely on the fact that daemonset is using the same image as the 'pod-startup-latency' module. # The goal is to cache the image to all nodes before we start any latency pod, @@ -261,7 +268,13 @@ steps: params: actionName: "scale and update" namespaces: {{$namespaces}} + {{if $RATE_LIMIT_POD_CREATION}} tuningSet: RandomizedScalingTimeLimited + operationTimeout: 15m + {{else}} + tuningSet: Global100qps + operationTimeout: {{AddInt (DivideInt $saturationTime 4) 900}}s + {{end}} randomScaleFactor: {{$RANDOM_SCALE_FACTOR}} testMaxReplicaFactor: {{$RANDOM_SCALE_FACTOR}} daemonSetImage: {{$latencyPodImage}} @@ -289,7 +302,13 @@ steps: params: actionName: "delete" namespaces: {{$namespaces}} + {{if $RATE_LIMIT_POD_CREATION}} tuningSet: RandomizedDeletionTimeLimited + operationTimeout: 15m + {{else}} + tuningSet: Global100qps + operationTimeout: {{AddInt $deletionTime 900}}s + {{end}} testMaxReplicaFactor: {{$RANDOM_SCALE_FACTOR}} daemonSetReplicas: 0 bigDeploymentSize: {{$BIG_GROUP_SIZE}} diff --git a/clusterloader2/testing/load/modules/reconcile-objects.yaml b/clusterloader2/testing/load/modules/reconcile-objects.yaml index 27566d9c4..58e41b8b9 100644 --- a/clusterloader2/testing/load/modules/reconcile-objects.yaml +++ b/clusterloader2/testing/load/modules/reconcile-objects.yaml @@ -10,6 +10,7 @@ {{$minReplicaFactor := SubtractFloat 1 $randomScaleFactor}} {{$maxReplicaFactor := AddFloat 1 $randomScaleFactor}} {{$testMaxReplicaFactor := AddFloat 1 .testMaxReplicaFactor}} +{{$operationTimeout := .operationTimeout}} # DaemonSets {{$daemonSetImage := DefaultParam .daemonSetImage "k8s.gcr.io/pause:3.0"}} @@ -75,7 +76,7 @@ steps: action: start checkIfPodsAreUpdated: {{$CHECK_IF_PODS_ARE_UPDATED}} labelSelector: group = load - operationTimeout: 15m + operationTimeout: {{$operationTimeout}} - name: {{$actionName}} phases: @@ -212,7 +213,7 @@ steps: min: 1 max: {{$namespaces}} replicasPerNamespace: 0 - tuningSet: RandomizedDeletionTimeLimited + tuningSet: {{$tuningSet}} objectBundle: {{range $ssIndex := Loop $pvSmallStatefulSetSize}} - basename: pv-small-statefulset-{{$ssIndex}} @@ -226,7 +227,7 @@ steps: min: 1 max: {{$namespaces}} replicasPerNamespace: 0 - tuningSet: RandomizedDeletionTimeLimited + tuningSet: {{$tuningSet}} objectBundle: {{range $ssIndex := Loop $pvMediumStatefulSetSize}} - basename: pv-medium-statefulset-{{$ssIndex}} @@ -254,5 +255,5 @@ steps: Params: desiredPVCCount: 0 labelSelector: group = load - timeout: 15m + timeout: {{$operationTimeout}} {{end}}