From 891c8230a5de3f32758751334014bee6ffae8af8 Mon Sep 17 00:00:00 2001 From: vincent178 Date: Wed, 27 May 2020 12:59:53 +0800 Subject: [PATCH] cherry pick #2307 to release-1.1 Signed-off-by: sre-bot --- docs/api-references/docs.md | 34 +++++++ go.mod | 1 + manifests/crd.yaml | 7 ++ .../pingcap/v1alpha1/openapi_generated.go | 28 ++++++ .../v1alpha1/tidbclusterautoscaler_types.go | 16 +++ .../pingcap/v1alpha1/zz_generated.deepcopy.go | 5 + .../autoscaler/autoscaler_manager.go | 2 + pkg/autoscaler/autoscaler/tikv_autoscaler.go | 23 ++++- pkg/autoscaler/autoscaler/util.go | 79 ++++++++++++++- pkg/autoscaler/autoscaler/util_test.go | 97 +++++++++++++++++++ pkg/label/label.go | 6 ++ tests/e2e/tidbcluster/serial.go | 93 +++++++++++++++--- 12 files changed, 370 insertions(+), 21 deletions(-) diff --git a/docs/api-references/docs.md b/docs/api-references/docs.md index 320888104d..fdfeba3edd 100644 --- a/docs/api-references/docs.md +++ b/docs/api-references/docs.md @@ -1623,6 +1623,13 @@ TidbMonitorStatus +

AutoScalerPhase

+

+(Appears on: +BasicAutoScalerStatus) +

+

+

BRConfig

(Appears on: @@ -2467,6 +2474,19 @@ to fetch the recommended replicas for TiKV/TiDB

+phase
+ + +AutoScalerPhase + + + + +

Phase describes cluster auto scaling phase

+ + + + metrics
@@ -15183,6 +15203,20 @@ BasicAutoScalerSpec

+ + +readyToScaleThresholdSeconds
+ +int32 + + + +(Optional) +

ReadyToScaleThresholdSeconds represents duration that the ReadyToScale phase +should last for before auto scaling. +If not set, the default ReadyToScaleThresholdSeconds will be set to 30.

+ +

TikvAutoScalerStatus

diff --git a/go.mod b/go.mod index 3ac43dbca4..5f8d70beb6 100644 --- a/go.mod +++ b/go.mod @@ -39,6 +39,7 @@ require ( github.com/grpc-ecosystem/go-grpc-middleware v1.0.1-0.20190118093823-f849b5445de4 // indirect github.com/grpc-ecosystem/grpc-gateway v1.13.0 // indirect github.com/imdario/mergo v0.3.7 // indirect + github.com/jonboulle/clockwork v0.1.0 github.com/juju/errors v0.0.0-20180806074554-22422dad46e1 github.com/juju/loggo v0.0.0-20180524022052-584905176618 // indirect github.com/juju/testing v0.0.0-20180920084828-472a3e8b2073 // indirect diff --git a/manifests/crd.yaml b/manifests/crd.yaml index f68d3fbcef..28fae400c5 100644 --- a/manifests/crd.yaml +++ b/manifests/crd.yaml @@ -6267,6 +6267,9 @@ spec: minReplicas: format: int32 type: integer + readyToScaleThresholdSeconds: + format: int32 + type: integer scaleInIntervalSeconds: format: int32 type: integer @@ -6310,6 +6313,8 @@ spec: - thresholdValue type: object type: array + phase: + type: string recommendedReplicas: format: int32 type: integer @@ -6339,6 +6344,8 @@ spec: - thresholdValue type: object type: array + phase: + type: string recommendedReplicas: format: int32 type: integer diff --git a/pkg/apis/pingcap/v1alpha1/openapi_generated.go b/pkg/apis/pingcap/v1alpha1/openapi_generated.go index 4ce76dc057..bf19a6739c 100644 --- a/pkg/apis/pingcap/v1alpha1/openapi_generated.go +++ b/pkg/apis/pingcap/v1alpha1/openapi_generated.go @@ -916,6 +916,13 @@ func schema_pkg_apis_pingcap_v1alpha1_BasicAutoScalerStatus(ref common.Reference Description: "BasicAutoScalerStatus describe the basic auto-scaling status", Type: []string{"object"}, Properties: map[string]spec.Schema{ + "phase": { + SchemaProps: spec.SchemaProps{ + Description: "Phase describes cluster auto scaling phase", + Type: []string{"string"}, + Format: "", + }, + }, "metrics": { SchemaProps: spec.SchemaProps{ Description: "MetricsStatusList describes the metrics status in the last auto-scaling reconciliation", @@ -7494,6 +7501,13 @@ func schema_pkg_apis_pingcap_v1alpha1_TidbAutoScalerStatus(ref common.ReferenceC Description: "TidbAutoScalerStatus describe the auto-scaling status of tidb", Type: []string{"object"}, Properties: map[string]spec.Schema{ + "phase": { + SchemaProps: spec.SchemaProps{ + Description: "Phase describes cluster auto scaling phase", + Type: []string{"string"}, + Format: "", + }, + }, "metrics": { SchemaProps: spec.SchemaProps{ Description: "MetricsStatusList describes the metrics status in the last auto-scaling reconciliation", @@ -8526,6 +8540,13 @@ func schema_pkg_apis_pingcap_v1alpha1_TikvAutoScalerSpec(ref common.ReferenceCal Ref: ref("github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.ExternalEndpoint"), }, }, + "readyToScaleThresholdSeconds": { + SchemaProps: spec.SchemaProps{ + Description: "ReadyToScaleThresholdSeconds represents duration that the ReadyToScale phase should last for before auto scaling. If not set, the default ReadyToScaleThresholdSeconds will be set to 30.", + Type: []string{"integer"}, + Format: "int32", + }, + }, }, Required: []string{"maxReplicas"}, }, @@ -8542,6 +8563,13 @@ func schema_pkg_apis_pingcap_v1alpha1_TikvAutoScalerStatus(ref common.ReferenceC Description: "TikvAutoScalerStatus describe the auto-scaling status of tikv", Type: []string{"object"}, Properties: map[string]spec.Schema{ + "phase": { + SchemaProps: spec.SchemaProps{ + Description: "Phase describes cluster auto scaling phase", + Type: []string{"string"}, + Format: "", + }, + }, "metrics": { SchemaProps: spec.SchemaProps{ Description: "MetricsStatusList describes the metrics status in the last auto-scaling reconciliation", diff --git a/pkg/apis/pingcap/v1alpha1/tidbclusterautoscaler_types.go b/pkg/apis/pingcap/v1alpha1/tidbclusterautoscaler_types.go index 9ad9fd5b63..e55e234d2e 100644 --- a/pkg/apis/pingcap/v1alpha1/tidbclusterautoscaler_types.go +++ b/pkg/apis/pingcap/v1alpha1/tidbclusterautoscaler_types.go @@ -18,6 +18,14 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) +type AutoScalerPhase string + +const ( + NormalAutoScalerPhase AutoScalerPhase = "Normal" + ReadyToScaleOutAutoScalerPhase AutoScalerPhase = "ReadyToScaleOut" + ReadyToScaleInAutoScalerPhase AutoScalerPhase = "ReadyToScaleIn" +) + // +genclient // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object @@ -76,6 +84,12 @@ type TidbClusterAutoScalerSpec struct { // TikvAutoScalerSpec describes the spec for tikv auto-scaling type TikvAutoScalerSpec struct { BasicAutoScalerSpec `json:",inline"` + + // ReadyToScaleThresholdSeconds represents duration that the ReadyToScale phase + // should last for before auto scaling. + // If not set, the default ReadyToScaleThresholdSeconds will be set to 30. + // +optional + ReadyToScaleThresholdSeconds *int32 `json:"readyToScaleThresholdSeconds,omitempty"` } // +k8s:openapi-gen=true @@ -180,6 +194,8 @@ type TikvAutoScalerStatus struct { // +k8s:openapi-gen=true // BasicAutoScalerStatus describe the basic auto-scaling status type BasicAutoScalerStatus struct { + // Phase describes cluster auto scaling phase + Phase AutoScalerPhase `json:"phase,omitempty"` // MetricsStatusList describes the metrics status in the last auto-scaling reconciliation // +optional MetricsStatusList []MetricsStatus `json:"metrics,omitempty"` diff --git a/pkg/apis/pingcap/v1alpha1/zz_generated.deepcopy.go b/pkg/apis/pingcap/v1alpha1/zz_generated.deepcopy.go index 37ce5091e0..70bae4d33c 100644 --- a/pkg/apis/pingcap/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/apis/pingcap/v1alpha1/zz_generated.deepcopy.go @@ -6793,6 +6793,11 @@ func (in *TidbMonitorStatus) DeepCopy() *TidbMonitorStatus { func (in *TikvAutoScalerSpec) DeepCopyInto(out *TikvAutoScalerSpec) { *out = *in in.BasicAutoScalerSpec.DeepCopyInto(&out.BasicAutoScalerSpec) + if in.ReadyToScaleThresholdSeconds != nil { + in, out := &in.ReadyToScaleThresholdSeconds, &out.ReadyToScaleThresholdSeconds + *out = new(int32) + **out = **in + } return } diff --git a/pkg/autoscaler/autoscaler/autoscaler_manager.go b/pkg/autoscaler/autoscaler/autoscaler_manager.go index f52a4449ca..67bc83bf4f 100644 --- a/pkg/autoscaler/autoscaler/autoscaler_manager.go +++ b/pkg/autoscaler/autoscaler/autoscaler_manager.go @@ -151,6 +151,8 @@ func (am *autoScalerManager) updateAutoScaling(oldTc *v1alpha1.TidbCluster, return nil, nil } + tac.Annotations[label.AnnLastSyncingTimestamp] = fmt.Sprintf("%d", time.Now().Unix()) + if tac.Spec.TiKV != nil { if oldTc.Status.TiKV.StatefulSet != nil { tac.Status.TiKV.CurrentReplicas = oldTc.Status.TiKV.StatefulSet.CurrentReplicas diff --git a/pkg/autoscaler/autoscaler/tikv_autoscaler.go b/pkg/autoscaler/autoscaler/tikv_autoscaler.go index 4b15ff8d98..28d7ced390 100644 --- a/pkg/autoscaler/autoscaler/tikv_autoscaler.go +++ b/pkg/autoscaler/autoscaler/tikv_autoscaler.go @@ -70,12 +70,24 @@ func (am *autoScalerManager) syncTiKV(tc *v1alpha1.TidbCluster, tac *v1alpha1.Ti // The currentReplicas of TiKV calculated in auto-scaling is the count of the StateUp TiKV instance, so we need to // add the number of other state tikv instance replicas when we update the TidbCluster.Spec.TiKV.Replicas func syncTiKVAfterCalculated(tc *v1alpha1.TidbCluster, tac *v1alpha1.TidbClusterAutoScaler, currentReplicas, recommendedReplicas int32, sts *appsv1.StatefulSet) error { - intervalSeconds := tac.Spec.TiKV.ScaleInIntervalSeconds - if recommendedReplicas > tc.Spec.TiKV.Replicas { - intervalSeconds = tac.Spec.TiKV.ScaleOutIntervalSeconds + if recommendedReplicas > currentReplicas { + if tac.Status.TiKV.Phase != v1alpha1.ReadyToScaleOutAutoScalerPhase { + tac.Status.TiKV.Phase = v1alpha1.ReadyToScaleOutAutoScalerPhase + // phase could change from Normal to ReadyToScaleOut, ReadyToScaleIn to ReadyToScaleOut, + // reset timestamp in both cases. + tac.Annotations[label.AnnTiKVReadyToScaleTimestamp] = fmt.Sprintf("%d", time.Now().Unix()) + } + } else { + if tac.Status.TiKV.Phase != v1alpha1.ReadyToScaleInAutoScalerPhase { + tac.Status.TiKV.Phase = v1alpha1.ReadyToScaleInAutoScalerPhase + // phase could change from Normal to ReadyToScaleIn, ReadyToScaleOut to ReadyToScaleIn, + // reset timestamp in both cases. + tac.Annotations[label.AnnTiKVReadyToScaleTimestamp] = fmt.Sprintf("%d", time.Now().Unix()) + } } - ableToScale, err := checkStsAutoScalingInterval(tac, *intervalSeconds, v1alpha1.TiKVMemberType) + + ableToScale, err := checkStsAutoScaling(tac, *tac.Spec.TiKV.ReadyToScaleThresholdSeconds, *intervalSeconds, v1alpha1.TiKVMemberType) if err != nil { return err } @@ -98,7 +110,6 @@ func filterTiKVInstances(tc *v1alpha1.TidbCluster) []string { // we record the auto-scaling out slot for tikv, in order to add special hot labels when they are created func updateTcTiKVIfScale(tc *v1alpha1.TidbCluster, tac *v1alpha1.TidbClusterAutoScaler, currentReplicas, recommendedReplicas int32, sts *appsv1.StatefulSet) error { - tac.Annotations[label.AnnTiKVLastAutoScalingTimestamp] = fmt.Sprintf("%d", time.Now().Unix()) if recommendedReplicas > currentReplicas { newlyScaleOutOrdinalSets := helper.GetPodOrdinals(recommendedReplicas, sts).Difference(helper.GetPodOrdinals(currentReplicas, sts)) if newlyScaleOutOrdinalSets.Len() > 0 { @@ -113,6 +124,8 @@ func updateTcTiKVIfScale(tc *v1alpha1.TidbCluster, tac *v1alpha1.TidbClusterAuto tc.Annotations[label.AnnTiKVAutoScalingOutOrdinals] = v } } + tac.Status.TiKV.Phase = v1alpha1.NormalAutoScalerPhase + tac.Annotations[label.AnnTiKVLastAutoScalingTimestamp] = fmt.Sprintf("%d", time.Now().Unix()) tc.Spec.TiKV.Replicas = recommendedReplicas tac.Status.TiKV.RecommendedReplicas = &recommendedReplicas return nil diff --git a/pkg/autoscaler/autoscaler/util.go b/pkg/autoscaler/autoscaler/util.go index dbc1efdd65..6a17dd5185 100644 --- a/pkg/autoscaler/autoscaler/util.go +++ b/pkg/autoscaler/autoscaler/util.go @@ -18,7 +18,9 @@ import ( "strconv" "time" + "github.com/jonboulle/clockwork" "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1" + "github.com/pingcap/tidb-operator/pkg/controller" "github.com/pingcap/tidb-operator/pkg/label" operatorUtils "github.com/pingcap/tidb-operator/pkg/util" appsv1 "k8s.io/api/apps/v1" @@ -57,11 +59,81 @@ func checkStsAutoScalingPrerequisites(set *appsv1.StatefulSet) bool { return true } -// checkStsAutoScalingInterval would check whether there is enough interval duration between every two auto-scaling -func checkStsAutoScalingInterval(tac *v1alpha1.TidbClusterAutoScaler, intervalSeconds int32, memberType v1alpha1.MemberType) (bool, error) { +func checkStsAutoScaling(tac *v1alpha1.TidbClusterAutoScaler, thresholdSeconds, intervalSeconds int32, memberType v1alpha1.MemberType) (bool, error) { + realClock := clockwork.NewRealClock() + if tac.Annotations == nil { + tac.Annotations = map[string]string{} + } + // 3*controller.ResyncDuration is maximum time allowed before reset phase status + ableToScale, err := checkLastSyncingTimestamp(tac, 3*controller.ResyncDuration, realClock) + if err != nil { + return false, err + } + if !ableToScale { + return false, nil + } + ableToScale, err = checkStsReadyAutoScalingTimestamp(tac, thresholdSeconds, realClock) + if err != nil { + return false, err + } + if !ableToScale { + return false, nil + } + ableToScale, err = checkStsAutoScalingInterval(tac, intervalSeconds, memberType) + if err != nil { + return false, err + } + if !ableToScale { + return false, nil + } + return true, nil +} + +// checkLastSyncingTimestamp reset TiKV phase if last auto scaling timestamp is longer than thresholdSec +func checkLastSyncingTimestamp(tac *v1alpha1.TidbClusterAutoScaler, thresholdSec time.Duration, clock clockwork.Clock) (bool, error) { if tac.Annotations == nil { tac.Annotations = map[string]string{} } + + lastAutoScalingTimestamp, existed := tac.Annotations[label.AnnLastSyncingTimestamp] + if !existed { + // NOTE: because record autoscaler sync timestamp happens after check auto scale, + // label will not exist during first sync, return allow auto scale in this case. + return true, nil + } + t, err := strconv.ParseInt(lastAutoScalingTimestamp, 10, 64) + if err != nil { + return false, err + } + // if there's no resync within thresholdSec, reset TiKV phase to Normal + if clock.Now().After(time.Unix(t, 0).Add(thresholdSec)) { + tac.Status.TiKV.Phase = v1alpha1.NormalAutoScalerPhase + return false, nil + } + return true, nil +} + +// checkStsReadyAutoScalingTimestamp would check whether there is enough time window after ready to scale +func checkStsReadyAutoScalingTimestamp(tac *v1alpha1.TidbClusterAutoScaler, thresholdSeconds int32, clock clockwork.Clock) (bool, error) { + readyAutoScalingTimestamp, existed := tac.Annotations[label.AnnTiKVReadyToScaleTimestamp] + + if !existed { + tac.Annotations[label.AnnTiKVReadyToScaleTimestamp] = fmt.Sprintf("%d", clock.Now().Unix()) + return false, nil + } + t, err := strconv.ParseInt(readyAutoScalingTimestamp, 10, 32) + if err != nil { + return false, err + } + readyAutoScalingSec := int32(clock.Now().Sub(time.Unix(t, 0)).Seconds()) + if thresholdSeconds > readyAutoScalingSec { + return false, nil + } + return true, nil +} + +// checkStsAutoScalingInterval would check whether there is enough interval duration between every two auto-scaling +func checkStsAutoScalingInterval(tac *v1alpha1.TidbClusterAutoScaler, intervalSeconds int32, memberType v1alpha1.MemberType) (bool, error) { lastAutoScalingTimestamp, existed := tac.Annotations[label.AnnTiDBLastAutoScalingTimestamp] if memberType == v1alpha1.TiKVMemberType { lastAutoScalingTimestamp, existed = tac.Annotations[label.AnnTiKVLastAutoScalingTimestamp] @@ -147,6 +219,9 @@ func defaultTAC(tac *v1alpha1.TidbClusterAutoScaler) { tac.Spec.TiKV.MetricsTimeDuration = pointer.StringPtr("3m") } } + if tac.Spec.TiKV.ReadyToScaleThresholdSeconds == nil { + tac.Spec.TiKV.ReadyToScaleThresholdSeconds = pointer.Int32Ptr(30) + } } if tac.Spec.TiDB != nil { diff --git a/pkg/autoscaler/autoscaler/util_test.go b/pkg/autoscaler/autoscaler/util_test.go index 6668dcf070..5b5fd15b6c 100644 --- a/pkg/autoscaler/autoscaler/util_test.go +++ b/pkg/autoscaler/autoscaler/util_test.go @@ -18,6 +18,7 @@ import ( "testing" "time" + "github.com/jonboulle/clockwork" . "github.com/onsi/gomega" "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1" "github.com/pingcap/tidb-operator/pkg/label" @@ -26,6 +27,102 @@ import ( "k8s.io/utils/pointer" ) +func Test_checkLastSyncingTimestamp(t *testing.T) { + g := NewGomegaWithT(t) + c := clockwork.NewFakeClockAt(time.Now()) + tests := []struct { + name string + withTimestamp bool + lastSyncSecAgo int + expectPhase v1alpha1.AutoScalerPhase + expectedPermitScaling bool + }{ + { + name: "tikv, no timestamp", + withTimestamp: false, + lastSyncSecAgo: 0, + expectPhase: v1alpha1.ReadyToScaleOutAutoScalerPhase, + expectedPermitScaling: true, + }, + { + name: "tikv, last sync 10s ago", + withTimestamp: true, + lastSyncSecAgo: 10, + expectPhase: v1alpha1.ReadyToScaleOutAutoScalerPhase, + expectedPermitScaling: true, + }, + { + name: "tikv, last sync 120s ago", + withTimestamp: true, + lastSyncSecAgo: 120, + expectPhase: v1alpha1.NormalAutoScalerPhase, + expectedPermitScaling: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tac := newTidbClusterAutoScaler() + tac.Status.TiKV = &v1alpha1.TikvAutoScalerStatus{BasicAutoScalerStatus: v1alpha1.BasicAutoScalerStatus{Phase: v1alpha1.ReadyToScaleOutAutoScalerPhase}} + if tt.withTimestamp { + d := time.Duration(tt.lastSyncSecAgo) * time.Second + tac.Annotations[label.AnnLastSyncingTimestamp] = fmt.Sprintf("%d", time.Now().Add(-d).Unix()) + } else { + tac.Annotations = map[string]string{} + } + r, err := checkLastSyncingTimestamp(tac, 100*time.Second, c) + g.Expect(err).Should(BeNil()) + g.Expect(r).Should(Equal(tt.expectedPermitScaling)) + g.Expect(tac.Status.TiKV.Phase).Should(Equal(tt.expectPhase)) + }) + } +} + +func TestCheckStsReadyAutoScalingTimestamp(t *testing.T) { + g := NewGomegaWithT(t) + c := clockwork.NewFakeClockAt(time.Now()) + tests := []struct { + name string + withTimestamp bool + readyAutoScalingSec int + expectedPermitScaling bool + }{ + { + name: "tikv, no timestamp", + withTimestamp: false, + readyAutoScalingSec: 0, + expectedPermitScaling: false, + }, + { + name: "tikv, ready autoscaling 60s", + withTimestamp: true, + readyAutoScalingSec: 60, + expectedPermitScaling: false, + }, + { + name: "tikv, ready autoscaling 120s", + withTimestamp: true, + readyAutoScalingSec: 120, + expectedPermitScaling: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + thresholdSec := int32(100) + tac := newTidbClusterAutoScaler() + d := time.Duration(tt.readyAutoScalingSec) * time.Second + if tt.withTimestamp { + tac.Annotations[label.AnnTiKVReadyToScaleTimestamp] = fmt.Sprintf("%d", time.Now().Add(-d).Unix()) + } else { + tac.Annotations = map[string]string{} + } + r, err := checkStsReadyAutoScalingTimestamp(tac, thresholdSec, c) + g.Expect(err).Should(BeNil()) + g.Expect(r).Should(Equal(tt.expectedPermitScaling)) + }) + } +} + func TestCheckStsAutoScalingInterval(t *testing.T) { g := NewGomegaWithT(t) tests := []struct { diff --git a/pkg/label/label.go b/pkg/label/label.go index cecbc7870e..432ad8a301 100644 --- a/pkg/label/label.go +++ b/pkg/label/label.go @@ -109,6 +109,12 @@ const ( // AnnTiKVLastAutoScalingTimestamp is annotation key of tidbclusterto which ordinal is created by tikv auto-scaling AnnTiKVLastAutoScalingTimestamp = "tikv.tidb.pingcap.com/last-autoscaling-timestamp" + // AnnTiKVReadyToScaleTimestamp records timestamp when tikv ready to scale + AnnTiKVReadyToScaleTimestamp = "tikv.tidb.pingcap.com/ready-to-scale-timestamp" + + // AnnLastSyncingTimestamp records last sync timestamp + AnnLastSyncingTimestamp = "tidb.pingcap.com/last-syncing-timestamp" + // AnnTiDBConsecutiveScaleOutCount describes the least consecutive count to scale-out for tidb AnnTiDBConsecutiveScaleOutCount = "tidb.tidb.pingcap.com/consecutive-scale-out-count" // AnnTiDBConsecutiveScaleInCount describes the least consecutive count to scale-in for tidb diff --git a/tests/e2e/tidbcluster/serial.go b/tests/e2e/tidbcluster/serial.go index a4c97fa1f7..4c16feda15 100644 --- a/tests/e2e/tidbcluster/serial.go +++ b/tests/e2e/tidbcluster/serial.go @@ -435,6 +435,7 @@ var _ = ginkgo.Describe("[tidb-operator][Serial]", func() { MetricsTimeDuration: &duration, ScaleInIntervalSeconds: pointer.Int32Ptr(100), }, + ReadyToScaleThresholdSeconds: pointer.Int32Ptr(40), } tac.Spec.TiKV.Metrics = []autoscalingv2beta2.MetricSpec{} tac.Spec.TiKV.Metrics = append(tac.Spec.TiKV.Metrics, defaultMetricSpec) @@ -445,23 +446,49 @@ var _ = ginkgo.Describe("[tidb-operator][Serial]", func() { framework.ExpectNoError(err, "create pdapi error") defer cancel() var firstScaleTimestamp int64 - err = wait.Poll(10*time.Second, 10*time.Minute, func() (done bool, err error) { + var readyToScaleTimestamp int64 + err = wait.Poll(10*time.Second, 5*time.Minute, func() (done bool, err error) { + tac, err = cli.PingcapV1alpha1().TidbClusterAutoScalers(ns).Get(tac.Name, metav1.GetOptions{}) + if err != nil { + return false, nil + } + if tac.Annotations == nil || len(tac.Annotations) < 1 { + framework.Logf("tac haven't marked any annotation") + return false, nil + } + t, ok := tac.Annotations[label.AnnTiKVReadyToScaleTimestamp] + if !ok { + framework.Logf("tac has no tikv.tidb.pingcap.com/ready-to-scale-timestamp annotation") + return false, nil + } + readyToScaleTimestamp, err = strconv.ParseInt(t, 10, 64) + if err != nil { + return false, err + } + if tac.Status.TiKV.Phase != v1alpha1.ReadyToScaleOutAutoScalerPhase { + framework.Logf("tac dont' have the right ReadyToScale phase, expect: %s, got %s", v1alpha1.ReadyToScaleOutAutoScalerPhase, tac.Status.TiKV.Phase) + return false, nil + } + return true, nil + }) + framework.ExpectNoError(err, "check tikv has ready-to-scale-timestamp") + err = wait.Poll(10*time.Second, 5*time.Minute, func() (done bool, err error) { tc, err := cli.PingcapV1alpha1().TidbClusters(tc.Namespace).Get(tc.Name, metav1.GetOptions{}) if err != nil { return false, nil } // check replicas if tc.Spec.TiKV.Replicas != int32(4) { - klog.Infof("tikv haven't auto-scale to 4 replicas") + framework.Logf("tikv haven't auto-scale to 4 replicas") return false, nil } if len(tc.Status.TiKV.Stores) != 4 { - klog.Infof("tikv's stores haven't auto-scale to 4") + framework.Logf("tikv's stores haven't auto-scale to 4") return false, nil } // check annotations if tc.Annotations == nil || len(tc.Annotations) < 1 { - klog.Infof("tc haven't marked any annotation") + framework.Logf("tc haven't marked any annotation") return false, nil } tac, err = cli.PingcapV1alpha1().TidbClusterAutoScalers(ns).Get(tac.Name, metav1.GetOptions{}) @@ -469,18 +496,25 @@ var _ = ginkgo.Describe("[tidb-operator][Serial]", func() { return false, nil } if tac.Annotations == nil || len(tac.Annotations) < 1 { - klog.Infof("tac haven't marked any annotation") + framework.Logf("tac haven't marked any annotation") return false, nil } v, ok := tac.Annotations[label.AnnTiKVLastAutoScalingTimestamp] if !ok { - klog.Infof("tac haven't marked any annotation") + framework.Logf("tac has no tikv.tidb.pingcap.com/last-autoscaling-timestamp annotation") return false, nil } firstScaleTimestamp, err = strconv.ParseInt(v, 10, 64) if err != nil { return false, err } + // check readyToScaleTimestamp + if time.Now().Sub(time.Unix(readyToScaleTimestamp, 0)).Seconds() < 40 { + return false, fmt.Errorf("tikv doesn't meet the ReadyToScale threshold") + } + if tac.Status.TiKV.Phase != v1alpha1.NormalAutoScalerPhase { + return false, fmt.Errorf("tikv don't have right ReadyToScale phase") + } // check store label storeId := "" for k, v := range tc.Status.TiKV.Stores { @@ -509,7 +543,7 @@ var _ = ginkgo.Describe("[tidb-operator][Serial]", func() { return false, nil }) framework.ExpectNoError(err, "check tikv auto-scale to 4 error") - klog.Info("success to check tikv auto scale-out to 4 replicas") + framework.Logf("success to check tikv auto scale-out to 4 replicas") mp = &mock.MonitorParams{ Name: tc.Name, @@ -523,23 +557,48 @@ var _ = ginkgo.Describe("[tidb-operator][Serial]", func() { err = mock.SetPrometheusResponse(monitor.Name, monitor.Namespace, mp, fw) framework.ExpectNoError(err, "set tikv mock metrics error") + err = wait.Poll(10*time.Second, 5*time.Minute, func() (done bool, err error) { + tac, err = cli.PingcapV1alpha1().TidbClusterAutoScalers(ns).Get(tac.Name, metav1.GetOptions{}) + if err != nil { + return false, nil + } + if tac.Annotations == nil || len(tac.Annotations) < 1 { + framework.Logf("tac haven't marked any annotation") + return false, nil + } + t, ok := tac.Annotations[label.AnnTiKVReadyToScaleTimestamp] + if !ok { + framework.Logf("tac has no tikv.tidb.pingcap.com/ready-to-scale-timestamp annotation") + return false, nil + } + readyToScaleTimestamp, err = strconv.ParseInt(t, 10, 64) + if err != nil { + return false, err + } + if tac.Status.TiKV.Phase != v1alpha1.ReadyToScaleInAutoScalerPhase { + framework.Logf("tac dont' have the right ReadyToScale phase, expect: %s, got %s", v1alpha1.ReadyToScaleOutAutoScalerPhase, tac.Status.TiKV.Phase) + return false, nil + } + return true, nil + }) + framework.ExpectNoError(err, "check tikv has ready-to-scale-timestamp") err = wait.Poll(5*time.Second, 5*time.Minute, func() (done bool, err error) { tc, err = cli.PingcapV1alpha1().TidbClusters(tc.Namespace).Get(tc.Name, metav1.GetOptions{}) if err != nil { return false, nil } if tc.Spec.TiKV.Replicas != 3 { - klog.Info("tikv haven't auto-scale to 3 replicas") + framework.Logf("tikv haven't auto-scale to 3 replicas") return false, nil } if len(tc.Status.TiKV.Stores) != 3 { - klog.Info("tikv's store haven't auto-scale to 3") + framework.Logf("tikv's store haven't auto-scale to 3") return false, nil } if tc.Annotations != nil && len(tc.Annotations) > 0 { _, ok := tc.Annotations[label.AnnTiKVAutoScalingOutOrdinals] if ok { - klog.Infof("tikv auto-scale out annotation still exists") + framework.Logf("tikv auto-scale out annotation still exists") return false, nil } } @@ -548,12 +607,12 @@ var _ = ginkgo.Describe("[tidb-operator][Serial]", func() { return false, nil } if tac.Annotations == nil || len(tac.Annotations) < 1 { - klog.Infof("tc haven't marked any annotation") + framework.Logf("tac haven't marked any annotation") return false, nil } v, ok := tac.Annotations[label.AnnTiKVLastAutoScalingTimestamp] if !ok { - klog.Infof("tac haven't marked any annotation") + framework.Logf("tac has no tikv.tidb.pingcap.com/last-autoscaling-timestamp annotation") return false, nil } secondTs, err := strconv.ParseInt(v, 10, 64) @@ -561,16 +620,22 @@ var _ = ginkgo.Describe("[tidb-operator][Serial]", func() { return false, err } if secondTs == firstScaleTimestamp { - klog.Info("tikv haven't scale yet") + framework.Logf("tikv haven't scale yet") return false, nil } if secondTs-firstScaleTimestamp < 100 { return false, fmt.Errorf("tikv second scale's interval isn't meeting the interval requirement") } + if time.Now().Sub(time.Unix(readyToScaleTimestamp, 0)).Seconds() < 40 { + return false, fmt.Errorf("tikv doesn't meet the ReadyToScale threshold") + } + if tac.Status.TiKV.Phase != v1alpha1.NormalAutoScalerPhase { + return false, fmt.Errorf("tikv don't have right ReadyToScale phase") + } return true, nil }) framework.ExpectNoError(err, "check tikv auto-scale to 3 error") - klog.Info("success to check tikv auto scale-in to 3 replicas") + framework.Logf("success to check tikv auto scale-in to 3 replicas") mp = &mock.MonitorParams{ Name: tc.Name,